#!/usr/bin/env python3
"""Check the spelling of a source tree with codespell.

This is a wrapper for codespell [1], a spell checker for source code.
Codespell does not compare words to a dictionary, but rather works by
checking words against a list of common typos, making it produce fewer
false positives than other solutions.

This script works around the lack of per-directory ignore lists and some
oddities regarding capitalization in ignore lists.

[1] https://github.com/codespell-project/codespell/

RFC: Is there interest in having something like this in CI?

Examples of spelling mistakes that were found using codespell:
4ad3c95f4bef5c7c9657de470fb74a4d14c8a331,
785a11cec8693de7df024aae68975dd1799b646a,
1452317b5c727eb17178942012f57f0c37631ae4.

Signed-off-by: Tim Wiederhake <twiederh@xxxxxxxxxx>

Originally submitted as a patch creating scripts/check-spelling.py
(mode 100755).
"""

import argparse
import os
import re
import subprocess
import sys


# Each entry is (path prefix, words). The path prefix is matched against
# the file name relative to the checked directory (with a leading "/").
# An empty word list means "ignore every finding below this prefix".
# Words are matched exactly and case-sensitively.
IGNORE_LIST = [
    # ignore all translation files
    ("/po/", []),

    # ignore this script
    ("/scripts/check-spelling.py", []),

    # 3rd-party: keycodemapdb
    ("/src/keycodemapdb/", []),

    # 3rd-party: VirtualBox SDK
    ("/src/vbox/vbox_CAPI", [
        "aAdd",
        "aCount",
        "aLocation",
        "aNumber",
        "aParent",
        "progess"]),

    # 3rd-party: qemu
    ("/tests/qemucapabilitiesdata/caps_", ["encyption"]),

    # other
    ("/", ["msdos", "MSDOS", "wan", "WAN", "hda", "HDA", "inout"]),
    ("/NEWS.rst", ["crashers"]),
    ("/docs/gitdm/companies/others", ["Archiv"]),
    ("/docs/glib-adoption.rst", ["preferrable"]),
    ("/docs/js/main.js", ["whats"]),
    ("/examples/polkit/libvirt-acl.rules", ["userA", "userB", "userC"]),
    ("/src/libvirt-domain.c", ["PTD"]),
    ("/src/libxl/libxl_logger.c", ["purposedly"]),
    ("/src/nwfilter/nwfilter_dhcpsnoop.c", ["ether"]),
    ("/src/nwfilter/nwfilter_ebiptables_driver.c", ["parm"]),
    ("/src/nwfilter/nwfilter_learnipaddr.c", ["ether"]),
    ("/src/qemu/qemu_agent.c", ["crypted"]),
    ("/src/qemu/qemu_agent.h", ["crypted"]),
    ("/src/security/apparmor/libvirt-lxc", ["devic"]),
    ("/src/security/apparmor/libvirt-qemu", ["readby"]),
    ("/src/storage_file/storage_file_probe.c", ["conectix"]),
    ("/src/util/virnetdevmacvlan.c", ["calld"]),
    ("/src/util/virtpm.c", ["parm"]),
    ("/tests/qemuagenttest.c", ["IST"]),
    ("/tests/storagepoolxml2xml", ["cant"]),
    ("/tests/sysinfodata/", ["sie"]),
    ("/tests/testutils.c", ["nIn"]),
    ("/tests/vircgroupdata/ovirt-node-6.6.mounts", ["hald"]),
    ("/tests/virhostcpudata/", ["sie"]),
    ("/tools/virt-host-validate-common.c", ["sie"]),
]

# Exit statuses of codespell that this script accepts: 0 (nothing found)
# and 65, which codespell uses when it reported misspellings.
_CODESPELL_OK_STATUSES = (0, 65)

# One line of codespell output: "<file>:<line>: <word> ==> <suggestion>".
_LINE_PATTERN = re.compile(r"^(.*):(.*): (.*) ==> (.*)$")


def _strip_directory(line, directory):
    """Remove the leading *directory* from *line*, keeping the next "/".

    Only a leading occurrence is removed, so text elsewhere in the line
    that happens to contain the directory name is left untouched.
    """
    # Dropping trailing slashes keeps the separator in front of the
    # relative file name and turns "/" into a no-op prefix.
    prefix = directory.rstrip("/")
    if prefix and line.startswith(prefix):
        return line[len(prefix):]
    return line


def check_spelling(directory):
    """Run codespell on *directory* and yield its findings.

    This is a generator. Every item is the tuple
    (filename, line number, word, suggestion), all as strings, with the
    leading *directory* stripped from the file name.

    The program is terminated with an error message if codespell cannot
    be started, exits with an unexpected status, writes anything to
    stderr, or prints a line that cannot be parsed.
    """
    try:
        process = subprocess.run(
            ["codespell", directory],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True)
    except FileNotFoundError:
        sys.exit("error: codespell executable not found")

    if process.returncode not in _CODESPELL_OK_STATUSES:
        sys.exit("error: unexpected returncode %s" % process.returncode)

    if process.stderr:
        sys.exit(
            "error: unexpected output to stderr: \"%s\"" % process.stderr)

    for line in process.stdout.split("\n"):
        line = _strip_directory(line.strip(), directory)
        if not line:
            continue
        match = _LINE_PATTERN.match(line)
        if not match:
            sys.exit("error: unexpected line: \"%s\"" % line)
        yield match.groups()


def ignore(filename, linenumber, word, suggestion):
    """Return True if a codespell finding should not be reported.

    A finding is ignored when *word* has at most two characters, or when
    an IGNORE_LIST entry whose path prefix matches *filename* either has
    no words at all or lists *word* exactly. *linenumber* and
    *suggestion* are accepted so that a finding tuple can be passed with
    ``ignore(*finding)``; they do not influence the result.
    """
    # Ignore abbreviations and ad-hoc variable names
    if len(word) <= 2:
        return True

    for prefix, words in IGNORE_LIST:
        if not filename.startswith(prefix):
            continue
        if not words:
            return True
        # A bare string is a single word. Without this, "in" would do a
        # substring test and e.g. "par" would match the entry "parm".
        if isinstance(words, str):
            words = [words]
        if word in words:
            return True
    return False


def main():
    """Parse the command line, run the check and report new findings.

    Every finding that is not ignored is printed in a form that can be
    pasted into IGNORE_LIST; the script then exits with an error.
    """
    parser = argparse.ArgumentParser(description="Check spelling")
    parser.add_argument(
        "dir",
        help="Path to source directory",
        type=os.path.realpath)
    args = parser.parse_args()

    findings = [f for f in check_spelling(args.dir) if not ignore(*f)]
    if findings:
        template = "(\"{0}\", \"{2}\"),\t# line {1}, \"{3}\"?"
        for finding in findings:
            print(template.format(*finding))
        sys.exit("error: %s spelling errors" % len(findings))


if __name__ == "__main__":
    main()