#!/usr/bin/env python3
"""Wrapper for codespell [1], a spell checker for source code.

Codespell does not compare words to a dictionary, but rather works by
checking words against a list of common typos, making it produce fewer
false positives than other solutions.

This script works around the lack of per-directory ignore lists and some
oddities regarding capitalization in ignore lists.

[1] https://github.com/codespell-project/codespell/
"""

# Signed-off-by: Tim Wiederhake <twiederh@xxxxxxxxxx>

import argparse
import os
import re
import subprocess
import sys


# Each entry is (path prefix, words). The path prefix is compared against the
# reported file name relative to the checked directory (leading "/" included).
# "words" is either a list of words or a single word given as a plain string;
# an empty list means "ignore every finding below this prefix".
IGNORE_LIST = [
    # ignore all translation files
    ("/po/", []),

    # ignore all git files
    ("/.git/", []),

    # ignore this script
    ("/scripts/check-spelling.py", []),

    # 3rd-party: keycodemapdb
    ("/src/keycodemapdb/", []),

    # 3rd-party: VirtualBox SDK
    ("/src/vbox/vbox_CAPI", []),

    # 3rd-party: qemu
    ("/tests/qemucapabilitiesdata/caps_", []),

    # other
    ("/", ["msdos", "MSDOS", "wan", "WAN", "hda", "HDA", "inout"]),
    ("/NEWS.rst", "crashers"),
    ("/docs/gitdm/companies/others", "Archiv"),
    ("/docs/glib-adoption.rst", "preferrable"),
    ("/docs/js/main.js", "whats"),
    ("/examples/polkit/libvirt-acl.rules", ["userA", "userB", "userC"]),
    ("/src/libvirt-domain.c", "PTD"),
    ("/src/libxl/libxl_logger.c", "purposedly"),
    ("/src/nwfilter/nwfilter_dhcpsnoop.c", "ether"),
    ("/src/nwfilter/nwfilter_ebiptables_driver.c", "parm"),
    ("/src/nwfilter/nwfilter_learnipaddr.c", "ether"),
    ("/src/qemu/qemu_agent.c", "crypted"),
    ("/src/qemu/qemu_agent.h", "crypted"),
    ("/src/qemu/qemu_process.c", "wee"),
    ("/src/security/apparmor/libvirt-lxc", "devic"),
    ("/src/security/apparmor/libvirt-qemu", "readby"),
    ("/src/storage_file/storage_file_probe.c", "conectix"),
    ("/src/util/virnetdevmacvlan.c", "calld"),
    ("/src/util/virtpm.c", "parm"),
    ("/tests/qemuagenttest.c", "IST"),
    ("/tests/storagepoolxml2xml", "cant"),
    ("/tests/sysinfodata/", "sie"),
    ("/tests/testutils.c", "nIn"),
    ("/tests/vircgroupdata/ovirt-node-6.6.mounts", "hald"),
    ("/tests/virhostcpudata/", "sie"),
    ("/tools/virt-host-validate-common.c", "sie"),
]

# One line of codespell output: "<file>:<line>: <word> ==> <suggestion>".
_LINE_PATTERN = re.compile(r"^(.*):(.*): (.*) ==> (.*)$")

# Findings are printed in the shape of an IGNORE_LIST entry so they can be
# pasted into the list above if they turn out to be false positives.
_OUTPUT_TEMPLATE = "(\"{0}\", \"{2}\"),\t# line {1}, \"{3}\"?"


def ignore(filename, linenumber, word, suggestion):
    """Return True if a codespell finding should be suppressed.

    Words of at most two characters are always ignored. Otherwise the
    finding is ignored when some IGNORE_LIST entry whose path prefix matches
    *filename* either has an empty word list (ignore everything below that
    prefix) or lists *word* exactly.

    *linenumber* and *suggestion* are not used for the decision; they are
    accepted so that all four regex groups of a parsed output line can be
    passed straight through.
    """
    if len(word) <= 2:
        return True

    for prefix, words in IGNORE_LIST:
        if not filename.startswith(prefix):
            continue
        if not words:
            return True
        # A bare string is a single word. Testing `word in "ether"` directly
        # would be a substring test and wrongly suppress e.g. "the".
        if isinstance(words, str):
            words = [words]
        if word in words:
            return True
    return False


def _strip_base(line, base):
    """Remove the leading *base* directory from a codespell output line."""
    # Only the prefix is removed; the directory string may legitimately
    # reappear later in the line. An empty base (root directory) is a no-op.
    if base and line.startswith(base):
        return line[len(base):]
    return line


def _iter_findings(output, base):
    """Yield (filename, linenumber, word, suggestion) for reportable findings.

    Raises ValueError carrying the offending line if a non-empty line of
    *output* does not look like codespell output.
    """
    for raw in output.split("\n"):
        line = _strip_base(raw.strip(), base)
        if not line:
            continue

        match = _LINE_PATTERN.match(line)
        if not match:
            raise ValueError(line)

        if not ignore(*match.groups()):
            yield match.groups()


def main():
    """Run codespell on the source tree and report findings not ignored.

    Exits with an error message (non-zero status) if codespell cannot be
    run, behaves unexpectedly, or any spelling error remains after applying
    IGNORE_LIST.
    """
    parser = argparse.ArgumentParser(description="Check spelling")
    parser.add_argument(
        "dir",
        help="Path to source directory. "
             "Defaults to parent directory of this script",
        type=os.path.realpath,
        nargs='?')
    args = parser.parse_args()

    if not args.dir:
        args.dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

    try:
        process = subprocess.run(
            ["codespell", args.dir],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True)
    except OSError as err:
        # Typically codespell is not installed or not executable.
        sys.exit("error: unable to run codespell: %s" % err)

    # The script accepts 0 and 65 as normal codespell results; any other
    # status is treated as a failure of the tool itself.
    if process.returncode not in (0, 65):
        sys.exit("error: unexpected returncode %s" % process.returncode)

    if process.stderr:
        sys.exit("error: unexpected output to stderr: \"%s\"" % process.stderr)

    # realpath() only ends in a separator for the root directory; stripping
    # it keeps the leading "/" on reported paths in that case.
    base = args.dir.rstrip(os.sep)

    findings = 0
    try:
        for groups in _iter_findings(process.stdout, base):
            print(_OUTPUT_TEMPLATE.format(*groups))
            findings += 1
    except ValueError as err:
        sys.exit("error: unexpected line: \"%s\"" % err)

    if findings:
        sys.exit("error: %s spelling errors" % findings)


if __name__ == "__main__":
    main()