#!/usr/bin/env python3
"""Check a source tree for common typos using codespell.

This is a wrapper for codespell [1], a spell checker for source code.
Codespell does not compare words to a dictionary, but rather works by
checking words against a list of common typos, making it produce fewer
false positives than other solutions.

This script works around the lack of per-directory ignore lists and some
oddities regarding capitalization in ignore lists.

The ".codespellrc" file in the source directory is used to coarsely filter
out translation and git files, as scanning those accounts for roughly 50%
of the run time otherwise. It is expected to contain:

    [codespell]
    skip = .git/,*.po

[1] https://github.com/codespell-project/codespell/
"""

import argparse
import os
import re
import subprocess
import sys


# Each entry is a pair (path prefix, words). A finding is ignored when the
# file name (relative to the source directory) starts with the prefix and the
# reported word is one of the listed words. An empty word list ignores every
# finding below that prefix. Words are matched case-sensitively.
IGNORE_LIST = [
    # ignore this script
    ("scripts/check-spelling.py", []),

    # 3rd-party: keycodemapdb
    ("src/keycodemapdb/", []),

    # 3rd-party: VirtualBox SDK
    ("src/vbox/vbox_CAPI", []),

    # 3rd-party: qemu
    ("tests/qemucapabilitiesdata/caps_", []),

    # other
    ("", ["msdos", "MSDOS", "wan", "WAN", "hda", "HDA", "inout"]),
    ("NEWS.rst", ["crashers"]),
    ("docs/gitdm/companies/others", ["Archiv"]),
    ("docs/glib-adoption.rst", ["preferrable"]),
    ("docs/js/main.js", ["whats"]),
    ("examples/polkit/libvirt-acl.rules", ["userA", "userB", "userC"]),
    ("src/libvirt-domain.c", ["PTD"]),
    ("src/libxl/libxl_logger.c", ["purposedly"]),
    ("src/nwfilter/nwfilter_dhcpsnoop.c", ["ether"]),
    ("src/nwfilter/nwfilter_ebiptables_driver.c", ["parm"]),
    ("src/nwfilter/nwfilter_learnipaddr.c", ["ether"]),
    ("src/qemu/qemu_agent.c", ["crypted"]),
    ("src/qemu/qemu_agent.h", ["crypted"]),
    ("src/qemu/qemu_process.c", ["wee"]),
    ("src/security/apparmor/libvirt-lxc", ["devic"]),
    ("src/security/apparmor/libvirt-qemu", ["readby"]),
    ("src/storage_file/storage_file_probe.c", ["conectix"]),
    ("src/util/virnetdevmacvlan.c", ["calld"]),
    ("src/util/virtpm.c", ["parm"]),
    ("tests/qemuagenttest.c", ["IST"]),
    ("tests/storagepoolxml2xml", ["cant"]),
    ("tests/sysinfodata/", ["sie"]),
    ("tests/testutils.c", ["nIn"]),
    ("tests/vircgroupdata/ovirt-node-6.6.mounts", ["hald"]),
    ("tests/virhostcpudata/", ["sie"]),
    ("tools/virt-host-validate-common.c", ["sie"]),
]


def ignore(filename, linenumber, word, suggestion):
    """Return True if a codespell finding should not be reported.

    Args:
        filename: Path of the file containing the finding, relative to the
            source directory.
        linenumber: Line of the finding (unused, accepted so that the regex
            groups of a codespell output line can be passed unchanged).
        word: The word codespell flagged.
        suggestion: The replacement codespell suggests (unused).

    Returns:
        True for words of at most two characters and for findings covered
        by IGNORE_LIST, False otherwise.
    """
    if len(word) <= 2:
        return True

    for prefix, words in IGNORE_LIST:
        if not filename.startswith(prefix):
            continue
        # A bare string would turn "word in words" into a substring test
        # (e.g. "crash" in "crashers"); treat it as a one-word list.
        if isinstance(words, str):
            words = [words]
        if not words or word in words:
            return True
    return False


def _untracked_files(directory):
    """Return the file names "git ls-files --others" prints for directory."""
    output = subprocess.check_output(
        ["git", "-C", directory, "ls-files", "--others"],
        universal_newlines=True)
    # The output ends with a newline; drop the resulting empty entry.
    return [path for path in output.split("\n") if path]


def _run_codespell(directory):
    """Run codespell on directory and return its standard output.

    Exits the script with an error message if codespell is not installed,
    returns an unexpected status, or writes anything to stderr.
    """
    try:
        process = subprocess.run(
            [
                "codespell",
                directory,
                "--config",
                os.path.join(directory, ".codespellrc")],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            universal_newlines=True)
    except FileNotFoundError:
        sys.exit("error: codespell not found")

    # NOTE(review): 65 is presumably the status codespell uses when it
    # reports findings -- confirm against the codespell documentation.
    if process.returncode not in (0, 65):
        sys.exit("error: unexpected returncode %s" % process.returncode)

    if process.stderr:
        sys.exit("error: unexpected output to stderr: \"%s\"" % process.stderr)

    return process.stdout


def main():
    """Parse the command line, run codespell and print unignored findings.

    Every reported finding is printed in the form of an IGNORE_LIST entry.
    The script exits with an error message if any finding is reported or if
    a line of codespell output cannot be parsed.
    """
    line_pattern = re.compile(r"^(.*):(.*): (.*) ==> (.*)$")
    output_template = "(\"{0}\", \"{2}\"),\t# line {1}, \"{3}\"?"

    parser = argparse.ArgumentParser(description="Check spelling")
    parser.add_argument(
        "dir",
        help="Path to source directory. "
             "Defaults to parent directory of this script",
        type=os.path.realpath,
        nargs='?')
    parser.add_argument(
        "-i",
        "--ignore",
        help="File to ignore. Can be specified more than once",
        metavar="FILE",
        default=[],
        action="append")
    parser.add_argument(
        "--ignore-untracked",
        help="Ignore all files not tracked by git",
        action="store_true")
    args = parser.parse_args()

    if not args.dir:
        args.dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))

    ignored_files = set(args.ignore)
    if args.ignore_untracked:
        ignored_files.update(_untracked_files(args.dir))

    findings = 0
    for line in _run_codespell(args.dir).split("\n"):
        line = line.strip()
        # Make the file name relative to the source directory. Only a
        # leading occurrence is removed so the rest of the line is untouched.
        if line.startswith(args.dir):
            line = line[len(args.dir):]
        line = line.lstrip("/")
        if not line:
            continue

        match = line_pattern.match(line)
        if not match:
            sys.exit("error: unexpected line: \"%s\"" % line)

        if match.group(1) in ignored_files or ignore(*match.groups()):
            continue

        print(output_template.format(*match.groups()))
        findings += 1

    if findings:
        sys.exit("error: %s spelling errors" % findings)


if __name__ == "__main__":
    main()