Cache the results of clang-tidy.

The cache is keyed, for each file, on:
* the file name,
* the exact command used to compile the file, to detect changes in
  arguments,
* the hash of the preprocessor stage output, to detect changes in
  includes.

A later patch also adds the list of enabled checks to the cache key.

Running clang-tidy uncached takes between 95 and 110 minutes
single-threaded (just over 9 minutes wall time on a 12-core builder),
depending on the set of enabled checks. In the ideal case, where no
source file has changed, enabling the cache reduces this to 80 seconds
(9 seconds on a 12-core builder).

This makes clang-tidy much more pleasant to work with locally, but it
is not enough to guarantee painless CI operation: while GitLab does
support caching between builds and can be configured to retain the
cache even when a job fails, this does not happen when the job times
out after 60 minutes or is aborted manually.

Signed-off-by: Tim Wiederhake <twiederh@xxxxxxxxxx>
---
 scripts/run-clang-tidy.py | 83 ++++++++++++++++++++++++++++++++++++++-
 1 file changed, 81 insertions(+), 2 deletions(-)

diff --git a/scripts/run-clang-tidy.py b/scripts/run-clang-tidy.py
index dc5880878b..cc9c20ea32 100755
--- a/scripts/run-clang-tidy.py
+++ b/scripts/run-clang-tidy.py
@@ -1,14 +1,17 @@
 #!/usr/bin/env python3
 
 import argparse
+import hashlib
 import json
 import multiprocessing
 import os
 import queue
 import re
+import shlex
 import subprocess
 import sys
 import threading
+import time
 
 
 spam = [
@@ -44,6 +47,10 @@ def parse_args():
         default=multiprocessing.cpu_count(),
         type=int,
         help="Number of threads to run")
+    parser.add_argument(
+        "--cache",
+        dest="cache",
+        help="Path to cache directory")
 
     return parser.parse_args()
 
@@ -67,14 +74,75 @@ def run_clang_tidy(item):
     }
 
 
+def cache_name(item):
+    if not args.cache:
+        return None
+
+    cmd = shlex.split(item["command"])
+    for index, element in enumerate(cmd):
+        if element == "-o":
+            cmd[index + 1] = "/dev/stdout"
+            continue
+        if element == "-MD":
+            cmd[index] = None
+        if element in ("-MQ", "-MF"):
+            cmd[index] = None
+            cmd[index + 1] = None
+    cmd = [c for c in cmd if c is not None]
+    cmd.append("-E")
+
+    result = subprocess.run(
+        cmd,
+        stdout=subprocess.PIPE,
+        universal_newlines=True)
+
+    if result.returncode != 0:
+        return None
+
+    hashsum = hashlib.sha256()
+    hashsum.update(item["command"].encode())
+    hashsum.update(result.stdout.encode())
+
+    basename = "".join([c if c.isalnum() else "_" for c in item["output"]])
+    return os.path.join(args.cache, "%s-%s" % (basename, hashsum.hexdigest()))
+
+
+def cache_read(filename):
+    if filename is None:
+        return None
+
+    try:
+        with open(filename) as f:
+            return json.load(f)
+    except FileNotFoundError:
+        pass
+    except json.decoder.JSONDecodeError:
+        pass
+    return None
+
+
+def cache_write(filename, result):
+    if filename is None:
+        return
+
+    with open(filename, "w") as f:
+        json.dump(result, f)
+
+
 def worker():
     while True:
         item = items.get()
         os.chdir(item["directory"])
 
-        print(item["file"])
+        cache = cache_name(item)
+        result = cache_read(cache)
 
-        result = run_clang_tidy(item)
+        with lock:
+            print(item["file"], "" if result is None else "(from cache)")
+
+        if result is None:
+            result = run_clang_tidy(item)
+            cache_write(cache, result)
 
         with lock:
             if result["returncode"] != 0:
@@ -92,6 +160,10 @@ items = queue.Queue()
 lock = threading.Lock()
 findings = list()
 
+if args.cache:
+    args.cache = os.path.abspath(args.cache)
+    os.makedirs(args.cache, exist_ok=True)
+
 for _ in range(args.thread_num):
     threading.Thread(target=worker, daemon=True).start()
 
@@ -102,6 +174,13 @@ with open(os.path.join(args.build_dir, "compile_commands.json")) as f:
 
 items.join()
 
+if args.cache:
+    cutoffdate = time.time() - 7 * 24 * 60 * 60
+    for filename in os.listdir(args.cache):
+        pathname = os.path.join(args.cache, filename)
+        if os.path.getmtime(pathname) < cutoffdate:
+            os.remove(pathname)
+
 if findings:
     print("Findings in %s file(s):" % len(findings))
     for finding in findings:
-- 
2.26.2
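
As an aside for reviewers, the keying scheme is easy to reason about in
isolation. Below is a minimal, self-contained sketch of the idea; the
name toy_cache_name and the sample inputs are made up for illustration
and are not part of the patch. Identical compile command and
preprocessed output map to the same cache entry, while a change to
either component (for example an edit to an included header, which
changes the preprocessed output) yields a new key and so invalidates
the entry.

    import hashlib
    import os


    def toy_cache_name(cache_dir, output, command, preprocessed):
        # Hash the compile command (catches changed flags) together
        # with the preprocessed source (catches edits to the file or
        # to anything it includes).
        hashsum = hashlib.sha256()
        hashsum.update(command.encode())
        hashsum.update(preprocessed.encode())
        # Sanitize the object file name so the key is a safe file name.
        basename = "".join(c if c.isalnum() else "_" for c in output)
        return os.path.join(
            cache_dir, "%s-%s" % (basename, hashsum.hexdigest()))


    a = toy_cache_name("/tmp/cache", "virsh.o", "cc -O2 -c virsh.c", "int x;")
    b = toy_cache_name("/tmp/cache", "virsh.o", "cc -O2 -c virsh.c", "int x;")
    c = toy_cache_name("/tmp/cache", "virsh.o", "cc -O2 -c virsh.c", "long x;")
    assert a == b   # unchanged inputs hit the same cache entry
    assert a != c   # changed preprocessed output invalidates the entry

Note that only the sha256 digest of the preprocessed output ends up in
the file name, so the cache directory stays small no matter how large
the preprocessed translation units are.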