On Mon, 25 Nov 2024 at 05:12, Eric Biggers <ebiggers@xxxxxxxxxx> wrote: > > From: Eric Biggers <ebiggers@xxxxxxxxxx> > > Add a Python script that generates constants for computing the given CRC > variant(s) using x86's pclmulqdq or vpclmulqdq instructions. > There is nothing x86 specific about this, right? Except perhaps the choice of fold distances? > It can also generate the traditional byte-at-a-time tables. > > Only small changes should be needed for this script to also work to > generate the constants needed for CRC computation on other architectures > with a carryless multiplication instruction, such as arm64. > > Signed-off-by: Eric Biggers <ebiggers@xxxxxxxxxx> > --- > scripts/crc/gen-crc-consts.py | 207 ++++++++++++++++++++++++++++++++++ > 1 file changed, 207 insertions(+) > create mode 100755 scripts/crc/gen-crc-consts.py > > diff --git a/scripts/crc/gen-crc-consts.py b/scripts/crc/gen-crc-consts.py > new file mode 100755 > index 0000000000000..84f0902e1cd7b > --- /dev/null > +++ b/scripts/crc/gen-crc-consts.py > @@ -0,0 +1,207 @@ > +#!/usr/bin/env python3 > +# SPDX-License-Identifier: GPL-2.0-or-later > +# > +# Script that generates constants for computing the given CRC variant(s). > +# > +# Copyright 2024 Google LLC > +# > +# Author: Eric Biggers <ebiggers@xxxxxxxxxx> > + > +import sys > + > +# XOR (add) an iterable of polynomials. > +def xor(iterable): > + res = 0 > + for val in iterable: > + res ^= val > + return res > + > +# Multiply two polynomials. > +def clmul(a, b): > + return xor(a << i for i in range(b.bit_length()) if (b & (1 << i)) != 0) > + > +# Polynomial division floor(a / b). > +def div(a, b): > + q = 0 > + while a.bit_length() >= b.bit_length(): > + q ^= 1 << (a.bit_length() - b.bit_length()) > + a ^= b << (a.bit_length() - b.bit_length()) > + return q > + > +# Reduce the polynomial 'a' modulo the polynomial 'b'. > +def reduce(a, b): > + return a ^ clmul(div(a, b), b) > + > +# Pretty-print a polynomial. > +def pprint_poly(prefix, poly): > + terms = ['1' if i == 0 else 'x' if i == 1 else f'x^{i}' > + for i in reversed(range(poly.bit_length())) > + if (poly & (1 << i)) != 0] > + j = 0 > + while j < len(terms): > + s = prefix + terms[j] + (' +' if j < len(terms) - 1 else '') > + j += 1 > + while j < len(terms) and len(s) < 72: > + s += ' ' + terms[j] + (' +' if j < len(terms) - 1 else '') > + j += 1 > + print(s) > + prefix = ' * ' + (' ' * (len(prefix) - 3)) > + > +# Reverse the bits of a polynomial. > +def bitreverse(poly, num_bits): > + return xor(1 << (num_bits - 1 - i) for i in range(num_bits) > + if (poly & (1 << i)) != 0) > + > +# Format a polynomial as hex. Bit-reflect it if the CRC is LSB-first. > +def fmt_poly(variant, poly, num_bits): > + if variant.lsb: > + poly = bitreverse(poly, num_bits) > + return f'0x{poly:0{2*num_bits//8}x}' > + > +# Print a comment describing constants generated for the given CRC variant. > +def print_header(variant, what): > + print('/*') > + s = f'{"least" if variant.lsb else "most"}-significant-bit-first CRC-{variant.bits}' > + print(f' * {what} generated for {s} using') > + pprint_poly(' * G(x) = ', variant.G) > + print(' */') > + > +# Print a polynomial as hex, but drop a term if needed to keep it in 64 bits. > +def print_poly_truncate65thbit(variant, poly, num_bits, desc): > + if num_bits > 64: > + assert num_bits == 65 > + if variant.lsb: > + assert (poly & 1) != 0 > + poly >>= 1 > + desc += ' - 1' > + else: > + poly ^= 1 << 64 > + desc += ' - x^64' > + num_bits = 64 > + print(f'\t\t{fmt_poly(variant, poly, num_bits)},\t/* {desc} */') > + > +class CrcVariant: > + def __init__(self, bits, generator_poly, bit_order): > + self.bits = bits > + if bit_order not in ['lsb', 'msb']: > + raise ValueError('Invalid value for bit_order') > + self.lsb = bit_order == 'lsb' > + self.name = f'crc{bits}_{bit_order}_0x{generator_poly:0{(2*bits+7)//8}x}' > + if self.lsb: > + generator_poly = bitreverse(generator_poly, bits) > + self.G = generator_poly ^ (1 << bits) > + > +# Generate tables for byte-at-a-time CRC computation. > +def gen_sliceby1_tables(variants): > + for v in variants: > + print('') > + print_header(v, 'CRC table') > + print(f'static const u{v.bits} __maybe_unused {v.name}_table[256] = {{') > + s = '' > + for i in range(256): > + remainder = (bitreverse(i, 8) if v.lsb else i) << (v.bits - 8) > + for _ in range(8): > + remainder <<= 1 > + if (remainder & (1 << v.bits)) != 0: > + remainder ^= v.G > + next_entry = fmt_poly(v, remainder, v.bits) + ',' > + if len(s + next_entry) > 71: > + print(f'\t{s}') > + s = '' > + s += (' ' if s else '') + next_entry > + if s: > + print(f'\t{s}') > + print('};') > + > +# Generate constants for carryless multiplication based CRC computation. > +def gen_x86_pclmul_consts(variants): > + # These are the distances, in bits, to generate folding constants for. > + FOLD_DISTANCES = [2048, 1024, 512, 256, 128] > + > + for v in variants: > + print('') > + print_header(v, 'CRC folding constants') > + print('static const struct {') > + if not v.lsb: > + print('\tu8 bswap_mask[16];') > + for i in FOLD_DISTANCES: > + print(f'\tu64 fold_across_{i}_bits_consts[2];') > + print('\tu8 shuf_table[48];') > + print('\tu64 barrett_reduction_consts[2];') > + if v.lsb and v.bits < 64: > + print('\tu64 extract_crc_mask[2];') > + print(f'}} {v.name}_consts __cacheline_aligned __maybe_unused = {{') > + > + # Byte-reflection mask, needed for MSB CRCs > + if not v.lsb: > + print('\t.bswap_mask = {' + ', '.join(str(i) for i in reversed(range(16))) + '},') > + > + # Fold constants for all distances down to 128 bits > + k = v.bits - 65 if v.lsb else 0 > + for i in FOLD_DISTANCES: > + print(f'\t.fold_across_{i}_bits_consts = {{') > + for j in [64, 0] if v.lsb else [0, 64]: > + const = reduce(1<<(i+j+k), v.G) > + pow_desc = f'{i}{"+" if j >= 0 else "-"}{abs(j)}' > + if k != 0: > + pow_desc += f'{"+" if k >= 0 else "-"}{abs(k)}' > + print(f'\t\t{fmt_poly(v, const, v.bits)},\t/* x^({pow_desc}) mod G(x) */') > + print('\t},') > + > + # Shuffle table for handling 1..15 bytes at end > + print('\t.shuf_table = {') > + print('\t\t' + (16*'-1, ').rstrip()) > + print('\t\t' + ''.join(f'{i:2}, ' for i in range(16)).rstrip()) > + print('\t\t' + (16*'-1, ').rstrip()) > + print('\t},') > + > + # Barrett reduction constants for reducing 128 bits to the final CRC > + m = 63 if v.lsb else 64 > + print('\t.barrett_reduction_consts = {') > + print_poly_truncate65thbit(v, div(1<<(m+v.bits), v.G), m+1, > + f'floor(x^{m+v.bits} / G(x))') > + print_poly_truncate65thbit(v, v.G, v.bits+1, 'G(x)') > + print('\t},') > + if v.lsb and v.bits < 64: > + print(f'\t.extract_crc_mask = {{0, 0x{(1<<(v.bits))-1:x}}},') > + > + print('};') > + > +def parse_crc_variants(vars_string): > + variants = [] > + for var_string in vars_string.split(','): > + bits, bit_order, generator_poly = var_string.split('_') > + assert bits.startswith('crc') > + bits = int(bits.removeprefix('crc')) > + assert generator_poly.startswith('0x') > + generator_poly = generator_poly.removeprefix('0x') > + assert len(generator_poly) % 2 == 0 > + generator_poly = int(generator_poly, 16) > + variants.append(CrcVariant(bits, generator_poly, bit_order)) > + return variants > + > +if len(sys.argv) != 3: > + sys.stderr.write(f'Usage: {sys.argv[0]} CONSTS_TYPE[,CONSTS_TYPE]... CRC_VARIANT[,CRC_VARIANT]...\n') > + sys.stderr.write(' CONSTS_TYPE can be sliceby1 or x86_pclmul\n') > + sys.stderr.write(' CRC_VARIANT is crc${num_bits}_${bit_order}_${generator_poly_as_hex}\n') > + sys.stderr.write(' E.g. crc16_msb_0x8bb7 or crc32_lsb_0xedb88320\n') > + sys.stderr.write(' Polynomial must use the given bit_order and exclude x^{num_bits}\n') > + sys.exit(1) > + > +print('/* SPDX-License-Identifier: GPL-2.0-or-later */') Does it make sense to add a GPL header into a generated file? > +print('/*') > +print(' * CRC constants generated by:') > +print(' *') > +print(f' *\t{sys.argv[0]} {" ".join(sys.argv[1:])}') > +print(' *') > +print(' * Do not edit manually.') > +print(' */') > +consts_types = sys.argv[1].split(',') > +variants = parse_crc_variants(sys.argv[2]) > +for consts_type in consts_types: > + if consts_type == 'sliceby1': > + gen_sliceby1_tables(variants) > + elif consts_type == 'x86_pclmul': > + gen_x86_pclmul_consts(variants) > + else: > + raise ValueError(f'Unknown consts_type: {consts_type}') > -- > 2.47.0 >