fc-case/fc-case.py | 74 +++++++++++++++++++++++++---------------------------- 1 file changed, 35 insertions(+), 39 deletions(-) New commits: commit 3bb79f0a4ac7977942d75a16a9e7ac9cb353509d Merge: 6e1081d 2bb54d3 Author: Akira TAGOH <akira@xxxxxxxxx> Date: Fri Nov 22 12:52:46 2024 +0000 Merge branch 'issues/433' into 'main' fc-case: Correct the license header of fccase.h Closes #433 See merge request fontconfig/fontconfig!347 commit 2bb54d3b815d9932ed57d875b3077c1c7a4d767d Author: Akira TAGOH <akira@xxxxxxxxx> Date: Fri Nov 22 19:49:27 2024 +0900 fc-case: Correct the license header of fccase.h fccase.h is generated against CaseFolding.txt from Unicode.org. All the data available in this file should be licensed under a terms of use https://www.unicode.org/terms_of_use.html Fixes https://gitlab.freedesktop.org/fontconfig/fontconfig/-/issues/433 diff --git a/fc-case/fc-case.py b/fc-case/fc-case.py index 360bd32..be808b3 100755 --- a/fc-case/fc-case.py +++ b/fc-case/fc-case.py @@ -28,29 +28,32 @@ import argparse import string import sys + class CaseFoldClass(Enum): COMMON = 1 FULL = 2 SIMPLE = 3 TURKIC = 4 + class CaseFoldMethod(Enum): RANGE = 0 EVEN_ODD = 1 FULL = 2 + caseFoldClassMap = { - 'C' : CaseFoldClass.COMMON, - 'F' : CaseFoldClass.FULL, - 'S' : CaseFoldClass.SIMPLE, - 'T' : CaseFoldClass.TURKIC + 'C': CaseFoldClass.COMMON, + 'F': CaseFoldClass.FULL, + 'S': CaseFoldClass.SIMPLE, + 'T': CaseFoldClass.TURKIC } folds = [] def ucs4_to_utf8(ucs4): utf8_rep = [] - + if ucs4 < 0x80: utf8_rep.append(ucs4) bits = -6 @@ -70,17 +73,19 @@ def ucs4_to_utf8(ucs4): utf8_rep.append(((ucs4 >> 30) & 0x01) | 0xFC) bits = 24 else: - return []; + return [] while bits >= 0: utf8_rep.append(((ucs4 >> bits) & 0x3F) | 0x80) - bits-= 6 + bits -= 6 return utf8_rep + def utf8_size(ucs4): return len(ucs4_to_utf8(ucs4)) + case_fold_method_name_map = { CaseFoldMethod.RANGE: 'FC_CASE_FOLD_RANGE,', CaseFoldMethod.EVEN_ODD: 'FC_CASE_FOLD_EVEN_ODD,', @@ -115,7 +120,8 @@ if __name__=='__main__': tokens = line.split('; ') if len(tokens) < 3: - print('Not enough tokens in line {}'.format(cnt), file=sys.stderr) + print('Not enough tokens in line {}'.format(cnt), + file=sys.stderr) sys.exit(1) # Get upper case value @@ -125,14 +131,14 @@ if __name__=='__main__': cfclass = caseFoldClassMap[tokens.pop(0)] # Get list of result characters - lower = list(map(lambda s: int(s,16), tokens.pop(0).split())) + lower = list(map(lambda s: int(s, 16), tokens.pop(0).split())) # print('\t----> {:04X} {} {}'.format(upper, cfclass, lower)) if not minFoldChar: minFoldChar = upper - maxFoldChar = upper; + maxFoldChar = upper if cfclass in [CaseFoldClass.COMMON, CaseFoldClass.FULL]: if len(lower) == 1: @@ -146,18 +152,18 @@ if __name__=='__main__': if foldExtends: # This modifies the last fold item in the array too - fold['count'] = upper - fold['upper'] + 1; + fold['count'] = upper - fold['upper'] + 1 else: fold = {} fold['upper'] = upper - fold['offset'] = lower[0] - upper; + fold['offset'] = lower[0] - upper if fold['offset'] == 1: fold['method'] = CaseFoldMethod.EVEN_ODD else: fold['method'] = CaseFoldMethod.RANGE fold['count'] = 1 folds.append(fold) - expand = utf8_size (lower[0]) - utf8_size(upper) + expand = utf8_size(lower[0]) - utf8_size(upper) else: fold = {} fold['upper'] = upper @@ -185,20 +191,14 @@ if __name__=='__main__': if args.output_file: sys.stdout = open(args.output_file, 'w', encoding='utf-8') - # Read the template file - if args.template_file: - tmpl_file = open(args.template_file, 'r', encoding='utf-8') - else: - tmpl_file = sys.stdin - - # Scan the input until the marker is found - # FIXME: this is a bit silly really, might just as well harcode - # the license header in the script and drop the template - for line in tmpl_file: - if line.strip() == '@@@': - break - print(line, end='') - + print('/*') + print(' * This file was generated against CaseFolding.txt from' + ' Unicode.org.') + print(' * All the data in array is a part of them and licensed' + ' under a terms of use:') + print(' * https://www.unicode.org/terms_of_use.html') + print(' */') + print('') # Dump these tables print('#define FC_NUM_CASE_FOLD\t{}'.format(len(folds))) print('#define FC_NUM_CASE_FOLD_CHARS\t{}'.format(len(foldChars))) @@ -211,14 +211,14 @@ if __name__=='__main__': # Dump out ranges print('static const FcCaseFold fcCaseFold[FC_NUM_CASE_FOLD] = {') for f in folds: - short_offset = f['offset'] - if short_offset < -32367: - short_offset += 65536 - if short_offset > 32368: - short_offset -= 65536 - print(' {} 0x{:08x}, {:22s} 0x{:04x}, {:6d} {},'.format('{', - f['upper'], case_fold_method_name_map[f['method']], - f['count'], short_offset, '}')) + short_offset = f['offset'] + if short_offset < -32367: + short_offset += 65536 + if short_offset > 32368: + short_offset -= 65536 + print(f' {{ 0x{f["upper"]:08x}, ' + f'{case_fold_method_name_map[f["method"]]:22s} ' + f'0x{f["count"]:04x}, {short_offset:6d} }},') print('};\n') # Dump out "other" values @@ -233,8 +233,4 @@ if __name__=='__main__': print('0x{:02x}'.format(c), end=end) print('\n};') - # And flush out the rest of the input file - for line in tmpl_file: - print(line, end='') - sys.stdout.flush()