Kilian Kilger (2): git-p4: fix bug with encoding of p4 client name git-p4: refactoring of p4CmdList() git-p4.py | 51 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 42 insertions(+), 9 deletions(-) base-commit: e4a4b31577c7419497ac30cebe30d755b97752c5 Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1285%2Fcohomology%2Fmaint-v3 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1285/cohomology/maint-v3 Pull-Request: https://github.com/git/git/pull/1285 Range-diff vs v2: -: ----------- > 1: 87e7809b75a git-p4: fix bug with encoding of p4 client name 1: 3280a9579bc ! 2: 4a81423f0e8 git-p4: fix bug with encoding of p4 client name @@ Metadata Author: Kilian Kilger <kkilger@xxxxxxxxx> ## Commit message ## - git-p4: fix bug with encoding of p4 client name + git-p4: refactoring of p4CmdList() - The Perforce client name can contain arbitrary characters - which do not decode to UTF-8. Use the fallback strategy - implemented in metadata_stream_to_writable_bytes() also - for the client name. + The function p4CmdList executes a Perforce command and + decodes the marshalled Python dictionary. Special care has to be + taken for certain dictionary values which contain non-unicode characters. + The old handling contained separate hacks for each of the corresponding + dictionary keys. This commit tries to refactor the code to handle the + special cases uniformly. 
Signed-off-by: Kilian Kilger <kkilger@xxxxxxxxx> @@ git-p4.py: def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=F if bytes is not str: - # Decode unmarshalled dict to use str keys and values, except for: - # - `data` which may contain arbitrary binary data -- # - `desc` or `FullName` which may contain non-UTF8 encoded text handled below, eagerly converted to bytes +- # - `desc` or `client` or `FullName` which may contain non-UTF8 encoded text handled below, eagerly converted to bytes - # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text, handled by decode_path() + # Decode unmarshalled dict to use str keys and values. Special cases are handled below. decoded_entry = {} for key, value in entry.items(): key = key.decode() -- if isinstance(value, bytes) and not (key in ('data', 'desc', 'FullName', 'path', 'clientFile') or key.startswith('depotFile')): +- if isinstance(value, bytes) and not (key in ('data', 'desc', 'FullName', 'path', 'clientFile', 'client') or key.startswith('depotFile')): + if isinstance(value, bytes) and p4KeyWhichCanBeDirectlyDecoded(key): value = value.decode() decoded_entry[key] = value @@ git-p4.py: def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=F continue - if 'desc' in entry: - entry['desc'] = metadata_stream_to_writable_bytes(entry['desc']) +- if 'client' in entry: +- entry['client'] = metadata_stream_to_writable_bytes(entry['client']) - if 'FullName' in entry: - entry['FullName'] = metadata_stream_to_writable_bytes(entry['FullName']) + for key in p4KeysContainingNonUtf8Chars(): -- gitgitgadget