The marshalled dict in the response that p4 writes to STDOUT uses `str` for keys and string values. When run under python3, these keys and values are deserialized as `bytes`, which leads to a whole host of problems because the rest of the code assumes `str` is used throughout. Two fields are deliberately left undecoded: `data`, which may contain arbitrary binary data that is not text, and `depotFile`, which may contain text encoded with something other than ASCII or UTF-8. Signed-off-by: Yang Zhao <yang.zhao@xxxxxxxxxxxxxx> --- git-p4.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/git-p4.py b/git-p4.py index 60c73b6a37..ead9d816e1 100755 --- a/git-p4.py +++ b/git-p4.py @@ -36,6 +36,7 @@ unicode = str bytes = bytes basestring = (str,bytes) + use_encoded_streams = True else: # 'unicode' exists, must be Python 2 str = str @@ -643,6 +644,15 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, try: while True: entry = marshal.load(p4.stdout) + if use_encoded_streams: + # Decode unmarshalled dict to use str keys and values, except for: + # - `data` which may contain arbitrary binary data + # - `depotFile` which may contain non-UTF8 encoded text + decoded_entry = {} + for key, value in entry.items(): + key = key.decode() + decoded_entry[key] = value.decode() if not (key in ['data', 'depotFile'] or isinstance(value, str)) else value + entry = decoded_entry if skip_info: if 'code' in entry and entry['code'] == 'info': continue -- 2.24.0.windows.2