Try to decode file paths in responses from p4 as soon as possible so that we are working with Unicode strings throughout the rest of the flow. This makes Python 3 a lot happier. Signed-off-by: Yang Zhao <yang.zhao@xxxxxxxxxxxxxx> --- This is probably the most risky patch out of the set. It's very likely that I've neglected to consider certain corner cases with decoding of path data. git-p4.py | 34 ++++++++++++++++++---------------- 1 file changed, 18 insertions(+), 16 deletions(-) diff --git a/git-p4.py b/git-p4.py index 6821d6aafd..bd693e1404 100755 --- a/git-p4.py +++ b/git-p4.py @@ -650,11 +650,27 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, if use_encoded_streams: # Decode unmarshalled dict to use str keys and values, except for: # - `data` which may contain arbitrary binary data - # - `depotFile` which may contain non-UTF8 encoded text + # - `depotFile` which may contain non-UTF8 encoded text, and is decoded + # according to git-p4.pathEncoding config decoded_entry = {} for key, value in entry.items(): key = key.decode() - decoded_entry[key] = value.decode() if not (key in ['data', 'depotFile'] or isinstance(value, str)) else value + if key == 'data': + pass + elif key == 'depotFile': + try: + value = value.decode('ascii') + except: + encoding = 'utf-8' + if gitConfig('git-p4.pathEncoding'): + encoding = gitConfig('git-p4.pathEncoding') + path = path.decode(encoding, 'replace') + if verbose: + print('Path with non-ASCII characters detected. 
Used %s to decode: %s ' % (encoding, path)) + elif not isinstance(value, str): + value = value.decode() + + decoded_entry[key] = value entry = decoded_entry if skip_info: if 'code' in entry and entry['code'] == 'info': @@ -2758,24 +2774,11 @@ def writeToGitStream(self, gitMode, relPath, contents): self.gitStream.write(d) self.gitStream.write('\n') - def encodeWithUTF8(self, path): - try: - path.decode('ascii') - except: - encoding = 'utf8' - if gitConfig('git-p4.pathEncoding'): - encoding = gitConfig('git-p4.pathEncoding') - path = path.decode(encoding, 'replace').encode('utf8', 'replace') - if self.verbose: - print('Path with non-ASCII characters detected. Used %s to encode: %s ' % (encoding, path)) - return path - # output one file from the P4 stream # - helper for streamP4Files def streamOneP4File(self, file, contents): relPath = self.stripRepoPath(file['depotFile'], self.branchPrefixes) - relPath = self.encodeWithUTF8(relPath) if verbose: if 'fileSize' in self.stream_file: size = int(self.stream_file['fileSize']) @@ -2858,7 +2861,6 @@ def streamOneP4File(self, file, contents): def streamOneP4Deletion(self, file): relPath = self.stripRepoPath(file['path'], self.branchPrefixes) - relPath = self.encodeWithUTF8(relPath) if verbose: sys.stdout.write("delete %s\n" % relPath) sys.stdout.flush() -- 2.24.0.windows.2