The marshalled dict in the response given on STDOUT by p4 uses `str` for keys and string values. When run using python3, these values are deserialized as `bytes`, leading to a whole host of problems as the rest of the code assumes `str` is used throughout. This patch changes the deserialization behaviour such that, as much as possible, text output from p4 is decoded to native unicode strings. Exceptions are made for the field `data` as it is usually arbitrary binary data. `depotFile[0-9]*`, `path`, and `clientFile` are also exempt as they contain path information which may not be UTF-8 encoding compatible, and must survive round-trip back to p4. Signed-off-by: Yang Zhao <yang.zhao@xxxxxxxxxxxxxx> SQUASH: use unicode string internally throughout --- git-p4.py | 61 ++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 45 insertions(+), 16 deletions(-) diff --git a/git-p4.py b/git-p4.py index ebeef35a92..6720c7b24a 100755 --- a/git-p4.py +++ b/git-p4.py @@ -157,6 +157,19 @@ def die(msg): sys.stderr.write(msg + "\n") sys.exit(1) +# We need different encoding/decoding strategies for text data being passed +# around in pipes depending on python version +if sys.version_info.major >= 3: + def decode_text_stream(s): + return s.decode() if isinstance(s, bytes) else s + def encode_text_stream(s): + return s.encode() if isinstance(s, str) else s +else: + def decode_text_stream(s): + return s + def encode_text_stream(s): + return s.encode('utf_8') if isinstance(s, unicode) else s + def write_pipe(c, stdin): if verbose: sys.stderr.write('Writing pipe: %s\n' % str(c)) @@ -186,7 +199,7 @@ def read_pipe_full(c): expand = isinstance(c,basestring) p = subprocess.Popen(c, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=expand) (out, err) = p.communicate() - return (p.returncode, out, err) + return (p.returncode, out, decode_text_stream(err)) def read_pipe(c, ignore_error=False): """ Read output from command. Returns the output text on @@ -209,11 +222,11 @@ def read_pipe_text(c): if retcode != 0: return None else: - return out.rstrip() + return decode_text_stream(out).rstrip() -def p4_read_pipe(c, ignore_error=False): +def p4_read_pipe(c, ignore_error=False, raw=False): real_cmd = p4_build_cmd(c) - return read_pipe(real_cmd, ignore_error) + return read_pipe(real_cmd, ignore_error, raw=raw) def read_pipe_lines(c): if verbose: @@ -222,7 +235,7 @@ def read_pipe_lines(c): expand = isinstance(c, basestring) p = subprocess.Popen(c, stdout=subprocess.PIPE, shell=expand) pipe = p.stdout - val = pipe.readlines() + val = [decode_text_stream(line) for line in pipe.readlines()] if pipe.close() or p.wait(): die('Command failed: %s' % str(c)) @@ -253,6 +266,7 @@ def p4_has_move_command(): cmd = p4_build_cmd(["move", "-k", "@from", "@to"]) p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) (out, err) = p.communicate() + err = decode_text_stream(err) # return code will be 1 in either case if err.find("Invalid option") >= 0: return False @@ -633,6 +647,20 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, try: while True: entry = marshal.load(p4.stdout) + if bytes is not str: + # Decode unmarshalled dict to use str keys and values, except for: + # - `data` which may contain arbitrary binary data + # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text + decoded_entry = {} + for key, value in entry.items(): + key = key.decode() + if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')): + value = value.decode() + decoded_entry[key] = value + # Parse out data if it's an error response + if decoded_entry.get('code') == 'error' and 'data' in decoded_entry: + decoded_entry['data'] = decoded_entry['data'].decode() + entry = decoded_entry if skip_info: if 'code' in entry and entry['code'] == 'info': continue @@ -850,6 +878,7 @@ def branch_exists(branch): cmd = [ "git", "rev-parse", "--symbolic", "--verify", branch ] p = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) out, _ = p.communicate() + out = decode_text_stream(out) if p.returncode: return False # expect exactly one line of output: the branch name @@ -1993,7 +2022,7 @@ def applyCommit(self, id): tmpFile = os.fdopen(handle, "w+b") if self.isWindows: submitTemplate = submitTemplate.replace("\n", "\r\n") - tmpFile.write(submitTemplate) + tmpFile.write(encode_text_stream(submitTemplate)) tmpFile.close() if self.prepare_p4_only: @@ -2040,11 +2069,11 @@ def applyCommit(self, id): if self.edit_template(fileName): # read the edited message and submit tmpFile = open(fileName, "rb") - message = tmpFile.read() + message = decode_text_stream(tmpFile.read()) tmpFile.close() if self.isWindows: message = message.replace("\r\n", "\n") - submitTemplate = message[:message.index(separatorLine)] + submitTemplate = encode_text_stream(message[:message.index(separatorLine)]) if update_shelve: p4_write_pipe(['shelve', '-r', '-i'], submitTemplate) @@ -2145,7 +2174,7 @@ def exportGitTags(self, gitTags): print("Not creating p4 label %s for tag due to option" \ " --prepare-p4-only" % name) else: - p4_write_pipe(["label", "-i"], labelTemplate) + p4_write_pipe(["label", "-i"], encode_text_stream(labelTemplate)) # Use the label p4_system(["tag", "-l", name] + @@ -2469,7 +2498,7 @@ def append(self, view_line): def convert_client_path(self, clientFile): # chop off //client/ part to make it relative - if not clientFile.startswith(self.client_prefix): + if not decode_path(clientFile).startswith(self.client_prefix): die("No prefix '%s' on clientFile '%s'" % (self.client_prefix, clientFile)) return clientFile[len(self.client_prefix):] @@ -2729,7 +2758,7 @@ def splitFilesIntoBranches(self, commit): return branches def writeToGitStream(self, gitMode, relPath, contents): - self.gitStream.write('M %s inline %s\n' % (gitMode, relPath)) + self.gitStream.write(encode_text_stream(u'M {} inline {}\n'.format(gitMode, relPath))) self.gitStream.write('data %d\n' % sum(len(d) for d in contents)) for d in contents: self.gitStream.write(d) @@ -2770,7 +2799,7 @@ def streamOneP4File(self, file, contents): git_mode = "120000" # p4 print on a symlink sometimes contains "target\n"; # if it does, remove the newline - data = ''.join(contents) + data = ''.join(decode_text_stream(c) for c in contents) if not data: # Some version of p4 allowed creating a symlink that pointed # to nothing. This causes p4 errors when checking out such @@ -2824,7 +2853,7 @@ def streamOneP4File(self, file, contents): pattern = p4_keywords_regexp_for_type(type_base, type_mods) if pattern: regexp = re.compile(pattern, re.VERBOSE) - text = ''.join(contents) + text = ''.join(decode_text_stream(c) for c in contents) text = regexp.sub(r'$\1$', text) contents = [ text ] @@ -2839,7 +2868,7 @@ def streamOneP4Deletion(self, file): if verbose: sys.stdout.write("delete %s\n" % relPath) sys.stdout.flush() - self.gitStream.write("D %s\n" % relPath) + self.gitStream.write(encode_text_stream(u'D {}\n'.format(relPath))) if self.largeFileSystem and self.largeFileSystem.isLargeFile(relPath): self.largeFileSystem.removeLargeFile(relPath) @@ -2939,9 +2968,9 @@ def streamP4FilesCbSelf(entry): if 'shelved_cl' in f: # Handle shelved CLs using the "p4 print file@=N" syntax to print # the contents - fileArg = '%s@=%d' % (f['path'], f['shelved_cl']) + fileArg = f['path'] + encode_text_stream('@={}'.format(f['shelved_cl'])) else: - fileArg = '%s#%s' % (f['path'], f['rev']) + fileArg = f['path'] + encode_text_stream('#{}'.format(f['rev'])) fileArgs.append(fileArg) -- 2.21.0.windows.1