git-p4 previously contained separate code paths for Python 2 and 3 to abstract away the differences in string handling behaviour between the two platforms. This patch removes the Python 2 code paths within this abstraction without removing the abstractions themselves. These will be removed in later patches to further modernise the script. The motivation for this change is that there is a family of issues with git-p4's handling of incoming text data when it contains bytes which cannot be decoded into UTF-8 characters. For text files created in Windows, CP1252 Smart Quote Characters (0x93 and 0x94) are seen fairly frequently. These codes are invalid in UTF-8, so if the script encounters any file or file name containing them, on Python 2 the symbols will be corrupted, and on Python 3 the script will fail with an exception. In order to address these issues it will be necessary to overhaul git-p4's handling of incoming data. Keeping a clean separation between encoded bytes and decoded text is much easier to do in Python 3. If Python 2 support must be maintained, this will require careful testing of the separate code paths for each platform, which is unreasonable given that Python 2 is now thoroughly deprecated. The minimum supported Python version has been set to 3.6. This version is no longer supported by the Python project; however, at the current time it is still available for use in RHEL 8. No features from newer versions of Python are currently required. Signed-off-by: Joel Holdsworth <jholdsworth@xxxxxxxxxx> --- git-p4.py | 90 ++++++++++++++++++------------------------------------- 1 file changed, 29 insertions(+), 61 deletions(-) diff --git a/git-p4.py b/git-p4.py index 2b4500226a..e3fe86e4f2 100755 --- a/git-p4.py +++ b/git-p4.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # # git-p4.py -- A tool for bidirectional operation between a Perforce depot and git. 
# @@ -16,8 +16,9 @@ # pylint: disable=too-many-branches,too-many-nested-blocks # import sys -if sys.version_info.major < 3 and sys.version_info.minor < 7: - sys.stderr.write("git-p4: requires Python 2.7 or later.\n") +if (sys.version_info.major < 3 or + (sys.version_info.major == 3 and sys.version_info.minor < 6)): + sys.stderr.write("git-p4: requires Python 3.6 or later.\n") sys.exit(1) import os import optparse @@ -36,16 +37,6 @@ import errno import glob -# On python2.7 where raw_input() and input() are both availble, -# we want raw_input's semantics, but aliased to input for python3 -# compatibility -# support basestring in python3 -try: - if raw_input and input: - input = raw_input -except: - pass - verbose = False # Only labels/tags matching this will be imported/exported @@ -176,35 +167,16 @@ def prompt(prompt_text): if response in choices: return response -# We need different encoding/decoding strategies for text data being passed -# around in pipes depending on python version -if bytes is not str: - # For python3, always encode and decode as appropriate - def decode_text_stream(s): - return s.decode() if isinstance(s, bytes) else s - def encode_text_stream(s): - return s.encode() if isinstance(s, str) else s -else: - # For python2.7, pass read strings as-is, but also allow writing unicode - def decode_text_stream(s): - return s - def encode_text_stream(s): - return s.encode('utf_8') if isinstance(s, unicode) else s +def decode_text_stream(s): + return s.decode() if isinstance(s, bytes) else s +def encode_text_stream(s): + return s.encode() if isinstance(s, str) else s def decode_path(path): """Decode a given string (bytes or otherwise) using configured path encoding options """ encoding = gitConfig('git-p4.pathEncoding') or 'utf_8' - if bytes is not str: - return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path - else: - try: - path.decode('ascii') - except: - path = path.decode(encoding, errors='replace') - if verbose: - 
print('Path with non-ASCII characters detected. Used {} to decode: {}'.format(encoding, path)) - return path + return path.decode(encoding, errors='replace') if isinstance(path, bytes) else path def run_git_hook(cmd, param=[]): """Execute a hook if the hook exists.""" @@ -289,8 +261,8 @@ def write_pipe(c, stdin): def p4_write_pipe(c, stdin): real_cmd = p4_build_cmd(c) - if bytes is not str and isinstance(stdin, str): - stdin = encode_text_stream(stdin) + if isinstance(stdin, str): + stdin = stdin.encode() return write_pipe(real_cmd, stdin) def read_pipe_full(c): @@ -762,21 +734,18 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b', cb=None, skip_info=False, result = [] try: while True: - entry = marshal.load(p4.stdout) - if bytes is not str: - # Decode unmarshalled dict to use str keys and values, except for: - # - `data` which may contain arbitrary binary data - # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text - decoded_entry = {} - for key, value in entry.items(): - key = key.decode() - if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')): - value = value.decode() - decoded_entry[key] = value - # Parse out data if it's an error response - if decoded_entry.get('code') == 'error' and 'data' in decoded_entry: - decoded_entry['data'] = decoded_entry['data'].decode() - entry = decoded_entry + # Decode unmarshalled dict to use str keys and values, except for: + # - `data` which may contain arbitrary binary data + # - `depotFile[0-9]*`, `path`, or `clientFile` which may contain non-UTF8 encoded text + entry = {} + for key, value in marshal.load(p4.stdout).items(): + key = key.decode() + if isinstance(value, bytes) and not (key in ('data', 'path', 'clientFile') or key.startswith('depotFile')): + value = value.decode() + entry[key] = value + # Parse out data if it's an error response + if entry.get('code') == 'error' and 'data' in entry: + entry['data'] = entry['data'].decode() if 
skip_info: if 'code' in entry and entry['code'] == 'info': continue @@ -3840,14 +3809,13 @@ def openStreams(self): self.gitStream = self.importProcess.stdin self.gitError = self.importProcess.stderr - if bytes is not str: - # Wrap gitStream.write() so that it can be called using `str` arguments - def make_encoded_write(write): - def encoded_write(s): - return write(s.encode() if isinstance(s, str) else s) - return encoded_write + # Wrap gitStream.write() so that it can be called using `str` arguments + def make_encoded_write(write): + def encoded_write(s): + return write(s.encode() if isinstance(s, str) else s) + return encoded_write - self.gitStream.write = make_encoded_write(self.gitStream.write) + self.gitStream.write = make_encoded_write(self.gitStream.write) def closeStreams(self): if self.gitStream is None: -- 2.33.0