Our Perforce server experienced some kind of database corruption a few years ago.
While the file data and revision history are mostly intact,
some metadata for several changesets got lost.
For example, inspecting certain changelists produces errors.
"""
$ p4 describe -s 12345
Date 2019/02/26 16:46:17:
Operation: user-describe
Operation 'user-describe' failed.
Change 12345 description missing!
"""

While some metadata (like changeset descriptions) is obviously lost,
most of it can be reconstructed via other commands:
 * `p4 changes -l -t //...@12345,12345` --
   to obtain date+time, author, and the beginning of the changeset description;
 * `p4 files -a //...@12345,12345` --
   to obtain file revisions, file types, file actions;
 * `p4 diff2 -u //...@12344 //...@12345` --
   to get a unified diff of text files in a changeset;
 * `p4 print -o binary.blob@12345 //depot/binary.blob@12345` --
   to get a revision of a binary file.

It might be possible to teach git-p4 to fall back to other methods if `p4 describe` fails,
but it's probably too special-cased (it really depends on the kind and scale of the DB corruption),
so some manual intervention is perhaps acceptable.

So, with some manual work, it's possible to reconstruct `p4 -G describe ...` output.
In our case, once git-p4 passes the `p4 describe` stage,
it can proceed further just fine.
Thus, it's tempting to feed resurrected metadata to git-p4 when a normal `p4 describe` would fail.

This functionality may also be useful to cache changelist information,
or to make some changes to changelist info before feeding it to git-p4.

A new config parameter is introduced to tell git-p4
to load certain changelist descriptions from files instead of from a server.
For simplicity, it's one marshalled file per changelist.
```
git config --add git-p4.changelistDescriptionFile 12345.marshal
git config --add git-p4.changelistDescriptionFile 12346.marshal
```

The following trivial script may be used to produce marshalled `p4 -G describe`-compatible output.
""" #!/usr/bin/env python import marshal import time # recovered commits of interest changes = [ { 'change': '12345', 'status': 'submitted', 'code': 'stat', 'user': 'username1', 'time': str(int(time.mktime(time.strptime('2019/02/28 16:00:30', '%Y/%m/%d %H:%M:%S')))), 'client': 'username1_hostname1', 'desc': 'A bug is fixed.\nDetails are below:<lost>\n', 'depotFile0': '//depot/branch1/foo.sh', 'action0': 'edit', 'rev0': '28', 'type0': 'xtext', 'depotFile1': '//depot/branch1/bar.py', 'action1': 'edit', 'rev1': '43', 'type1': 'text', 'depotFile2': '//depot/branch1/baz.doc', 'action2': 'edit', 'rev2': '8', 'type2': 'binary', 'depotFile3': '//depot/branch1/qqq.c', 'action3': 'edit', 'rev3': '6', 'type3': 'ktext', }, ] for change in changes: marshal.dump(change, open('{0}.marshal'.format(change['change']), 'wb')) """ Or, the following script may be used to produce marshalled `p4 -G describe`-compatible output for our particular database corruption. """ #!/usr/bin/env python import itertools import marshal import subprocess import tempfile def p4_unmarshal(fileobj): result = [] while True: try: result += [marshal.load(fileobj)] except EOFError: break return result def p4_describe_fallback(cl): with tempfile.TemporaryFile() as p4_changes_output: with tempfile.TemporaryFile() as p4_files_output: subprocess.check_call(['p4', '-G', 'changes', '-l', '-t', '//...@{0},{0}'.format(cl)], stdout=p4_changes_output) subprocess.check_call(['p4', '-G', 'files', '-a', '//...@{0},{0}'.format(cl)], stdout=p4_files_output) p4_changes_output.seek(0) p4_files_output.seek(0) p4_changes_unmarshalled = p4_unmarshal(p4_changes_output) p4_files_unmarshalled = p4_unmarshal(p4_files_output) described_cl = p4_changes_unmarshalled[0] # there is usually only one entry described_cl['desc'] += '<lost>\n' assert described_cl['change'] == str(cl) for (file_info, i) in itertools.izip(p4_files_unmarshalled, itertools.count()): for f in ('depotFile', 'action', 'rev', 'type'): described_cl['{}{}'.format(f, 
i)] = file_info[f] assert file_info['change'] == described_cl['change'] return described_cl cls_wanted = ( 12345, 12346 ) for cl in cls_wanted: with open('{0}.marshal'.format(cl), 'wb') as f: cl_info = p4_describe_fallback(cl) marshal.dump(cl_info, f) """ Signed-off-by: Andrey Mazo <amazo@xxxxxxxxxxxxxx> --- Documentation/git-p4.txt | 27 +++++++++++++++++++++++++++ git-p4.py | 24 +++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt index ceabab8b86..f751ae729f 100644 --- a/Documentation/git-p4.txt +++ b/Documentation/git-p4.txt @@ -654,10 +654,37 @@ git config --add git-p4.mapUser "p4user = First Last <mail@xxxxxxxxxxx>" ------------- + A mapping will override any user information from P4. Mappings for multiple P4 user can be defined. +git-p4.changelistDescriptionFile:: + This config variable points 'git p4' to a file, + containing a serialized (marshalled) changelist description. + 'git p4' loads a description from such a file + instead of asking Perforce server about it. + The file format is the same as produced by 'p4 -G describe -s <changelist>'. + This option can be specified multiple times + to feed multiple changelist descriptions to 'git p4'. + The path is relative to git work tree, + file names or extensions don't matter. + This example loads 2 changelist descriptions: ++ +------------- +git config --add git-p4.changelistDescriptionFile cl-12345.marshal +git config --add git-p4.changelistDescriptionFile cl-12347.marshal +------------- ++ +Under some circumstances (for example, Perforce database corruption) +this option is useful to supply changelist description to 'git p4' bypassing 'p4'. +Also, it can be used for caching of changelist descriptions +to reduce load on the Perforce server in case of successive imports +(say, when splitting the depot into multiple Git repositories) +or for overriding some changelist information. 
+This config variable is generally not needed after the initial import, +so it can be removed from the config file +together with corresponding description files after the import. + Submit variables ~~~~~~~~~~~~~~~~ git-p4.detectRenames:: Detect renames. See linkgit:git-diff[1]. This can be true, false, or a score as expected by 'git diff -M'. diff --git a/git-p4.py b/git-p4.py index 98b2b7bbca..e65df92d75 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2613,10 +2613,12 @@ def __init__(self): self.initialParents = {} self.tz = "%+03d%02d" % (- time.timezone / 3600, ((- time.timezone % 3600) / 60)) self.labels = {} + self.loadedChangelistDescriptions = {} + # Force a checkpoint in fast-import and wait for it to finish def checkpoint(self): self.gitStream.write("checkpoint\n\n") self.gitStream.write("progress checkpoint\n\n") out = self.gitOutput.readline() @@ -3319,10 +3321,25 @@ def getBranchMappingFromGitBranches(self): branch = "main" else: branch = branch[len(self.projectName):] self.knownBranches[branch] = branch + def loadChangelistDescFromFile(self): + changelistDescriptionFiles = gitConfigList("git-p4.changelistDescriptionFile") + for clMarshalledDescFile in changelistDescriptionFiles: + if not clMarshalledDescFile: + continue + + try: + with open(clMarshalledDescFile, 'rb') as clFileObj: + clDesc = marshal.load(clFileObj) + if not ("status" in clDesc and "user" in clDesc and "time" in clDesc and "change" in clDesc): + die("Changelist description read from {0} doesn't have required fields".format(clMarshalledDescFile)) + self.loadedChangelistDescriptions[int(clDesc["change"])] = clDesc + except (IOError, TypeError, ValueError, EOFError) as e: + die("Can't read changelist description from {0}: {1}".format(clMarshalledDescFile, str(e))) + def updateOptionDict(self, d): option_keys = {} if self.keepRepoPath: option_keys['keepRepoPath'] = 1 @@ -3420,11 +3437,14 @@ def searchParent(self, parent, branch, target): return None def importChanges(self, changes, 
origin_revision=0): cnt = 1 for change in changes: - description = p4_describe(change) + if change in self.loadedChangelistDescriptions: + description = self.loadedChangelistDescriptions[change] + else: + description = p4_describe(change) self.updateOptionDict(description) if not self.silent: sys.stdout.write("\rImporting revision %s (%s%%)" % (change, cnt * 100 / len(changes))) sys.stdout.flush() @@ -3706,10 +3726,12 @@ def run(self, args): bad_changesfile = True break if bad_changesfile: die("Option --changesfile is incompatible with revision specifiers") + self.loadChangelistDescFromFile() + newPaths = [] for p in self.depotPaths: if p.find("@") != -1: atIdx = p.index("@") self.changeRange = p[atIdx:] -- 2.19.2