Importing a long history from Perforce into git using the git-p4 tool can be especially challenging. The `git p4 clone` operation behaves as an all-or-nothing transaction. Under real-world conditions like network unreliability or a busy Perforce server, `git p4 clone` and `git p4 sync` operations can easily fail, forcing a user to restart the import process from the beginning. The longer the history being imported, the more likely it is that a fault occurs during the process. Long enough imports thus become statistically unlikely to ever succeed. The underlying git fast-import protocol supports an explicit checkpoint command. The idea here is to optionally allow the user to force an explicit checkpoint every <x> seconds. If the sync/clone operation fails, branches are left updated at the commit recorded by the latest checkpoint. This allows a user to resume importing Perforce history while only having to repeat at most approximately <x> seconds' worth of import activity. Signed-off-by: Ori Rawlings <orirawlings@xxxxxxxxx> --- Documentation/git-p4.txt | 12 ++++++- git-p4.py | 7 ++++- t/t9830-git-p4-checkpoint-period.sh | 59 ++++++++++++++++++++++++++++++- 3 files changed, 78 insertions(+), 0 deletions(-) create mode 100755 t/t9830-git-p4-checkpoint-period.sh diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt index c83aaf3..e48ed6d 100644 --- a/Documentation/git-p4.txt +++ b/Documentation/git-p4.txt @@ -252,6 +252,18 @@ Git repository: Use a client spec to find the list of interesting files in p4. See the "CLIENT SPEC" section below. +--checkpoint-period <n>:: + Issue explicit 'checkpoint' commands to the underlying + linkgit:git-fast-import[1] approximately every 'n' seconds. If + syncing or cloning from the Perforce server is interrupted, the + process can be resumed from the most recent checkpoint with a + new 'sync' invocation. 
This is useful in situations where a + large number of changes are being imported over an unreliable + network connection. Explicit checkpoints can take up to several + minutes each, so a suitable value for the checkpoint period is + approximately 1200 seconds. By default, no explicit checkpoints + are performed. + -/ <path>:: Exclude selected depot paths when cloning or syncing. diff --git a/git-p4.py b/git-p4.py index fd5ca52..4c84871 100755 --- a/git-p4.py +++ b/git-p4.py @@ -2244,6 +2244,7 @@ class P4Sync(Command, P4UserMap): optparse.make_option("-/", dest="cloneExclude", action="append", type="string", help="exclude depot path"), + optparse.make_option("--checkpoint-period", dest="checkpointPeriod", type="int", help="Period in seconds between explict git fast-import checkpoints (by default, no explicit checkpoints are performed)"), ] self.description = """Imports from Perforce into a git repository.\n example: @@ -2276,6 +2277,7 @@ class P4Sync(Command, P4UserMap): self.tempBranches = [] self.tempBranchLocation = "refs/git-p4-tmp" self.largeFileSystem = None + self.checkpointPeriod = None if gitConfig('git-p4.largeFileSystem'): largeFileSystemConstructor = globals()[gitConfig('git-p4.largeFileSystem')] @@ -3031,6 +3033,7 @@ class P4Sync(Command, P4UserMap): def importChanges(self, changes): cnt = 1 + self.lastCheckpointTime = time.time() for change in changes: description = p4_describe(change) self.updateOptionDict(description) @@ -3107,6 +3110,10 @@ class P4Sync(Command, P4UserMap): self.initialParent) # only needed once, to connect to the previous commit self.initialParent = "" + + if self.checkpointPeriod >= 0 and time.time() - self.lastCheckpointTime >= self.checkpointPeriod: + self.checkpoint() + self.lastCheckpointTime = time.time() except IOError: print self.gitError.read() sys.exit(1) diff --git a/t/t9830-git-p4-checkpoint-period.sh b/t/t9830-git-p4-checkpoint-period.sh new file mode 100755 index 0000000..6ba4914 --- /dev/null +++ 
b/t/t9830-git-p4-checkpoint-period.sh @@ -0,0 +1,59 @@ +#!/bin/sh + +test_description='git p4 checkpoint-period tests' + +. ./lib-git-p4.sh + +p4_submit_each () { + for file in $@ + do + echo $file > "$file" && + p4 add "$file" && + p4 submit -d "$file" + done +} + +test_expect_success 'start p4d' ' + start_p4d +' + +test_expect_success 'no explicit checkpoints' ' + cd "$cli" && + p4_submit_each file1 file2 file3 && + git p4 clone --dest="$git" //depot@all && + test_when_finished cleanup_git && + ( + git -C "$git" reflog refs/remotes/p4/master >lines && + test_line_count = 1 lines && + p4_submit_each file4 file5 file6 && + git -C "$git" p4 sync && + git -C "$git" reflog refs/remotes/p4/master >lines && + test_line_count = 2 lines + ) +' + +test_expect_success 'restart p4d' ' + kill_p4d && + start_p4d +' + +test_expect_success 'checkpoint every 0 seconds, i.e. every commit' ' + cd "$cli" && + p4_submit_each file1 file2 file3 && + git p4 clone --dest="$git" --checkpoint-period 0 //depot@all && + test_when_finished cleanup_git && + ( + git -C "$git" reflog refs/remotes/p4/master >lines && + test_line_count = 3 lines && + p4_submit_each file4 file5 file6 && + git -C "$git" p4 sync --checkpoint-period 0 && + git -C "$git" reflog refs/remotes/p4/master >lines && + test_line_count = 6 lines + ) +' + +test_expect_success 'kill p4d' ' + kill_p4d +' + +test_done -- git-series 0.8.10