#!/usr/bin/python
"""Re-root commits imported by svn-fe according to the standard SVN layout.

This script walks the commit sequence imported by svn-fe.  For each commit
it tries to identify the branch that was changed (trunk, branches/<name> or
tags/<name>).  Commits are rewritten so that the matching subtree becomes
the root tree of a commit on the corresponding ref.  A basic heuristic of
matching trees is used to find parents for the first commit in a branch and
for tags.

The result is a fast-import stream written to standard output.

Original author: David Barr (contrib/svn-fe/svn-filter-root.py).
"""
# NOTE: the stream replaces the whole root tree with `M 040000 <tree> ""`.
# That needs a git fast-import whose tree_content_set() accepts an empty
# path for a directory, i.e. the companion change to fast-import.c:
#
#     if (!slash1 && !n) {
#         if (!S_ISDIR(mode))
#             die("Root cannot be a non-directory");
#         hashcpy(root->versions[1].sha1, sha1);
#         if (root->tree)
#             release_tree_content_recursive(root->tree);
#         root->tree = subtree;
#         return 1;
#     }
#
# All git data is handled as bytes so the script behaves the same on
# Python 2 and Python 3 and never guesses at the encoding of log messages.
from subprocess import PIPE, Popen
import re
import os
import sys

# Leading "trunk", "branches/<name>" or "tags/<name>" of a changed path.
# The lookahead requires a following "/", so "trunkfoo/x" or a plain file
# directly under branches/ is not mistaken for a subroot.
subroot_re = re.compile(b"^(?:trunk|branches/[^/]+|tags/[^/]+)(?=/)")

tree_re = re.compile(b"^tree ([0-9a-f]{40})", flags=re.MULTILINE)
parent_re = re.compile(b"^parent ([0-9a-f]{40})", flags=re.MULTILINE)
author_re = re.compile(b"^author (.*)$", flags=re.MULTILINE)
committer_re = re.compile(b"^committer (.*)$", flags=re.MULTILINE)

git_svn_id_re = re.compile(b"^git-svn-id[^@]*", flags=re.MULTILINE)

# Id of the empty tree; used as the diff base for parentless commits.
EMPTY_TREE = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"

HEADS_PREFIX = b"refs/heads/"
TAGS_PREFIX = b"refs/heads/tags/"


def read_commits(stream):
    """Yield (object_name, body) pairs from `git cat-file --batch` output.

    Each record is a "<name> <type> <size>" header line, <size> bytes of
    content and one trailing newline.  Iteration stops at end of input or
    at the first header that does not have three fields (for example a
    "<name> missing" reply).
    """
    while True:
        header = stream.readline().strip().split(b" ")
        if len(header) != 3:
            return
        body = stream.read(int(header[2]))
        stream.read(1)  # newline that terminates the object content
        yield header[0], body


def match_subroot(path):
    """Return the trunk/branch/tag prefix of a changed path, or None."""
    match = subroot_re.match(path)
    if match:
        return match.group()
    return None


def rewrite_git_svn_id(commit_message, subroot):
    """Append "/<subroot>" to the URL part of git-svn-id lines."""
    # A function replacement keeps backslashes in the branch name literal;
    # a template string would interpret them as group references.
    return git_svn_id_re.sub(
        lambda match: match.group(0) + b"/" + subroot, commit_message)


def ref_for_subroot(subroot):
    """Map the svn trunk/branch/tag path to a git-friendly ref name."""
    return HEADS_PREFIX + subroot.replace(b" ", b"%20")


def find_subroot(parent, git_commit):
    """Return the subroot of the first matching path changed by a commit.

    Runs `git diff --name-only parent git_commit` and stops at the first
    path that lies under trunk, branches/<name> or tags/<name>.  Returns
    None when no changed path matches.
    """
    # NOTE(review): git may quote unusual path names in this output
    # (core.quotePath); such paths will not match - confirm if relevant.
    changes = Popen(["git", "diff", "--name-only", parent, git_commit],
                    stdout=PIPE)
    try:
        for path in changes.stdout:
            subroot = match_subroot(path)
            if subroot is not None:
                return subroot
        return None
    finally:
        # One diff is spawned per commit: always release the pipe and reap
        # the child so descriptors and zombies do not pile up.
        changes.stdout.close()
        if changes.poll() is None:
            changes.terminate()
        changes.wait()


def emit(out, *lines):
    """Write each byte string to the binary stream `out` plus a newline."""
    for line in lines:
        out.write(line + b"\n")


def data_header(payload):
    """Return the fast-import "data <count>" line for a byte payload."""
    return b"data " + str(len(payload)).encode("ascii")


def main():
    """Read the imported history and print the re-rooted fast-import stream."""
    # Python 3 exposes the raw byte stream as sys.stdout.buffer.
    out = getattr(sys.stdout, "buffer", sys.stdout)

    ref_commit = {}   # ref name -> mark of the latest commit on that ref
    tree_commit = {}  # subtree id -> mark of the latest commit with it
    count = 1         # next mark number to hand out

    # Open a cat-file process for subtree lookups
    subtree_process = Popen(["git", "cat-file", "--batch-check"],
                            stdin=PIPE, stdout=PIPE)

    # Iterate over commits from subversion imported with svn-fe
    revlist = Popen(["git", "rev-list", "--reverse", "--topo-order",
                     "--default", "HEAD"], stdout=PIPE)
    cat_file = Popen(["git", "cat-file", "--batch"],
                     stdin=revlist.stdout, stdout=PIPE)
    # cat_file owns the read end now; drop our copy of it.
    revlist.stdout.close()

    try:
        for git_commit, object_body in read_commits(cat_file.stdout):
            commit_header, _, commit_message = object_body.partition(b"\n\n")
            author = author_re.search(commit_header).group()
            committer = committer_re.search(commit_header).group()

            # Diff against the empty tree if no parent
            match = parent_re.search(commit_header)
            if match:
                diff_base = match.group(1).decode("ascii")
            else:
                diff_base = EMPTY_TREE

            # Find a common path prefix in the changes for the revision
            subroot = find_subroot(diff_base, git_commit.decode("ascii"))
            if subroot is None:
                emit(out, b"progress Weird commit - no subroot.")
                continue

            # Rewrite git-svn-id in the log to point to the subtree
            commit_message = rewrite_git_svn_id(commit_message, subroot)

            # Resolve the tree id of the subroot inside this commit
            subtree_process.stdin.write(git_commit + b":" + subroot + b"\n")
            subtree_process.stdin.flush()
            subtree_line = subtree_process.stdout.readline()
            if subtree_line.rstrip(b"\n").endswith(b"missing"):
                emit(out, b"progress Weird commit - invalid subroot")
                continue
            subtree = subtree_line[0:40]

            ref = ref_for_subroot(subroot)

            # Choose a parent for the rewritten commit: the tip of the same
            # ref if there is one, else the latest commit with this tree.
            if ref in ref_commit:
                parent = ref_commit[ref]
            elif subtree in tree_commit:
                parent = tree_commit[subtree]
            else:
                parent = b""

            if ref.startswith(TAGS_PREFIX):
                # Tags become annotated tags on the matching commit
                if parent == b"":
                    emit(out, b"progress Weird tag - no matching commit.")
                    continue
                emit(out,
                     b"tag " + ref[len(TAGS_PREFIX):],
                     b"from " + parent,
                     # reuse the committer identity, minus the keyword
                     b"tagger " + committer[len(b"committer "):],
                     data_header(commit_message),
                     commit_message)
                continue

            # Default to trunk if the branch is new
            if parent == b"" and b"refs/heads/trunk" in ref_commit:
                parent = ref_commit[b"refs/heads/trunk"]

            mark = b":" + str(count).encode("ascii")
            emit(out,
                 b"commit " + ref,
                 b"mark " + mark,
                 author,
                 committer,
                 data_header(commit_message),
                 commit_message)
            if parent != b"":
                emit(out, b"from " + parent)
            # Replace the root tree with the subtree of the branch
            emit(out, b'M 040000 ' + subtree + b' ""')

            # Advance the matching branch
            ref_commit[ref] = mark
            # Update latest commit by tree to drive parent matching
            tree_commit[subtree] = mark

            emit(out, b"progress " + str(count).encode("ascii"))
            count = count + 1
    finally:
        out.flush()
        # Closing stdin lets cat-file --batch-check exit on its own.
        subtree_process.stdin.close()
        subtree_process.stdout.close()
        subtree_process.wait()
        cat_file.stdout.close()
        cat_file.wait()
        revlist.wait()


if __name__ == "__main__":
    main()