The python script attached is a try at providing a sane conversion from Darcs to GIT. It tries to map darcs conflict resolutions onto git branch merges. Regarding GFI, it's a breeze to work with; my compliments to its author. My only gripe is the need to specify a branch for each commit. Darcs uses changeset based storage. It doesn't really have branches, but it does record divergent changes and merges of resulting conflicts. Hence, it's not clear which refs/head/BRANCH should be used when creating a commit object. I found it easiest to write each commit to a refs/head/darcs-tmp-COUNT branch, use the reset command to specify at the end which commits are tops of branches, and delete the temporary branches. So, my feature request: please make the "commit" command always accept a "from" command, and make the "refs" argument optional. This will cleanup my converter, and separate out two logical functions of the gfi "commit" command: creating a commit object, and advancing the head ref. -- Han-Wen Nienhuys - hanwen@xxxxxxxxx - http://www.xs4all.nl/~hanwen
import os import sys import time import xml.dom.minidom import re import gdbm as dbmodule import gzip import optparse ################################################################ # globals silent=False mail_to_name_dict = {} pending_patches = {} used_tags = {} ################################################################ # utils class PullConflict (Exception): pass class CommandFailed (Exception): pass def progress (s): sys.stderr.write (s + '\n') def get_cli_options (): p = optparse.OptionParser () p.usage='''darcs2git [OPTIONS] DARCS-REPO''' p.description='''Convert darcs repo to git. This tool is a one shot conversion utility for Darcs repositories. It requires Git version that has git-fast-import. It does not support incremental updating. This tool will import the patches in chronological order, and only creates merges when a resolved conflict is detected. TODO: - correct time zone handling - ''' def update_map (option, opt, value, parser): for l in open (value).readlines (): (mail, name) = tuple (l.strip ().split ('=')) mail_to_name_dict[mail] = name p.add_option ('-a', '--authors', action='callback', callback=update_map, type='string', nargs=1, help='read a text file, containing EMAIL=NAME lines') p.add_option ('-d', '--destination', action='store', type='string', default='', dest='target_git_repo', help='where to put the resulting Git repo.') p.add_option ('--verbose', action='store_true', dest='verbose', default=False, help='show commands as they are invoked') options, args = p.parse_args () if not args: p.print_help () sys.exit (2) global silent silent = not options.verbose if not options.target_git_repo: p = args[0] p = os.path.abspath (p) options.target_git_repo = os.path.basename (p).replace ('.darcs', '') options.target_git_repo += '.git' return (options, args) def read_pipe (cmd, ignore_errors=False): if not silent: progress ('pipe %s' % cmd) pipe = os.popen (cmd) val = pipe.read () if pipe.close () and not ignore_errors: raise CommandFailed ("Pipe failed: %s" % cmd) return val def system (c, ignore_error=0): if not silent: progress ( c) if os.system (c) and not ignore_error: raise CommandFailed ("Command failed: %s" % c) def darcs_date_to_git (x): t = time.strptime (x, '%Y%m%d%H%M%S') return '%d' % int (time.mktime (t)) def darcs_timezone (x) : time.strptime (x, '%a %b %d %H:%M:%S %Z %Y') # todo return "+0100" ################################################################ # darcs class DarcsConversionRepo: def __init__ (self, dir, patches): self.dir = dir self.patches = patches def clean (self): system ('rm -rf %s' % self.dir) def pull (self, patch): id = patch.attributes['hash'] source_repo = patch.dir dir = self.dir system ('cd %(dir)s && darcs pull --quiet --all --match "hash %(id)s" %(source_repo)s ' % locals ()) def go_from_to (self, from_patch, to_patch): """Move the repo to FROM_PATCH, then go to TO_PATCH. Raise PullConflict if conflict is detected This uses the fishy technique of writing the inventory and constructing the pristine tree with 'darcs repair' It might be quicker and/or more correct to wind/rewind the repo with pull and unpull. """ dir = os.path.abspath (self.dir) system ('rm -rf %(dir)s && mkdir %(dir)s && darcs init --repo %(dir)s' % locals ()) source = to_patch.dir if from_patch: iv = open (dir + '/_darcs/inventory', 'w') for p in self.patches[:from_patch.number+1]: os.link (p.filename (), dir + '/_darcs/patches/' + os.path.basename (p.filename ())) iv.write (p.header ()) iv.close () progress ('Go to patch %d' % from_patch.number) system ('cd %(dir)s && darcs repair --quiet' % locals ()) system ('rsync -a %(dir)s/_darcs/pristine/ %(dir)s/' % locals ()) try: self.pull (to_patch) success = 'No conflicts to resolve' in read_pipe ('cd %(dir)s && echo y|darcs resolve' % locals ()) except CommandFailed: raise PullConflict () if not success: raise PullConflict () def has_patch (self, p): id = p.attributes['hash'] f = self.dir + '/_darcs/patches/' + id return os.path.exists (f) def pristine_tree (self): return self.dir + '/_darcs/pristine' class DarcsPatch: def __init__ (self, xml, dir): self.xml = xml self.dir = dir self.number = -1 self.attributes = {} for (nm, value) in xml.attributes.items(): self.attributes[nm] = value # fixme: ugh attributes vs. methods. self.extract_author () self.extract_message () self.extract_time () def filename (self): return self.dir + '/_darcs/patches/' + self.attributes['hash'] def contents (self): f = gzip.open (self.filename ()) return f.read () def header (self): lines = self.contents ().split ('\n') name = lines[0] committer = lines[1] + '\n' committer = re.sub ('] {\n$', ']\n', committer) committer = re.sub ('] *\n$', ']\n', committer) comment = '' if not committer.endswith (']\n'): for l in lines[2:]: if l[0] == ']': comment += ']\n' break comment += l + '\n' header = name + '\n' + committer if comment: header += comment return header def extract_author (self): mail = self.attributes['author'] name = '' m = re.search ("^(.*) <(.*)>$", mail) if m: name = m.group (1) mail = m.group (2) else: try: name = mail_to_name_dict[mail] except KeyError: name = mail.split ('@')[0] self.author_name = name self.author_mail = mail def extract_time (self): self.date = darcs_date_to_git (self.attributes['date']) + ' ' + darcs_timezone (self.attributes['local_date']) def name (self): patch_name = '(no comment)' try: name_elt = self.xml.getElementsByTagName ('name')[0] patch_name = name_elt.childNodes[0].data except IndexError: pass return patch_name def extract_message (self): patch_name = self.name () comment_elts = self.xml.getElementsByTagName ('comment') comment = '' if comment_elts: comment = comment_elts[0].childNodes[0].data if self.attributes['inverted'] == 'True': patch_name = 'UNDO: ' + patch_name self.message = '%s\n\n%s' % (patch_name, comment) def tag_name (self): patch_name = self.name () if patch_name.startswith ("TAG "): tag = patch_name[4:] tag = re.sub (r'\s', '_', tag).strip () tag = re.sub (r':', '_', tag).strip () return tag return '' def get_darcs_patches (darcs_repo): progress ('reading patches.') xml_string = read_pipe ('darcs changes --xml --reverse --repo ' + darcs_repo) dom = xml.dom.minidom.parseString(xml_string) xmls = dom.documentElement.getElementsByTagName('patch') patches = [DarcsPatch (x, darcs_repo) for x in xmls] n = 0 for p in patches: p.number = n n += 1 return patches ################################################################ # GIT export def export_tree (tree, gfi): tree = os.path.normpath (tree) gfi.write ('deleteall\n') for (root, dirs, files) in os.walk (tree): for f in files: rf = os.path.normpath (os.path.join (root, f)) s = open (rf).read () rf = rf.replace (tree + '/', '') gfi.write ('M 644 inline %s\n' % rf) gfi.write ('data %d\n%s\n' % (len (s), s)) gfi.write ('\n') def export_commit (repo, patch, last_patch, gfi): gfi.write ('commit refs/heads/darcstmp%d\n' % patch.number) gfi.write ('mark :%d\n' % (patch.number + 1)) gfi.write ('committer %s <%s> %s\n' % (patch.author_name, patch.author_mail, patch.date)) gfi.write ('data %d\n%s\n' % (len (patch.message), patch.message)) if last_patch: gfi.write ('from :%d\n' % (last_patch.number + 1)) if pending_patches.has_key (last_patch.number): del pending_patches[last_patch.number] for (n, p) in pending_patches.items (): if repo.has_patch (p): gfi.write ('merge :%d\n' % (n + 1)) del pending_patches[n] pending_patches[patch.number] = patch export_tree (repo.pristine_tree (), gfi) def export_pending (gfi): if len (pending_patches.items ()) == 1: gfi.write ('reset refs/heads/master\n') gfi.write ('from :%d\n\n' % (pending_patches.values()[0].number+1)) return for (n, p) in pending_patches.items (): gfi.write ('reset refs/heads/master%d\n' % n) gfi.write ('from :%d\n\n' % (n+1)) patches = pending_patches.values() patch = patches[0] gfi.write ('commit refs/heads/master\n') gfi.write ('committer %s <%s> %s\n' % (patch.author_name, patch.author_mail, patch.date)) msg = 'tie together' gfi.write ('data %d\n%s\n' % (len(msg), msg)) gfi.write ('from :%d\n' % (patch.number + 1)) for p in patches[1:]: gfi.write ('merge :%d\n' % (p.number + 1)) gfi.write ('\n') def export_tag (patch, gfi): gfi.write ('tag %s\n' % patch.tag_name ()) gfi.write ('from :%d\n' % (patch.number + 1)) gfi.write ('tagger %s <%s> %s\n' % (patch.author_name, patch.author_mail, patch.date)) gfi.write ('data %d\n%s\n' % (len (patch.message), patch.message)) ################################################################ # main. def main (): (options, args) = get_cli_options () darcs_repo = os.path.abspath (args[0]) git_repo = os.path.abspath (options.target_git_repo) system ('mkdir %(git_repo)s && cd %(git_repo)s && git --bare init' % locals ()) os.environ['GIT_DIR'] = git_repo gfi = os.popen ('git-fast-import', 'w') # patches = get_darcs_patches (darcs_repo) conv_repo = DarcsConversionRepo ("darcs2git.tmpdarcs", patches) for p in patches: parent = p.number - 1 last = None while 1: if parent >= 0: last = patches[parent] try: conv_repo.go_from_to (last, p) break except PullConflict: ## simplistic, may not be enough. progress ('conflict, going one back') parent -= 1 if parent < 0: raise Exception('urg') progress ('Export %d -> %d (total %d)' % (parent, p.number, len (patches))) export_commit (conv_repo, p, last, gfi) if p.tag_name (): export_tag (p, gfi) export_pending (gfi) gfi.close () system ('rm %(git_repo)s/refs/heads/darcstmp*' % locals ()) conv_repo.clean () main ()