On Fri, Mar 06, 2015 at 12:24:57PM -0700, Kevin Fenzi wrote: > ...and we discovered a problem with this version... > > Look for a revised one soon. ;( > > kevin All right, here's a fixed version. We discussed it further in IRC; this simplifies the method of shelling out to be less error-prone, but it also introduces some more complexity with a threadpool. On its own, the new method of assessing existing branches took something like 900 seconds to complete. The threadpool cuts that down significantly to 120 seconds. --- pkgdb_sync_git_branches.py.orig 2015-03-06 15:49:32.421638745 -0500 +++ pkgdb_sync_git_branches.py.mine 2015-03-06 16:07:19.308145295 -0500 @@ -26,8 +26,10 @@ """ +import multiprocessing.pool import os import subprocess +import time import requests @@ -52,6 +54,7 @@ MKBRANCH = '/usr/local/bin/mkbranch' SETUP_PACKAGE = '/usr/local/bin/setup_git_package' +THREADS = 20 VERBOSE = False @@ -63,7 +66,7 @@ pass -def _invoke(program, args): +def _invoke(program, args, cwd=None): '''Run a command and raise an exception if an error occurred. 
:arg program: The program to invoke @@ -75,27 +78,25 @@ cmdLine.extend(args) if VERBOSE: print ' '.join(cmdLine) + print ' in', cwd - if VERBOSE: - program = subprocess.Popen( - cmdLine, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) - else: - program = subprocess.Popen(cmdLine, stderr=subprocess.STDOUT) + program = subprocess.Popen( + cmdLine, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, cwd=cwd) + + stdout, stderr = program.communicate() - retCode = program.wait() - if retCode != 0: + if program.returncode != 0: e = ProcessError() - e.returnCode = retCode + e.returnCode = program.returncode e.cmd = ' '.join(cmdLine) - if VERBOSE: - output = program.stdout.read() - e.message = 'Error, "%s" returned %s: %s' % ( - e.cmd, e.returnCode, output) - print e.message - else: - e.message = 'Error, "%s" returned %s' % (e.cmd, e.returnCode) + e.cwd = cwd + e.message = 'Error, "%s" (in %r) returned %s\n stdout: %s\n stderr: %s' % ( + e.cmd, e.cwd, e.returnCode, stdout, stderr) + print e.message raise e + return stdout.strip() + def _create_branch(pkgname, branch): '''Create a specific branch for a package. @@ -104,34 +105,33 @@ :arg branch: Name of the branch to create ''' + branch = branch.replace('*', '').strip() if branch == 'master': print 'ERROR: Proudly refusing to create master branch. Invalid repo?' 
print 'INFO: Please check %s repo' % pkgname return - branchpath = os.path.join( - GIT_FOLDER, '%s.git' % pkgname, 'refs/heads', branch) - if not os.path.exists(branchpath): - try: - _invoke(MKBRANCH, [branch, pkgname]) - except ProcessError, e: - if e.returnCode == 255: - # This is a warning, not an error - return - raise - finally: - fedmsg.publish( - topic='branch', - modname='git', - msg=dict( - agent='pkgdb', - name=pkgname, - branch=branch, - ), - ) - elif VERBOSE: - print 'Was asked to create branch %s of package %s, but it '\ - 'already exists' % (pkgname, branch) + branches = get_git_branch(pkgname) + if branch in branches: + print 'ERROR: Refusing to create a branch %s that exists' % branch + return + + try: + _invoke(MKBRANCH, [branch, pkgname]) + fedmsg.publish( + topic='branch', + modname='git', + msg=dict( + agent='pkgdb', + name=pkgname, + branch=branch, + ), + ) + except ProcessError, e: + if e.returnCode == 255: + # This is a warning, not an error + return + raise def pkgdb_pkg_branch(): @@ -167,8 +167,11 @@ print 'Could not find %s' % git_folder return set() - head_folder = os.path.join(git_folder, 'refs', 'heads') - return set(os.listdir(head_folder)) + branches = [ + lclbranch.replace('*', '').strip() + for lclbranch in _invoke('git', ['branch'], cwd=git_folder).split('\n') + ] + return set(branches) def branch_package(pkgname, branches): @@ -182,10 +185,11 @@ print 'Fixing package %s for branches %s' % (pkgname, branches) # Create the devel branch if necessary - if not os.path.exists( - os.path.join(GIT_FOLDER, '%s.git' % pkgname)): + exists = os.path.exists(os.path.join(GIT_FOLDER, '%s.git' % pkgname)) + if not exists or 'master' not in get_git_branch(pkgname): _invoke(SETUP_PACKAGE, [pkgname]) - branches.remove('master') # SETUP_PACKAGE creates master + if 'master' in branches: + branches.remove('master') # SETUP_PACKAGE creates master fedmsg.publish( topic='branch', modname='git', @@ -209,10 +213,12 @@ local_pkgs = 
set(os.listdir(GIT_FOLDER)) local_pkgs = set([it.replace('.git', '') for it in local_pkgs]) + print "Found %i local packages" % len(local_pkgs) pkgdb_info = pkgdb_pkg_branch() pkgdb_pkgs = set(pkgdb_info.keys()) + print "Found %i pkgdb packages" % len(pkgdb_pkgs) ## Commented out as we keep the git of retired packages while they won't ## show up in the information retrieved from pkgdb. @@ -225,19 +231,36 @@ print 'Some packages are present in pkgdb but not locally:' print ', '.join(sorted(pkgdb_pkgs - local_pkgs)) + + print "Finding the lists of local branches for local repos." + start = time.time() + if THREADS == 1: + git_branch_lookup = map(get_git_branch, sorted(pkgdb_info)) + else: + threadpool = multiprocessing.pool.ThreadPool(processes=THREADS) + git_branch_lookup = threadpool.map(get_git_branch, sorted(pkgdb_info)) + + # Zip that list of results up into a lookup dict. + git_branch_lookup = dict(zip(sorted(pkgdb_info), git_branch_lookup)) + + print "Found all local git branches in %0.2fs" % (time.time() - start) + tofix = set() for pkg in sorted(pkgdb_info): pkgdb_branches = pkgdb_info[pkg] - git_branches = get_git_branch(pkg) + git_branches = git_branch_lookup[pkg] diff = (pkgdb_branches - git_branches) if diff: print '%s missing: %s' % (pkg, ','.join(sorted(diff))) + print pkgdb_branches, git_branches tofix.add(pkg) branch_package(pkg, diff) if tofix: print 'Packages fixed (%s): %s' % ( len(tofix), ', '.join(sorted(tofix))) + else: + print 'Didn\'t find any packages to fix.' if __name__ == '__main__':
Attachment:
signature.asc
Description: PGP signature
_______________________________________________ infrastructure mailing list infrastructure@xxxxxxxxxxxxxxxxxxxxxxx https://admin.fedoraproject.org/mailman/listinfo/infrastructure