On Sun, Aug 30, 2009 at 04:57:52PM +0200, Henning Garus wrote: > On Sun, Aug 30, 2009 at 01:18:32PM +0200, Xavier wrote: > > On Sun, Aug 30, 2009 at 12:56 PM, Henning > > Garus<henning.garus@xxxxxxxxxxxxxx> wrote: > > > On Sun, Aug 30, 2009 at 01:56:23AM +0200, Xavier wrote: > > >> > > >> Great, thanks! It indeed found all the problems I had noticed, and much more. > > >> > > >> It would be nice if this script could be automatically run as well, > > >> once per week or so. > > >> > > >> Can you share the script used? Then we need to figure out if it can be > > >> run in the same place as the other script. > > > > > > Since my script is largely based on check_packages.py that should be fairly > > > straightforward. In fact my script expects parse_pkgbuilds.sh in the same > > > directory. > > > > > > I have uploaded the script to codepad: http://codepad.org/tSmNwYNI > > > > > > > I see. Then I am not sure whether we want to keep this check separate > > or just include it in check_packages.py > > I kept it separate, because it deals with DBs and the ABS tree, while > check_packages.py deals with the ABS tree only. On the other hand, integrating > it should speed things up a bit (you run parse_pkgbuilds.sh only once) and we > get rid of some duplicated code. On the downside the output can be quite long > with activated --vercmp, but I am not sure if that is even useful. > > Somehow integrating feels like the better idea, I will look into it. Here it is. It seems a bit shorter this way. I also changed the handling of the 'any' arch. Checking 'any' alone does not seem very useful, so I allowed multiple abs roots to be specified.
>From 9f1d948bf3bd61f45e737c2b67cb4ae28cae9184 Mon Sep 17 00:00:00 2001
From: Henning Garus <henning.garus@xxxxxxxxx>
Date: Tue, 1 Sep 2009 23:54:47 +0200
Subject: [PATCH 1/2] check_packages.py: Allow multiple abs-trees

By parsing multiple abs trees we can add any when parsing the other trees,
checking any standalone doesn't make much sense.

Signed-off-by: Henning Garus <henning.garus@xxxxxxxxx>
---
Review notes (this section is not part of the commit message):
 * The --abs-tree handler now assigns `absroots`. It previously assigned
   `absroot`, so the option had no effect: the default ["/var/abs"] was
   always the list that got validated and parsed.
 * Line breaks, blank context lines and indentation were reconstructed from
   a whitespace-collapsed copy of this mail (4-space indent assumed; column
   alignment inside the usage strings is lost). Verify against the tree
   before applying.

 cron-jobs/check_archlinux/check_packages.py |   32 ++++++++++++++------------
 cron-jobs/integrity-check                   |    2 +-
 2 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/cron-jobs/check_archlinux/check_packages.py b/cron-jobs/check_archlinux/check_packages.py
index f2a9601..e4798a0 100755
--- a/cron-jobs/check_archlinux/check_packages.py
+++ b/cron-jobs/check_archlinux/check_packages.py
@@ -51,10 +51,11 @@ class Depend:
         self.mod = mod
 
 def parse_pkgbuilds(repos,arch):
-    for repo in repos:
-        data = commands.getoutput(os.path.dirname(sys.argv[0]) + '/parse_pkgbuilds.sh '
-                + arch + ' ' + absroot + '/' + repo)
-        parse_data(repo,data)
+    for absroot in absroots:
+        for repo in repos:
+            data = commands.getoutput(os.path.dirname(sys.argv[0]) + '/parse_pkgbuilds.sh '
+                    + arch + ' ' + absroot + '/' + repo)
+            parse_data(repo,data)
 
 def parse_data(repo,data):
     attrname = None
@@ -332,9 +333,9 @@ def print_usage():
     print "Usage: ./check_packages.py [OPTION]"
     print ""
     print "Options:"
-    print " --abs-tree=<path> Check the specified tree (default : /var/abs)"
+    print " --abs-tree=<path[,path]> Check the specified tree(s) (default : /var/abs)"
     print " --repos=<r1,r2,...> Check the specified repos (default : core,extra)"
-    print " --arch=<any|i686|x86_64> Check the specified arch (default : i686)"
+    print " --arch=<i686|x86_64> Check the specified arch (default : i686)"
     print " -h, --help Show this help and exit"
     print ""
     print "Examples:"
@@ -345,7 +346,7 @@ def print_usage():
     print ""
 
 ## Default path to the abs root directory
-absroot = "/var/abs"
+absroots = ["/var/abs"]
 ## Default list of repos to check
 repos = ['core', 'extra']
 ## Default arch
@@ -359,7 +360,7 @@ except getopt.GetoptError:
 if opts != []:
     for o, a in opts:
         if o in ("--abs-tree"):
-            absroot = a
+            absroots = a.split(',')
         elif o in ("--repos"):
             repos = a.split(",")
         elif o in ("--arch"):
@@ -371,14 +372,15 @@ if opts != []:
             print_usage()
             sys.exit()
 
-if not os.path.isdir(absroot):
-    print "Error : the abs tree " + absroot + " does not exist"
-    sys.exit()
-for repo in repos:
-    repopath = absroot + "/" + repo
-    if not os.path.isdir(repopath):
-        print "Error : the repository " + repo + " does not exist in " + absroot
+for absroot in absroots:
+    if not os.path.isdir(absroot):
+        print "Error : the abs tree " + absroot + " does not exist"
         sys.exit()
+    for repo in repos:
+        repopath = absroot + "/" + repo
+        if not os.path.isdir(repopath):
+            print "Error : the repository " + repo + " does not exist in " + absroot
+            sys.exit()
 # repos which need to be loaded
 loadrepos = set([])
 for repo in repos:
diff --git a/cron-jobs/integrity-check b/cron-jobs/integrity-check
index 0b59064..b3185ec 100755
--- a/cron-jobs/integrity-check
+++ b/cron-jobs/integrity-check
@@ -9,5 +9,5 @@ fi
 
 $basedir/check_archlinux/check_packages.py \
     --repos="$1" \
-    --abs-tree="/srv/abs/rsync/$2" --arch="$2" |\
+    --abs-tree="/srv/abs/rsync/$2,/srv/abs/rsync/any" --arch="$2" |\
     $basedir/devlist-mailer "Integrity Check $2: $1" "$3"
-- 
1.6.4.1
>From ab0d57072f2e02d0664f31ffdbeff58d42091667 Mon Sep 17 00:00:00 2001
From: Henning Garus <henning.garus@xxxxxxxxx>
Date: Tue, 1 Sep 2009 23:57:39 +0200
Subject: [PATCH 2/2] Add db comparison to integrity check

Compare the abs tree with the repo dbs to check if we have a PKGBUILD for each
package in the dbs and vice versa.

Signed-off-by: Henning Garus <henning.garus@xxxxxxxxx>
---
Review notes (this section is not part of the commit message):
 * `absonly.sort` was a bare attribute reference and never sorted the list;
   it is now called as `absonly.sort()`.
 * `dbonly.sort()` is placed after the repo loop so the list is sorted once.
 * Typos fixed: "exiist" in the repo-dir error message, "intgrity" in the
   subject, and the missing colon in the "In tree, but not in db" summary.
 * NOTE(review): parse_dbs() never closes the tarfile handle it opens; left
   as-is here so the hunk line counts stay unchanged.
 * Line breaks, blank context lines and indentation were reconstructed from
   a whitespace-collapsed copy of this mail (4-space indent assumed; column
   alignment inside the printed strings is lost). Verify against the tree
   before applying.

 cron-jobs/check_archlinux/check_packages.py |   55 ++++++++++++++++++++++++++-
 1 files changed, 53 insertions(+), 2 deletions(-)

diff --git a/cron-jobs/check_archlinux/check_packages.py b/cron-jobs/check_archlinux/check_packages.py
index e4798a0..faa8c2f 100755
--- a/cron-jobs/check_archlinux/check_packages.py
+++ b/cron-jobs/check_archlinux/check_packages.py
@@ -16,9 +16,11 @@
 # a non-core package)
 # 8. Circular dependencies
 
-import os,re,commands,getopt,sys,alpm
+import os,re,commands,getopt,sys,tarfile,alpm
 import pdb
 
+DBEXT='.db.tar.gz'
+
 packages = {} # pkgname : PacmanPackage
 provisions = {} # provision : PacmanPackage
 pkgdeps,makepkgdeps = {},{} # pkgname : list of the PacmanPackage dependencies
@@ -26,6 +28,9 @@ invalid_pkgbuilds = []
 missing_pkgbuilds = []
 dups = []
 
+dbonly = []
+absonly = []
+
 mismatches = []
 missing_deps = []
 missing_makedeps = []
@@ -102,6 +107,17 @@ def parse_data(repo,data):
                 provisions[provname] = []
             provisions[provname].append(pkg)
 
+def parse_dbs(repos,arch):
+    dbpkgs = {}
+    for repo in repos:
+        pkgs = set([])
+        db = tarfile.open(os.path.join(repodir,repo,'os',arch,repo + DBEXT))
+        for line in db.getnames():
+            if not '/' in line:
+                pkgs.add(line.rsplit('-',2)[0])
+        dbpkgs[repo] = pkgs
+    return(dbpkgs)
+
 def splitdep(dep):
     name = dep
     version = ""
@@ -317,6 +333,8 @@ def print_results():
     print_result(dep_hierarchy, "Repo Hierarchy for Dependencies")
     print_result(makedep_hierarchy, "Repo Hierarchy for Makedepends")
     print_result(circular_deps, "Circular Dependencies")
+    print_result(dbonly, "Packages found in db, but not in tree")
+    print_result(absonly,"Packages found in tree, but not in db")
     print_subheading("Summary")
     print "Missing PKGBUILDs: ", len(missing_pkgbuilds)
     print "Invalid PKGBUILDs: ", len(invalid_pkgbuilds)
@@ -326,6 +344,8 @@ def print_results():
     print "Missing (make)dependencies: ", len(missing_deps)+len(missing_makedeps)
     print "Repo hierarchy problems: ", len(dep_hierarchy)+len(makedep_hierarchy)
     print "Circular dependencies: ", len(circular_deps)
+    print "In db, but not in tree: ", len(dbonly)
+    print "In tree, but not in db: ", len(absonly)
     print ""
 
 def print_usage():
@@ -336,6 +356,7 @@ def print_usage():
     print " --abs-tree=<path[,path]> Check the specified tree(s) (default : /var/abs)"
     print " --repos=<r1,r2,...> Check the specified repos (default : core,extra)"
     print " --arch=<i686|x86_64> Check the specified arch (default : i686)"
+    print " --repo-dir=<path> Check the dbs at the specified path (default : /srv/ftp)"
     print " -h, --help Show this help and exit"
     print ""
     print "Examples:"
@@ -351,9 +372,12 @@ absroots = ["/var/abs"]
 repos = ['core', 'extra']
 ## Default arch
 arch = "i686"
+## Default repodir
+repodir = "/srv/ftp"
 
 try:
-    opts, args = getopt.getopt(sys.argv[1:], "", ["abs-tree=", "repos=", "arch="])
+    opts, args = getopt.getopt(sys.argv[1:], "", ["abs-tree=", "repos=",
+                                                  "arch=", "repo-dir="])
 except getopt.GetoptError:
     print_usage()
     sys.exit()
@@ -365,6 +389,8 @@ if opts != []:
             repos = a.split(",")
         elif o in ("--arch"):
             arch = a
+        elif o in ("--repo-dir"):
+            repodir = a
         else:
             print_usage()
             sys.exit()
@@ -381,6 +407,17 @@ for absroot in absroots:
         if not os.path.isdir(repopath):
             print "Error : the repository " + repo + " does not exist in " + absroot
             sys.exit()
+if not os.path.isdir(repodir):
+    print "Error: the repository directory %s does not exist" % repodir
+    sys.exit()
+for repo in repos:
+    path = os.path.join(repodir,repo,'os',arch,repo + DBEXT)
+    if not os.path.isfile(path):
+        print "Error : repo DB %s : File not found" % path
+        sys.exit()
+    if not tarfile.is_tarfile(path):
+        print "Error : Cant open repo DB %s, not a valid tar file" % path
+        sys.exit()
 # repos which need to be loaded
 loadrepos = set([])
 for repo in repos:
@@ -397,6 +434,9 @@ for name,pkg in packages.iteritems():
     if pkg.repo in repos:
         repopkgs[name] = pkg
 
+print "==> parsing db files"
+dbpkgs = parse_dbs(repos,arch)
+
 print "==> checking mismatches"
 for name,pkg in repopkgs.iteritems():
     pkgdirname = pkg.path.split("/")[-1]
@@ -441,4 +481,15 @@ for name,pkg in packages.iteritems():
     pkgdeps[pkg] = deps
 find_scc(repopkgs.values())
 
+print "==> checking for differences between db files and pkgbuilds"
+for repo in repos:
+    for pkg in dbpkgs[repo]:
+        if not (pkg in repopkgs.keys() and repopkgs[pkg].repo == repo):
+            dbonly.append("%s/%s" % (repo,pkg))
+dbonly.sort()
+for name,pkg in repopkgs.iteritems():
+    if not name in dbpkgs[pkg.repo]:
+        absonly.append("%s/%s" % (pkg.repo,name))
+absonly.sort()
+
 print_results()
-- 
1.6.4.1