On Fri, Apr 24, 2009 at 11:48:16PM -0500, Matt Domsch wrote: > On Fri, Apr 24, 2009 at 07:44:50PM -0500, Matt Domsch wrote: > > If you see me monkey with u-m-d-l on bapp1, that's what I'm trying to > > figure out... > > Found it... > > update-master-directory-list was trying to be smart and failed. If it > saw that a directory's ctime hadn't changed, it skipped it and moved > on. But, a directory's ctime won't change if one of its _subdirectories' ctime_ > changes. Because u-m-d-l runs every 30 minutes or so, it appears to > catch tree updates mid-flight. In one run it sees updates/10/x86_64/ > has changed, but that repodata/ under that has not (yet). So it > marks updates/10/x86_64 as changed and moves on. On the next pass, > updates/10/x86_64 of course _has not changed_, but its repodata > subdir has. This is what it was missing... It would skip processing > the repodata subdir. > > (and yes, this would throw off the crawler too, which people have been > complaining about being added and removed from the list somewhat > randomly...) > > I'm working on a fix, which will involve changing > update-master-directory-list. But that should be the only change. This is the patch I want to apply on bapp1 to update-master-directory-list. It ensures that changes in repodata/ directories are handled, even if the parent directories don't appear to have changed. It still tries to be smart by not stat()ing files in a directory which hasn't changed its ctime. Oh what I would give if inotify/dnotify worked on NFS... Can I get some +1s? 
--- update-master-directory-list 2009-04-07 03:53:55.000000000 +0000 +++ /home/fedora/mdomsch/update-master-directory-list 2009-04-25 04:50:18.000000000 +0000 @@ -168,8 +168,9 @@ def make_repomd_file_details(dir): - repodataDir = dir.name + '/repodata' - repomd_fname = os.path.join(rootdir, dir.name, 'repodata', 'repomd.xml') + if not dir.name.endswith('/repodata'): + return + repomd_fname = os.path.join(rootdir, dir.name, 'repomd.xml') if not os.path.exists(repomd_fname): return try: @@ -267,7 +268,7 @@ try: category_directories[parent_dname]['isRepository'] = True except KeyError: - category_directories[parent_dname] = {'files':{}, 'isRepository':True, 'readable':readable} + category_directories[parent_dname] = {'files':{}, 'isRepository':True, 'readable':readable, 'ctime':ctime} return dname, category_directories @@ -328,16 +329,17 @@ except SQLObjectNotFound: dir = Directory(name=dirpath,readable=value['readable'], ctime=value['ctime']) dir.addCategory(category) - if dir.files != short_filelist(value['files']): - dir.files = short_filelist(value['files']) + if value['changed']: + if dir.files != short_filelist(value['files']): + dir.files = short_filelist(value['files']) make_file_details_from_checksums(dir) # this has to be a second pass to be sure the child repodata/ dir is created in the db first for dirpath, value in category_directories.iteritems(): + dir = Directory.byName(dirpath) if value['isRepository']: - dir = Directory.byName(dirpath) make_repository(dir, category) - make_repomd_file_details(dir) + make_repomd_file_details(dir) ageFileDetails() def parse_rsync_listing(cname, f): @@ -417,27 +419,31 @@ dname = dname.rstrip('/') try: d = Directory.byName(dname) - if d.ctime == ctime: - # break out here because nothing has changed - continue + d_ctime = d.ctime except SQLObjectNotFound: # we'll need to create it - pass + d_ctime = 0 - print "%s has changed" % dname mode = s.st_mode readable = (mode & stat.S_IRWXO & (stat.S_IROTH|stat.S_IXOTH)) if not 
readable: unreadable_dirs[dname] = True isRepo = 'repodata' in dirnames - category_directories[dname] = {'files':{}, 'isRepository':isRepo, 'readable':readable, 'ctime':ctime} - for f in filenames: - try: - s = os.stat(os.path.join(dirpath, f)) - except OSError: - continue - category_directories[dname]['files'][f] = {'size':str(s.st_size), - 'stat':s[stat.ST_CTIME]} + + changed = (d_ctime != ctime) + if changed: + print "%s has changed" % dname + category_directories[dname] = {'files':{}, 'isRepository':isRepo, 'readable':readable, 'ctime':ctime, 'changed':changed} + + # skip per-file stat()s if the directory hasn't changed + if changed: + for f in filenames: + try: + s = os.stat(os.path.join(dirpath, f)) + except OSError: + continue + category_directories[dname]['files'][f] = {'size':str(s.st_size), + 'stat':s[stat.ST_CTIME]} sync_category_directories(category, category_directories) -- Matt Domsch Linux Technology Strategist, Dell Office of the CTO linux.dell.com & www.dell.com/linux _______________________________________________ Fedora-infrastructure-list mailing list Fedora-infrastructure-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/fedora-infrastructure-list