Hello, I've been looking into implementing some additional features in yum, but I wasn't sure how to do it and/or what the reasons behind some yum defaults are. So, here goes: I'd like to teach yum to understand hdlist* files. There are two cases where this would be useful. The first is when you are building a list of headers with yum-arch for a full (installable) copy of a distribution. Why bother going through every rpm when all of their headers are already in the hdlist files? I ask because I maintain a local copy of the RH distro, one of whose features is that updates are incorporated into the original distro, so that upon install there is no further need to run any updates. That process can take a while, and in the scripts that do it I try to minimize how many rpms are touched by using hdlist wherever I can (e.g., when merging in updated or new rpms, don't scan the original RedHat/RPMS dir; load up hdlist instead). The second, much more useful case is during the initial download of headers. More recent RH distros install the comps rpm, which carries the appropriate hdlist files with it, and these are supposed to contain the headers of all the rpms. This would eliminate the need to download all headers for non-installed pkgs on the first run. I include an initial attempt to implement the second goal. It is rather crude (e.g. more error checking could be implemented) but seems to work. I added a parameter called core for repositories so that one can indicate to yum that a particular repository is the one (or just like the one) used to install the machine. The hdlist will be used only for those repositories. Now the questions. First of all, why does yum-arch set the header file times equal to those of the rpm? Is there any particular reason (as I can't really do that if dumping data from hdlist)? Second, I dislike that yum-arch recreates the header list every time it is run. I believe it should try to detect what changed and update only what's necessary. Ok, I know - this is minor, but it just bugs me slightly :). 
Then again, in cases where you are updating an already existing headers dir, it would make it much faster. Finally, it seems that yum is not removing cached headers for packages that have been installed until one runs yum clean. Any particular reason for that? Thanks, Josko P. P.S. Great program. -------------- next part -------------- --- yum-2.0.4/config.py.hdlist 2003-10-19 17:49:39.000000000 -0400 +++ yum-2.0.4/config.py 2003-10-31 12:14:31.000000000 -0500 @@ -57,6 +57,7 @@ self.servercache = {} self.servergpgcheck={} self.serverexclude={} + self.servercore={} self.failoverclass = {} self.excludes=[] @@ -178,6 +179,10 @@ self.servergpgcheck[section]=self.cfg.getboolean(section,'gpgcheck') else: self.servergpgcheck[section]=0 + if self._getoption(section,'core') != None: + self.servercore[section]=self.cfg.getboolean(section,'core') + else: + self.servercore[section]=0 if self._getoption(section, 'exclude') != None: srvexcludelist = self._getoption(section, 'exclude') srvexcludelist = self._doreplace(srvexcludelist) --- yum-2.0.4/clientStuff.py.hdlist 2003-10-19 17:49:39.000000000 -0400 +++ yum-2.0.4/clientStuff.py 2003-10-31 12:14:31.000000000 -0500 @@ -795,6 +795,14 @@ else: continue + if conf.servercore[serverid]: + log(8, 'checking for a header in hdlist') + hdhdr = HeaderInfo.getHdListHeader(name, arch) + if hdhdr: + log(5, 'writing out a header from hdlist into: %s' % LocalHeaderFile) + hdhdr.writeHeader("",1,LocalHeaderFile) + continue + if not conf.cache: log(2, _('getting %s') % (LocalHeaderFile)) try: --- yum-2.0.4/nevral.py.hdlist 2003-10-19 17:49:39.000000000 -0400 +++ yum-2.0.4/nevral.py 2003-10-31 12:50:10.000000000 -0500 @@ -30,6 +30,7 @@ self.rpmbynamearch = {} self.localrpmpath = {} self.localhdrpath = {} + self.hdlist = None def add(self,(name,epoch,ver,rel,arch,rpmloc,serverid),state): # if self.rpmbyname.haskey(name): @@ -65,6 +66,39 @@ else: return ((None,None,None,None,None,None),None) + def _get_hdlist(self): + if 
os.path.exists("/usr/share/comps/i386/hdlist") and os.path.exists("/usr/share/comps/i386/hdlist2"): + log(7, "loading hdlist data") + self.hdlist = {} + temphdlist = rpm.readHeaderListFromFile("/usr/share/comps/i386/hdlist") + fd = os.open("/usr/share/comps/i386/hdlist2", os.O_RDONLY) + rpm.mergeHeaderListFromFD(temphdlist, fd, 1000004) + os.close(fd) + for h in temphdlist: + h.fullFilelist() + hdhdr = rpmUtils.RPM_Hdr(h) + self.hdlist[(hdhdr.name(),hdhdr.arch())] = (hdhdr) + return 1 + else: + return 0 + + def getHdListHeader(self,name,arch=None): + if self.hdlist is None: + self._get_hdlist() + ((e,v,r,a,l,i),state) = self._get_data(name, arch) + if (name,arch) in self.hdlist: + if e == None: + e = "0" + hdhdr = self.hdlist[(name,arch)] + if hdhdr.epoch(): + ehdhdr = "%s" % hdhdr.epoch() + else: + ehdhdr = "0" + if (hdhdr.version() == v) and (hdhdr.release() == r) and (ehdhdr == e): + log(6, "found header for (%s,%s) in hdlist" %(name,arch)) + return hdhdr + return None + def getHeader(self, name, arch=None): # FIXME - this should return a header object from rpmUtils ((e,v,r,a,l,i),state) = self._get_data(name, arch) --- yum-2.0.4/rpmUtils.py.hdlist 2003-10-19 17:49:39.000000000 -0400 +++ yum-2.0.4/rpmUtils.py 2003-10-31 12:14:31.000000000 -0500 @@ -199,16 +199,17 @@ a = self._getTag('arch') return (n, e, v, r, a) - def writeHeader(self, headerdir, compress): + def writeHeader(self, headerdir, compress, headerfn = None): # write the header out to a file with the format: name-epoch-ver-rel.arch.hdr # return the name of the file it just made - no real reason :) (name, epoch, ver, rel, arch) = self.nevra() if epoch is None: epoch = '0' - if self.isSource(): - headerfn = "%s/%s-%s-%s-%s.src.hdr" % (headerdir, name, epoch, ver, rel) - else: - headerfn = "%s/%s-%s-%s-%s.%s.hdr" % (headerdir, name, epoch, ver, rel, arch) + if headerfn is None: + if self.isSource(): + headerfn = "%s/%s-%s-%s-%s.src.hdr" % (headerdir, name, epoch, ver, rel) + else: + headerfn = 
"%s/%s-%s-%s-%s.%s.hdr" % (headerdir, name, epoch, ver, rel, arch) if compress: headerout = _gzipOpen(headerfn, "w") @@ -260,7 +261,11 @@ errorlog(0, _('Error opening rpm %s - error %s') % (rpmfn, e)) self.hdr = None os.close(fd) - + +class RPM_Hdr(RPM_Base_Work): + def __init__(self, sethdr): + self.hdr = sethdr + class Rpm_Ts_Work: """This should operate on groups of headers/matches/etc in the rpmdb - ideally it will operate with a list of the Base objects above, so I can refer to any one object there