On Mon, 17 Aug 2009, James Antill wrote:
James Antill <james-yum@xxxxxxx> writes:
If you just
stop the package creation, does that help? -- ie. have simplePkgList()
return the pkgtups without creating package objects first?
I've just posted a patch to the yum-devel-list for review, which does
this. It makes a significant difference to update and check-update.
If you have time, testing that on your machines and posting the results to
yum-devel-list would be great.
I'll post here because I just subscribed to that list and I can't reply to
the thread.
I just tried your committed changes and indeed it makes a big difference.
Thanks for incorporating these ideas. The average times I see for
check-update are about the following:
              without-exclude  with-exclude
yum-original  26s              24s
yum-james     15s              20s
yum-jimis     12s              11s
For excludes I used "-x '*p*'" which is probably an extreme case. I also
included my version just out of curiosity. I attach the diff for the
version I used, which includes small doc fixes and properly overrides the
simplePkgList() method as you suggested.
Dimitris
--
James Antill -- james@xxxxxxx
_______________________________________________
Yum mailing list
Yum@xxxxxxxxxxxxxxxxx
http://lists.baseurl.org/mailman/listinfo/yum
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index 3f9dc37..f88ab0d 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -411,6 +411,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
self._excludes = set() # of (repo, pkgKey)
self._exclude_whitelist = set() # of (repo, pkgKey)
self._all_excludes = {}
+ self._excludedIds = None
self._search_cache = {
'provides' : { },
'requires' : { },
@@ -424,6 +425,85 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
self._pkgExcludeIds = {}
self._pkgobjlist_dirty = False
+ def _excludedIdsQuery(self):
+ """Creates a table "excludedIds in each repo database that contains
+ all pkgIds of excluded packages. Currently it returns an empty list FIXME"""
+
+ # TODO: sqlite GLOB is case-sensitive so even though it's handy because of
+ # its wildcards, perhaps we should use LIKE and transform wildcards
+ def buildQuery():
+ """Return a query that inserts into a new table the excluded
+ packages as given from the following query:
+
+SELECT pkgId FROM packages
+WHERE
+ NOT
+ (pkgName GLOB self._pkgExcluder[i][2].lower()
+ (only if self._pkgExcluder[i][1]=="include.match")
+ )
+ AND
+ (
+ (repo = self._excludes[i][0] AND
+ pkgKey = self._excludes[i][1])
+ OR
+ repo IN (self._all_excludes[i])
+ OR
+ arch NOT IN (self._arch_allowed[i])
+ OR
+ (pkgName GLOB self._pkgExcluder[i][2].lower()
+ (only if self._pkgExcluder[i][1]=="exclude.match")
+ )
+ )
+"""
+
+ import itertools
+
+ incl_vars= [ i[2].lower() for i in self._pkgExcluder if i[1]=="include.match" ]
+ incl_q1= " OR ".join( [" (name GLOB '?') "] * len(incl_vars) )
+
+ excl_L=[]
+ # itertools.chain seems the most elegant way to flatten a nested list
+ excl_vars1= list(itertools.chain(*self._excludes))
+ excl_q1= " OR ".join( [" (repo = ? AND pkgKey = ?) "] * (len(excl_vars1)/2) )
+ if len(excl_vars1)>0:
+ excl_L+= [excl_q1]
+ excl_vars2= list(self._all_excludes)
+ excl_q2= "repo IN (" + ",".join( ["?"] * len(excl_vars2) ) + ")"
+ if len(excl_vars2)>0:
+ excl_L+= [excl_q2]
+ excl_vars3= list(self._arch_allowed)
+ excl_q3= "arch NOT IN (" + ",".join( ["?"] * len(self._arch_allowed) ) + ")"
+ if len(excl_vars3)>0:
+ excl_L+= [excl_q3]
+ excl_vars4= [ i[2].lower() for i in self._pkgExcluder if i[1]=="exclude.match" ]
+ excl_q4= " OR ".join( [" (name GLOB ?) "] * len(excl_vars4) )
+ if len(excl_vars4)>0:
+ excl_L+= [excl_q4]
+ excl_q= " OR ".join(excl_L)
+ excl_vars= excl_vars1 + excl_vars2 + excl_vars3 + excl_vars4
+
+ q= "INSERT INTO excludedIds SELECT pkgId FROM packages WHERE "
+ if len(incl_vars)>0 or len(excl_vars)>0:
+ if len(incl_vars)>0:
+ q+= " NOT (" + incl_q1 + ")"
+ if len(excl_vars)>0:
+ q+= " AND "
+ if len(excl_vars)>0:
+ q+= "(" + excl_q + ")"
+ else:
+ q+= "0"
+
+ return q, incl_vars+excl_vars
+
+ returnList=[]
+ (q,v)= buildQuery()
+ for (repo,cache) in self.primarydb.items():
+ print repo, q
+ cur = cache.execute("CREATE TEMP TABLE excludedIds (pkgId TEXT)")
+ cur = cache.execute(q, v)
+ return returnList
+
+
@catchSqliteException
def _sql_MD(self, MD, repo, sql, *args):
""" Exec SQL against an MD of the repo, return a cursor. """
@@ -725,19 +805,6 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
pkgkeys.append(pkgKey)
return self._key2pkg[repo][data['pkgKey']]
- def _pkgtupByKeyData(self, repo, pkgKey, data):
- """ Like _packageByKeyData() but we don't create the package, we just
- return the pkgtup. """
- if self._pkgExcludedRKD(repo, pkgKey, data):
- return None
- if repo not in self._key2pkg:
- self._key2pkg[repo] = {}
- self._pkgname2pkgkeys[repo] = {}
- if data['pkgKey'] in self._key2pkg.get(repo, {}):
- return self._key2pkg[repo][data['pkgKey']].pkgtup
- return (data['name'], data['arch'],
- data['epoch'], data['version'], data['release'])
-
def _packagesByName(self, pkgname):
""" Load all pkgnames from cache, with a given name. """
ret = []
@@ -1007,8 +1074,44 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
result.append((pkg, ob['total']))
return result
- @catchSqliteException
+# @catchSqliteException
def returnObsoletes(self, newest=False):
+ """Returns a dict { (n,a,e,v,r): [(n,f,(e,v,r))] } of new:obsoleted
+ packages, minus excludes"""
+
+ def buildQuery():
+ """Build a query in the following form:
+
+SELECT
+ packages.name,
+ packages.arch,
+ packages.epoch,
+ packages.version,
+ packages.release,
+ obsoletes.name,
+ obsoletes.flags,
+ obsoletes.epoch,
+ obsoletes.version,
+ obsoletes.release
+FROM packages, obsoletes
+WHERE
+ (packages.pkgId NOT IN (SELECT pkgId FROM excludedIds))
+ AND
+ (obsoletes.pkgKey = packages.pkgKey)
+"""
+
+ excl_vars= self._excludedIds
+ excl_q= " packages.pkgId NOT IN " + "(" + ",".join( ["?"] * len(excl_vars) ) + ")"
+
+ q="SELECT packages.name, packages.arch, packages.epoch, "\
+ "packages.version, packages.release, obsoletes.name, "\
+ "obsoletes.flags, obsoletes.epoch, obsoletes.version, "\
+ "obsoletes.release FROM packages, obsoletes WHERE "\
+ "(packages.pkgId NOT IN (SELECT pkgId FROM excludedIds)) "\
+ " AND (obsoletes.pkgKey = packages.pkgKey)"
+
+ return q
+
if self._skip_all():
return {}
@@ -1016,32 +1119,14 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
raise NotImplementedError()
obsoletes = {}
- for (rep,cache) in self.primarydb.items():
- cur = cache.cursor()
- executeSQL(cur, "select packages.name as name,\
- packages.pkgKey as pkgKey,\
- packages.arch as arch, packages.epoch as epoch,\
- packages.release as release, packages.version as version,\
- obsoletes.name as oname, obsoletes.epoch as oepoch,\
- obsoletes.release as orelease, obsoletes.version as oversion,\
- obsoletes.flags as oflags\
- from obsoletes,packages where obsoletes.pkgKey = packages.pkgKey")
- for ob in cur:
- key = ( _share_data(ob['name']), _share_data(ob['arch']),
- _share_data(ob['epoch']), _share_data(ob['version']),
- _share_data(ob['release']))
- if self._pkgExcludedRKT(rep, ob['pkgKey'], key):
- continue
-
- (n,f,e,v,r) = ( _share_data(ob['oname']),
- _share_data(ob['oflags']),
- _share_data(ob['oepoch']),
- _share_data(ob['oversion']),
- _share_data(ob['orelease']))
-
- key = _share_data(key)
- val = _share_data((n,f,(e,v,r)))
- obsoletes.setdefault(key,[]).append(val)
+ q= buildQuery()
+ for (repo,cache) in self.primarydb.items():
+ print repo, q
+ cur= cache.execute(q)
+ results= cur.fetchall()
+ for l in results:
+ l= list(l)
+ obsoletes.setdefault(tuple(l[:5]),[]).append(tuple( [ l[5],l[6],tuple(l[7:10]) ] ))
return obsoletes
@@ -1447,13 +1532,16 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
unmatched = misc.unique(unmatched)
return exactmatch, matched, unmatched
- def _setupPkgObjList(self, repoid=None, patterns=None, ignore_case=False):
- """Setup need_full and patterns for _yieldSQLDataList, also see if
- we can get away with just using searchNames(). """
+ @catchSqliteException
+ def _buildPkgObjList(self, repoid=None, patterns=None, ignore_case=False):
+ """Builds a list of packages, only containing nevra information. No
+ excludes are done at this stage. """
if patterns is None:
patterns = []
+ returnList = []
+
fields = ['name', 'sql_nameArch', 'sql_nameVerRelArch',
'sql_nameVer', 'sql_nameVerRel',
'sql_envra', 'sql_nevra']
@@ -1481,14 +1569,8 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
else:
tmp.append((pat, '='))
if not need_full and not need_glob and patterns:
- return (need_full, patterns, fields, True)
+ return self.searchNames(patterns)
patterns = tmp
- return (need_full, patterns, fields, False)
-
- @catchSqliteException
- def _yieldSQLDataList(self, repoid, patterns, fields, ignore_case):
- """Yields all the package data for the given params. Excludes are done
- at this stage. """
for (repo,cache) in self.primarydb.items():
if (repoid == None or repoid == repo.id):
@@ -1509,26 +1591,17 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
if pat_sqls:
qsql = _FULL_PARSE_QUERY_BEG + " OR ".join(pat_sqls)
executeSQL(cur, qsql, pat_data)
+ # Note: If we are building the pkgobjlist, we don't exclude
+ # here, so that we can un-exclude later on ... if that matters.
for x in cur:
- yield (repo, x)
-
- def _buildPkgObjList(self, repoid=None, patterns=None, ignore_case=False):
- """Builds a list of packages, only containing nevra information.
- Excludes are done at this stage. """
-
- returnList = []
-
- data = self._setupPkgObjList(repoid, patterns, ignore_case)
- (need_full, patterns, fields, names) = data
- if names:
- return self.searchNames(patterns)
-
- for (repo, x) in self._yieldSQLDataList(repoid, patterns, fields,
- ignore_case):
- po = self._packageByKeyData(repo, x['pkgKey'], x)
- if po is None:
- continue
- returnList.append(po)
+ exclude = not patterns
+ if True: # NOTE: Can't unexclude things...
+ exclude = True
+ po = self._packageByKeyData(repo, x['pkgKey'], x,
+ exclude=exclude)
+ if po is None:
+ continue
+ returnList.append(po)
if not patterns and repoid is None:
self.pkgobjlist = returnList
self._pkgnames_loaded = set() # Save memory
@@ -1537,7 +1610,26 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
self._pkgnames_loaded.update([po.name for po in returnList])
return returnList
-
+
+ def simplePkgList(self, patterns=None, ignore_case=False):
+ """Returns a list of n,a,e,v,r tuples with all packages minus excludes
+ """
+
+ # Where should I initialise the _excludedIds list??? Obviously here is not best...
+ if self._excludedIds is None:
+ self._excludedIds= self._excludedIdsQuery()
+
+ returnList=[]
+ q= "SELECT name, arch, epoch, version, release FROM packages "\
+ "WHERE pkgId NOT IN (SELECT pkgId FROM excludedIds)"
+ for (repo,cache) in self.primarydb.items():
+ print repo, q
+ cur = cache.execute(q)
+ returnList.extend(cur.fetchall())
+ return [tuple(i) for i in returnList]
+
+
+
def returnPackages(self, repoid=None, patterns=None, ignore_case=False):
"""Returns a list of packages, only containing nevra information. The
packages are processed for excludes. Note that patterns is just
@@ -1573,32 +1665,6 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
return returnList
- def simplePkgList(self, patterns=None, ignore_case=False):
- """Returns a list of pkg tuples (n, a, e, v, r), optionally from a
- single repoid. """
-
- if self._skip_all():
- return []
-
- internal_pkgoblist = hasattr(self, 'pkgobjlist')
- if internal_pkgoblist:
- return yumRepo.YumPackageSack.simplePkgList(self, patterns,
- ignore_case)
-
- repoid = None
- returnList = []
- # Haven't loaded everything, so _just_ get the pkgtups...
- data = self._setupPkgObjList(repoid, patterns, ignore_case)
- (need_full, patterns, fields, names) = data
- for (repo, x) in self._yieldSQLDataList(repoid, patterns, fields,
- ignore_case):
- # NOTE: Can't unexclude things...
- pkgtup = self._pkgtupByKeyData(repo, x['pkgKey'], x)
- if pkgtup is None:
- continue
- returnList.append(pkgtup)
- return returnList
-
@catchSqliteException
def searchNevra(self, name=None, epoch=None, ver=None, rel=None, arch=None):
"""return list of pkgobjects matching the nevra requested"""
_______________________________________________
Yum mailing list
Yum@xxxxxxxxxxxxxxxxx
http://lists.baseurl.org/mailman/listinfo/yum