Re: yum performance

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Here is another patch (to the original yum-3_2_X branch) that moves even more logic to SQL. In particular it computes a list of all excluded packages initially, and then uses that list of pkgId for the queries needed in returnObsoletes() and returnPackages(). This could probably be used in other queries too but I haven't dug that deep.

What I'm thinking to do next is store the excluded PkgIds in a sqlite temporary table and use JOINs for the queries that need excludes. That way even more logic will be moved to SQL and we'll avoid even more the overhead of converting from SQL to lists/dicts/tuples and back to SQL...

Profiling after my changes seems that now the major slowdown is simplePkgList()@rpmsack.py so it's worth concentrating the effort there.


Dimitris
diff --git a/yum/packageSack.py b/yum/packageSack.py
index b71356a..1f574b8 100644
--- a/yum/packageSack.py
+++ b/yum/packageSack.py
@@ -921,9 +921,8 @@ class PackageSack(PackageSackBase):
         """returns a list of pkg tuples (n, a, e, v, r) optionally from a single repoid"""
         
         # Don't cache due to excludes
-        return [pkg.pkgtup for pkg in self.returnPackages(patterns=patterns,
-                                                          ignore_case=False)]
-                       
+        return self.returnPackageTuples(patterns=patterns, ignore_case=False)
+
     def printPackages(self):
         for pkg in self.returnPackages():
             print pkg
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index 643f1f6..7b85914 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -411,6 +411,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         self._excludes = set() # of (repo, pkgKey)
         self._exclude_whitelist = set() # of (repo, pkgKey)
         self._all_excludes = {}
+        self._excludedIds = None
         self._search_cache = {
             'provides' : { },
             'requires' : { },
@@ -424,6 +425,83 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         self._pkgExcludeIds = {}
         self._pkgobjlist_dirty = False
 
+    def _excludedIdsQuery(self):
+        """Return a list of pkgId that are excluded"""
+        
+        # TODO: sqlite GLOB is case sensitive so even though it's handy because of 
+        #	its wildcars, perhaps we should use LIKE and transform wildcards
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT pkgId FROM packages
+WHERE
+	NOT
+	(pkgName GLOB self._pkgExcluder[i][2].lower() 
+		(only if self._pkgExcluder[i][1]=="include.match")
+	)
+	AND
+	(
+		(repo = self._excludes[i][0] AND
+		pkgKey = self._excludes[i][1])
+		OR
+		repo IN (self._all_excludes[i])
+		OR
+		arch NOT IN (self._arch_allowed[i])
+		OR
+		(pkgName GLOB self._pkgExcluder[i][2].lower() 
+			(only if self._pkgExcluder[i][1]=="exclude.match")
+		)
+	)
+"""
+
+            import itertools
+
+            incl_vars= [ i[2].lower() for i in self._pkgExcluder if i[1]=="include.match" ]
+            incl_q1= " OR ".join( [" (name GLOB '?') "] * len(incl_vars) )
+            
+            excl_L=[]
+            # itertools.chain seems the most elegant way to flatten a nested list
+            excl_vars1= list(itertools.chain(*self._excludes))
+            excl_q1= " OR ".join( [" (repo = ? AND pkgKey = ?) "] * (len(excl_vars1)/2) )
+            if len(excl_vars1)>0:
+                excl_L+= [excl_q1]
+            excl_vars2= list(self._all_excludes)
+            excl_q2= "repo IN (" + ",".join( ["?"] * len(excl_vars2)  ) + ")"
+            if len(excl_vars2)>0:
+                excl_L+= [excl_q2]
+            excl_vars3= list(self._arch_allowed)
+            excl_q3= "arch NOT IN (" + ",".join( ["?"] * len(self._arch_allowed) ) + ")"
+            if len(excl_vars3)>0:
+                excl_L+= [excl_q3]
+            excl_vars4= [ i[2].lower() for i in self._pkgExcluder if i[1]=="exclude.match" ]
+            excl_q4= " OR ".join( [" (name GLOB ?) "] * len(excl_vars4) )
+            if len(excl_vars4)>0:
+                excl_L+= [excl_q4]
+            excl_q= " OR ".join(excl_L)
+            excl_vars= excl_vars1 + excl_vars2 + excl_vars3 + excl_vars4
+
+            q= "SELECT pkgId FROM packages WHERE "
+            if len(incl_vars)>0 or len(excl_vars)>0:
+                if len(incl_vars)>0:
+                    q+= " NOT (" + incl_q1 + ")"
+                    if len(excl_vars)>0:
+                        q+= " AND "
+                if len(excl_vars)>0:
+                    q+= "(" + excl_q + ")"
+            else:
+                q+= 0
+
+            return q, incl_vars+excl_vars
+
+        returnList=[]
+        (q,v)= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur = cache.execute(q, v)
+            returnList.extend([l[0] for l in cur.fetchall()])
+        return returnList
+
+
     @catchSqliteException
     def _sql_MD(self, MD, repo, sql, *args):
         """ Exec SQL against an MD of the repo, return a cursor. """
@@ -994,8 +1072,45 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 result.append((pkg, ob['total']))
         return result
         
-    @catchSqliteException
+#    @catchSqliteException
     def returnObsoletes(self, newest=False):
+        """Returns a dict { (n,a,e,v,r): [(n,f,(e,v,r))] } of new:obsoleted 
+           packages"""
+        
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT 
+	packages.name, 
+	packages.arch, 
+	packages.epoch, 
+	packages.version, 
+	packages.release,
+	obsoletes.name,
+	obsoletes.flags,
+	obsoletes.epoch,
+	obsoletes.version,
+	obsoletes.release
+FROM packages, obsoletes
+WHERE
+	(packages.pkgId NOT IN self._excludedIds[:])
+	AND
+	(obsoletes.pkgKey = packages.pkgKey)
+"""
+
+            excl_vars= self._excludedIds
+            excl_q= " packages.pkgId NOT IN " + "(" + ",".join( ["?"] * len(excl_vars) ) + ")"
+
+            q="SELECT packages.name, packages.arch, packages.epoch, "\
+                "packages.version, packages.release, obsoletes.name, "\
+                "obsoletes.flags, obsoletes.epoch, obsoletes.version, "\
+                "obsoletes.release FROM packages, obsoletes WHERE "
+            if len(excl_vars)>0:
+                q+=  excl_q + " AND "
+            q+= "(obsoletes.pkgKey = packages.pkgKey)"
+
+            return q, excl_vars
+
         if self._skip_all():
             return {}
 
@@ -1003,32 +1118,14 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             raise NotImplementedError()
 
         obsoletes = {}
-        for (rep,cache) in self.primarydb.items():
-            cur = cache.cursor()
-            executeSQL(cur, "select packages.name as name,\
-                packages.pkgKey as pkgKey,\
-                packages.arch as arch, packages.epoch as epoch,\
-                packages.release as release, packages.version as version,\
-                obsoletes.name as oname, obsoletes.epoch as oepoch,\
-                obsoletes.release as orelease, obsoletes.version as oversion,\
-                obsoletes.flags as oflags\
-                from obsoletes,packages where obsoletes.pkgKey = packages.pkgKey")
-            for ob in cur:
-                key = ( _share_data(ob['name']), _share_data(ob['arch']),
-                        _share_data(ob['epoch']), _share_data(ob['version']),
-                        _share_data(ob['release']))
-                if self._pkgExcludedRKT(rep, ob['pkgKey'], key):
-                    continue
-
-                (n,f,e,v,r) = ( _share_data(ob['oname']),
-                                _share_data(ob['oflags']),
-                                _share_data(ob['oepoch']),
-                                _share_data(ob['oversion']),
-                                _share_data(ob['orelease']))
-
-                key = _share_data(key)
-                val = _share_data((n,f,(e,v,r)))
-                obsoletes.setdefault(key,[]).append(val)
+        (q,v)= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur= cache.execute(q,v)
+            results= cur.fetchall()
+            for l in results:
+                l= list(l)
+                obsoletes.setdefault(tuple(l[:5]),[]).append(tuple( [ l[5],l[6],tuple(l[7:10]) ] ))
 
         return obsoletes
 
@@ -1512,7 +1609,41 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             self._pkgnames_loaded.update([po.name for po in returnList])
 
         return returnList
-                
+
+    def returnPackageTuples(self, ignore_case=False, patterns=None):
+        """Returns a list of n,a,e,v,r tuples with all packages minus excludes
+        """
+        
+        # Where should I initialise the _excludedIds list??? Obviously here is not best...
+        if self._excludedIds is None:
+            self._excludedIds= self._excludedIdsQuery()
+
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT name, arch, epoch, version, release FROM packages
+WHERE pkgId NOT IN self._excludedIds[:]
+	    """
+
+            excl_vars= self._excludedIds
+            excl_q= " pkgId NOT IN " + "(" + ",".join( ["?"] * len(excl_vars) ) + ")"
+
+            q="SELECT name, arch, epoch, version, release FROM packages"
+            if len(excl_vars)>0:
+                q+= " WHERE " + excl_q
+
+            return q, excl_vars
+
+        returnList=[]
+        (q,v)= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur = cache.execute(q, v)
+            returnList.extend(cur.fetchall())
+        return [tuple(i) for i in returnList]
+            
+        
+
     def returnPackages(self, repoid=None, patterns=None, ignore_case=False):
         """Returns a list of packages, only containing nevra information. The
            packages are processed for excludes. Note that patterns is just
_______________________________________________
Yum mailing list
Yum@xxxxxxxxxxxxxxxxx
http://lists.baseurl.org/mailman/listinfo/yum

[Index of Archives]     [Fedora Users]     [Fedora Legacy List]     [Fedora Maintainers]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]

  Powered by Linux