Re: yum performance

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, 13 Aug 2009, Dimitrios Apostolou wrote:

What I'm thinking to do next is store the excluded PkgIds in a sqlite temporary table and use JOINs for the queries that need excludes. That way even more logic will be moved to SQL and we'll avoid even more the overhead of converting from SQL to lists/dicts/tuples and back to SQL...

Please ignore the previous patches and see the one attached (it applies again to the original yum-3_2_X branch), it includes these changes. That is, for every repo a TEMP table "excludedIds" is created which has all the excluded packages. Then it is used in subsequent queries that need to take exclusions into account.

I haven't tested this one much because I currently don't have any updates pending. Could you try various glob's and tell me if exlcusions happen correctly?


Thanks,
Dimitris
diff --git a/yum/packageSack.py b/yum/packageSack.py
index b71356a..1f574b8 100644
--- a/yum/packageSack.py
+++ b/yum/packageSack.py
@@ -921,9 +921,8 @@ class PackageSack(PackageSackBase):
         """returns a list of pkg tuples (n, a, e, v, r) optionally from a single repoid"""
         
         # Don't cache due to excludes
-        return [pkg.pkgtup for pkg in self.returnPackages(patterns=patterns,
-                                                          ignore_case=False)]
-                       
+        return self.returnPackageTuples(patterns=patterns, ignore_case=False)
+
     def printPackages(self):
         for pkg in self.returnPackages():
             print pkg
diff --git a/yum/sqlitesack.py b/yum/sqlitesack.py
index 643f1f6..b06187d 100644
--- a/yum/sqlitesack.py
+++ b/yum/sqlitesack.py
@@ -411,6 +411,7 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         self._excludes = set() # of (repo, pkgKey)
         self._exclude_whitelist = set() # of (repo, pkgKey)
         self._all_excludes = {}
+        self._excludedIds = None
         self._search_cache = {
             'provides' : { },
             'requires' : { },
@@ -424,6 +425,83 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
         self._pkgExcludeIds = {}
         self._pkgobjlist_dirty = False
 
+    def _excludedIdsQuery(self):
+        """Return a list of pkgId that are excluded"""
+        
+        # TODO: sqlite GLOB is case sensitive so even though it's handy because of 
+        #	its wildcars, perhaps we should use LIKE and transform wildcards
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT pkgId FROM packages
+WHERE
+	NOT
+	(pkgName GLOB self._pkgExcluder[i][2].lower() 
+		(only if self._pkgExcluder[i][1]=="include.match")
+	)
+	AND
+	(
+		(repo = self._excludes[i][0] AND
+		pkgKey = self._excludes[i][1])
+		OR
+		repo IN (self._all_excludes[i])
+		OR
+		arch NOT IN (self._arch_allowed[i])
+		OR
+		(pkgName GLOB self._pkgExcluder[i][2].lower() 
+			(only if self._pkgExcluder[i][1]=="exclude.match")
+		)
+	)
+"""
+
+            import itertools
+
+            incl_vars= [ i[2].lower() for i in self._pkgExcluder if i[1]=="include.match" ]
+            incl_q1= " OR ".join( [" (name GLOB '?') "] * len(incl_vars) )
+            
+            excl_L=[]
+            # itertools.chain seems the most elegant way to flatten a nested list
+            excl_vars1= list(itertools.chain(*self._excludes))
+            excl_q1= " OR ".join( [" (repo = ? AND pkgKey = ?) "] * (len(excl_vars1)/2) )
+            if len(excl_vars1)>0:
+                excl_L+= [excl_q1]
+            excl_vars2= list(self._all_excludes)
+            excl_q2= "repo IN (" + ",".join( ["?"] * len(excl_vars2)  ) + ")"
+            if len(excl_vars2)>0:
+                excl_L+= [excl_q2]
+            excl_vars3= list(self._arch_allowed)
+            excl_q3= "arch NOT IN (" + ",".join( ["?"] * len(self._arch_allowed) ) + ")"
+            if len(excl_vars3)>0:
+                excl_L+= [excl_q3]
+            excl_vars4= [ i[2].lower() for i in self._pkgExcluder if i[1]=="exclude.match" ]
+            excl_q4= " OR ".join( [" (name GLOB ?) "] * len(excl_vars4) )
+            if len(excl_vars4)>0:
+                excl_L+= [excl_q4]
+            excl_q= " OR ".join(excl_L)
+            excl_vars= excl_vars1 + excl_vars2 + excl_vars3 + excl_vars4
+
+            q= "INSERT INTO excludedIds SELECT pkgId FROM packages WHERE "
+            if len(incl_vars)>0 or len(excl_vars)>0:
+                if len(incl_vars)>0:
+                    q+= " NOT (" + incl_q1 + ")"
+                    if len(excl_vars)>0:
+                        q+= " AND "
+                if len(excl_vars)>0:
+                    q+= "(" + excl_q + ")"
+            else:
+                q+= "0"
+
+            return q, incl_vars+excl_vars
+
+        returnList=[]
+        (q,v)= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur = cache.execute("CREATE TEMP TABLE excludedIds (pkgId TEXT)")
+            cur = cache.execute(q, v)
+        return returnList
+
+
     @catchSqliteException
     def _sql_MD(self, MD, repo, sql, *args):
         """ Exec SQL against an MD of the repo, return a cursor. """
@@ -994,8 +1072,44 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
                 result.append((pkg, ob['total']))
         return result
         
-    @catchSqliteException
+#    @catchSqliteException
     def returnObsoletes(self, newest=False):
+        """Returns a dict { (n,a,e,v,r): [(n,f,(e,v,r))] } of new:obsoleted 
+           packages"""
+        
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT 
+	packages.name, 
+	packages.arch, 
+	packages.epoch, 
+	packages.version, 
+	packages.release,
+	obsoletes.name,
+	obsoletes.flags,
+	obsoletes.epoch,
+	obsoletes.version,
+	obsoletes.release
+FROM packages, obsoletes
+WHERE
+	(packages.pkgId NOT IN (SELECT pkgId FROM excludedIds))
+	AND
+	(obsoletes.pkgKey = packages.pkgKey)
+"""
+
+            excl_vars= self._excludedIds
+            excl_q= " packages.pkgId NOT IN " + "(" + ",".join( ["?"] * len(excl_vars) ) + ")"
+
+            q="SELECT packages.name, packages.arch, packages.epoch, "\
+                "packages.version, packages.release, obsoletes.name, "\
+                "obsoletes.flags, obsoletes.epoch, obsoletes.version, "\
+                "obsoletes.release FROM packages, obsoletes WHERE "\
+                "(packages.pkgId NOT IN (SELECT pkgId FROM excludedIds)) "\
+                " AND (obsoletes.pkgKey = packages.pkgKey)"
+
+            return q
+
         if self._skip_all():
             return {}
 
@@ -1003,32 +1117,14 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             raise NotImplementedError()
 
         obsoletes = {}
-        for (rep,cache) in self.primarydb.items():
-            cur = cache.cursor()
-            executeSQL(cur, "select packages.name as name,\
-                packages.pkgKey as pkgKey,\
-                packages.arch as arch, packages.epoch as epoch,\
-                packages.release as release, packages.version as version,\
-                obsoletes.name as oname, obsoletes.epoch as oepoch,\
-                obsoletes.release as orelease, obsoletes.version as oversion,\
-                obsoletes.flags as oflags\
-                from obsoletes,packages where obsoletes.pkgKey = packages.pkgKey")
-            for ob in cur:
-                key = ( _share_data(ob['name']), _share_data(ob['arch']),
-                        _share_data(ob['epoch']), _share_data(ob['version']),
-                        _share_data(ob['release']))
-                if self._pkgExcludedRKT(rep, ob['pkgKey'], key):
-                    continue
-
-                (n,f,e,v,r) = ( _share_data(ob['oname']),
-                                _share_data(ob['oflags']),
-                                _share_data(ob['oepoch']),
-                                _share_data(ob['oversion']),
-                                _share_data(ob['orelease']))
-
-                key = _share_data(key)
-                val = _share_data((n,f,(e,v,r)))
-                obsoletes.setdefault(key,[]).append(val)
+        q= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur= cache.execute(q)
+            results= cur.fetchall()
+            for l in results:
+                l= list(l)
+                obsoletes.setdefault(tuple(l[:5]),[]).append(tuple( [ l[5],l[6],tuple(l[7:10]) ] ))
 
         return obsoletes
 
@@ -1512,7 +1608,37 @@ class YumSqlitePackageSack(yumRepo.YumPackageSack):
             self._pkgnames_loaded.update([po.name for po in returnList])
 
         return returnList
-                
+
+    def returnPackageTuples(self, ignore_case=False, patterns=None):
+        """Returns a list of n,a,e,v,r tuples with all packages minus excludes
+        """
+        
+        # Where should I initialise the _excludedIds list??? Obviously here is not best...
+        if self._excludedIds is None:
+            self._excludedIds= self._excludedIdsQuery()
+
+        def buildQuery():
+            """Build a query in the following form:
+
+SELECT name, arch, epoch, version, release FROM packages
+WHERE pkgId NOT IN (SELECT pkgId FROM excludedIds)
+	    """
+
+            q= "SELECT name, arch, epoch, version, release FROM packages "\
+               "WHERE pkgId NOT IN (SELECT pkgId FROM excludedIds)"
+
+            return q
+
+        returnList=[]
+        q= buildQuery()
+        for (repo,cache) in self.primarydb.items():
+            print repo, q
+            cur = cache.execute(q)
+            returnList.extend(cur.fetchall())
+        return [tuple(i) for i in returnList]
+            
+        
+
     def returnPackages(self, repoid=None, patterns=None, ignore_case=False):
         """Returns a list of packages, only containing nevra information. The
            packages are processed for excludes. Note that patterns is just
_______________________________________________
Yum mailing list
Yum@xxxxxxxxxxxxxxxxx
http://lists.baseurl.org/mailman/listinfo/yum

[Index of Archives]     [Fedora Users]     [Fedora Legacy List]     [Fedora Maintainers]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [KDE Users]

  Powered by Linux