[StGIT PATCH 5/5] Speed up the appliedness test

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The appliedness test was too slow if at least one patch, applied or
unapplied, was too far away from HEAD, since we had to visit the whole
intervening part of the commit DAG.

This patch fixes that problem by maintaining a cache of uninteresting
commits that are known to not reach any patches in the commit DAG.
(Specifically, this is at all times the set of commits that are
parents to patch commits and do not have a patch commit as their
ancestor.) By exlcuding these commits when walking the graph, we only
have to visit the interesting places.

As a nice side effect, the cache of uninteresting commits makes it
possible to use just one git-rev-list call instead of two, since we
can list the applied patches without first computing the unapplied
patches; the unapplied patches are then simply all patches except
those that are applied.

Signed-off-by: Karl Hasselström <kha@xxxxxxxxxxx>

---

 stgit/stack.py |  272 ++++++++++++++++++++++++++++++++++++++++++++++----------
 1 files changed, 221 insertions(+), 51 deletions(-)

diff --git a/stgit/stack.py b/stgit/stack.py
index 4186ba9..5a51329 100644
--- a/stgit/stack.py
+++ b/stgit/stack.py
@@ -155,7 +155,7 @@ class Patch(StgitObject):
         os.mkdir(self._dir())
         self.create_empty_field('bottom')
         self.create_empty_field('top')
-
+ 
     def delete(self):
         for f in os.listdir(self._dir()):
             os.remove(os.path.join(self._dir(), f))
@@ -351,7 +351,11 @@ class PatchorderCache:
     saved in its patchorder file."""
     def __init__(self, series):
         self.__series = series
+        self.__file = os.path.join(self.__series._dir(), 'patchorder')
         self.__invalidate()
+    def delete_file(self):
+        if os.path.isfile(self.__file):
+            os.remove(self.__file)
     def __invalidate(self):
         self.__patchnames = None
         self.__position = None
@@ -361,9 +365,8 @@ class PatchorderCache:
 
         self.__patchnames = []
         self.__position = {}
-        pof = os.path.join(self.__series._dir(), 'patchorder')
-        if os.path.isfile(pof):
-            for line in file(pof):
+        if os.path.isfile(self.__file):
+            for line in file(self.__file):
                 name = line.strip()
                 assert not name in self.__position
                 self.__position[name] = len(self.__patchnames)
@@ -386,60 +389,200 @@ class PatchorderCache:
         pos2 = self.__position.get(name2, largepos)
         return cmp((pos1, name1), (pos2, name2))
 
+class UninterestingCache:
+    """Keeps track of a set of commits that do not reach any patches.
+    These are used to speed up the detection of unapplied patches.
+
+    Specifically, this is at all times the set of commits c that
+    fulfill the following two criteria:
+
+      * c does not reach any patch
+
+      * c is the parent of a patch
+
+    """
+    def __init__(self, series):
+        self.__series = series
+        self.__uninteresting = None
+        self.__filename = os.path.join(self.__series._dir(), 'uninteresting')
+    def __invalidate(self):
+        self.__uninteresting = None
+        self.delete_file()
+    def delete_file(self):
+        if os.path.isfile(self.__filename):
+            os.remove(self.__filename)
+    def __other_patches(self, patchname):
+        """All patches except the named one."""
+        ref2hash = read_refs(self.__series.get_name())
+        return [self.__series.get_patch(ref)
+                for ref in ref2hash.iterkeys()
+                if ref and ref != patchname]
+    def __write_file(self):
+        """Write the uninteresting commits to file."""
+        try:
+            f = file(self.__filename, 'w')
+            for u in self.__uninteresting:
+                f.write('%s\n' % u)
+            f.close()
+        except IOError:
+            pass # this isn't fatal -- the directory is probably missing
+    def __read_file(self):
+        """Read the uninteresting commits from file. Return true on
+        success, false on failure."""
+        if not os.path.isfile(self.__filename):
+            return False
+        self.__uninteresting = Set()
+        for line in file(self.__filename):
+            self.__uninteresting.add(line.strip())
+        return True
+    def __cache_file(self):
+        """Try to cache the uninteresting commits using only the cache
+        file. Return true on success, false on failure."""
+        if self.__uninteresting != None:
+            return True # already cached
+        return self.__read_file()
+    def __cache(self):
+        """Cache the uninteresting commits, recomputing them if
+        necessary."""
+        if self.__cache_file():
+            return
+        self.__compute_uninteresting()
+        self.__write_file()
+    def __compute_uninteresting(self):
+        """Compute a reasonable set of uninteresting commits from
+        scratch. This is expensive."""
+        out.start('Finding uninteresting commits')
+        ref2hash = read_refs(self.__series.get_name())
+        patches = Set([sha1 for ref, sha1 in ref2hash.iteritems() if ref])
+        interesting, uninteresting = Set(), Set()
+
+        # Iterate over all commits. We are guaranteed to see each
+        # commit before any of its children.
+        for line in git._output_lines(
+            'git-rev-list --topo-order --reverse --parents --all'):
+            commits = line.split()
+            commit, parents = commits[0], Set(commits[1:])
+
+            # Patches are interesting.
+            if commit in patches:
+                interesting.add(commit)
+
+                # The parents of a patch are uninteresting unless they
+                # are interesting.
+                for p in parents:
+                    if not p in interesting:
+                        uninteresting.add(p)
+                continue
+
+            # Commits with interesting parents are interesting.
+            if interesting.intersection(parents):
+                interesting.add(commit)
+        self.__uninteresting = uninteresting
+        out.done()
+    def create_patch(self, name, top, bottom):
+        """The given patch has been created. Update the uninterested
+        state to maintain the invariant."""
+        if not self.__cache_file():
+            return # not cached
+
+        # New patch inserted just below an existing bottommost patch:
+        # need to move the uninteresting commit down one step.
+        if top in self.__uninteresting:
+            self.__uninteresting.remove(top)
+            self.__uninteresting.add(bottom)
+            self.__write_file()
+            return
+
+        # New patch inserted next to an existing non-bottommost patch:
+        # don't need to do anything.
+        existing_patches = self.__other_patches(name)
+        tops = Set([p.get_top() for p in existing_patches])
+        bottoms = Set([p.get_bottom() for p in existing_patches])
+        if bottom in bottoms or bottom in tops or top in bottoms:
+            return
+
+        # The new patch is not adjacent to an existing patch. We'd
+        # need to first get rid of any uninteresting commit that
+        # reaches this patch, and then mark the patch's bottom
+        # uninteresting if it doesn't reach any other patch. This is a
+        # lot of work, so we chicken out and blow the whole cache
+        # instead.
+        self.__invalidate()
+    def delete_patch(self, name, top, bottom):
+        """The given patch has been deleted. Update the uninterested
+        state to maintain the invariant."""
+        if not self.__cache_file():
+            return # not cached
+
+        # If this patch reaches another patch, there's nothing to do.
+        if not bottom in self.__uninteresting:
+            return
+
+        # If another patch has the same bottom, it's still
+        # uninteresting and there's nothing more to do.
+        other_patches = self.__other_patches(name)
+        for p in other_patches:
+            if p.get_bottom() == bottom:
+                return
+
+        # If there are other patches on top of this one, their bottoms
+        # (this patch's top) become uninteresting in place of this
+        # patch's bottom.
+        for p in other_patches:
+            if p.get_bottom() == top:
+                self.__uninteresting.remove(bottom)
+                self.__uninteresting.add(top)
+                self.__write_file()
+                return
+
+        # The bottom of this patch is no longer uninteresting. But
+        # there might be other patches that reach it, whose bottoms
+        # would need to be marked uninteresting. That would require an
+        # expensive reachability analysis.
+        self.__invalidate()
+    def get(self):
+        self.__cache()
+        return self.__uninteresting
+
 def read_refs(branch):
     """Return a mapping from patches and branch head to hashes for a
     given branch. The patches are listed by name; the branch head is
     None."""
     refs = {}
     patchpat = re.compile(r'^refs/patches/%s/([^\.]+)$' % branch)
+    head = 'refs/heads/%s' % branch
     for line in git._output_lines('git-show-ref'):
         sha1, ref = line.split()
         m = re.match(patchpat, ref)
         if m:
             refs[m.group(1)] = sha1
-        elif ref == 'refs/heads/%s' % branch:
+        elif ref == head:
             refs[None] = sha1
+    if not None in refs:
+        raise StackException, 'Could not find %s' % head
     return refs
 
-def unapplied_patches(ref2hash):
+def get_patches(ref2hash, uninteresting):
     """Given a map of patch names (and the branch head, keyed by None)
-    to hashes, return the set of unapplied patches."""
-    hash2refs = {}
-    for r, h in ref2hash.iteritems():
-        hash2refs.setdefault(h, Set()).add(r)
-
+    to hashes, return the list of applied patches and the set of
+    unapplied patches. The second parameter is a set of commit objects
+    that do not reach any patch."""
+    applied = []
     unapplied = Set()
-    for line in git._output_lines(
-        'git-rev-list --stdin',
-        ('%s%s\n' % (['', '^'][ref == None], sha1)
-         for ref, sha1 in ref2hash.iteritems())):
-        for ref in hash2refs.get(line.strip(), []):
-            unapplied.add(ref)
-    return unapplied
-
-def sort_applied_patches(ref2hash):
-    """Given a map of patch names (and the branch head, keyed by None)
-    to hashes, return a list with the applied patches in stack order.
-    All patches in the map must be applied."""
-    hash2refs = {}
+    hash2patches = {}
     for r, h in ref2hash.iteritems():
-        if r != None:
-            hash2refs.setdefault(h, Set()).add(r)
+        if r:
+            hash2patches.setdefault(h, Set()).add(r)
+            unapplied.add(r)
 
-    missing = Set(ref for ref in ref2hash.iterkeys() if ref != None)
-    if not missing:
-        return []
-    applied = []
-    grl = popen2.Popen3('git-rev-list %s' % ref2hash[None], True)
-    for line in grl.fromchild:
-        for ref in hash2refs.get(line.strip(), []):
+    for line in git._output_lines(
+        'git-rev-list --topo-order --stdin', ['%s\n' % ref2hash[None]]
+        + ['^%s\n' % u for u in uninteresting]):
+        for ref in hash2patches.get(line.strip(), []):
             applied.append(ref)
-            missing.remove(ref)
-        if not missing:
-            applied.reverse()
-            return applied
-
-    raise StackException, 'Could not find patches: %s' % ', '.join(missing)
+            unapplied.remove(ref)
+    applied.reverse()
+    return applied, unapplied
 
 class AppliedCache:
     """An object that keeps track of the appliedness and order of the
@@ -447,7 +590,11 @@ class AppliedCache:
     def __init__(self, series):
         self.__series = series
         self.__order = PatchorderCache(series)
+        self.__uninteresting = UninterestingCache(series)
         self.__invalidate()
+    def delete_files(self):
+        for sub in [self.__uninteresting, self.__order]:
+            sub.delete_file()
     def get_applied(self):
         self.__cache()
         return self.__applied
@@ -466,6 +613,17 @@ class AppliedCache:
                 self.__write_patchorder()
                 return
         raise StackException, 'Unknown patch "%s"' % oldname
+    def new(self, name, top, bottom):
+        """Create new patch."""
+        self.__uninteresting.create_patch(name, top, bottom)
+    def delete(self, name, top, bottom):
+        """Delete a patch."""
+        self.__uninteresting.delete_patch(name, top, bottom)
+    def change(self, name, old_top, old_bottom, new_top, new_bottom):
+        """Change a patch."""
+        if (new_top, new_bottom) != (old_top, old_bottom):
+            self.new(name, new_top, new_bottom)
+            self.delete(name, old_top, old_bottom)
     def __write_patchorder(self):
         self.__order.set_patchorder(self.get_applied() + self.get_unapplied())
     def set_patchorder(self, new_order):
@@ -484,11 +642,8 @@ class AppliedCache:
     def __cache(self):
         if self.__cached():
             return
-        patches = read_refs(self.__series.get_name())
-        unapplied = unapplied_patches(patches)
-        for patch in unapplied:
-            del patches[patch]
-        self.__applied = sort_applied_patches(patches)
+        self.__applied, unapplied = get_patches(
+            read_refs(self.__series.get_name()), self.__uninteresting.get())
         self.__unapplied = list(unapplied)
         self.__unapplied.sort(self.__order.cmp)
 
@@ -849,6 +1004,7 @@ class Series(PatchSet):
                 os.remove(self.__hidden_file)
             if os.path.exists(self._dir()+'/orig-base'):
                 os.remove(self._dir()+'/orig-base')
+            self.__applied_cache.delete_files()
 
             if not os.listdir(self.__patch_dir):
                 os.rmdir(self.__patch_dir)
@@ -953,16 +1109,20 @@ class Series(PatchSet):
         patch = self.get_patch(name)
         old_bottom = patch.get_old_bottom()
         old_top = patch.get_old_top()
+        curr_bottom = patch.get_bottom()
+        curr_top = patch.get_top()
 
         # the bottom of the patch is not changed by refresh. If the
         # old_bottom is different, there wasn't any previous 'refresh'
         # command (probably only a 'push')
-        if old_bottom != patch.get_bottom() or old_top == patch.get_top():
+        if old_bottom != curr_bottom or old_top == curr_top:
             raise StackException, 'No undo information available'
 
         git.reset(tree_id = old_top, check_out = False)
         if patch.restore_old_boundaries():
             self.log_patch(patch, 'undo')
+        self.__applied_cache.change(name, curr_top, curr_bottom,
+                                    old_top, old_bottom)
 
     def new_patch(self, name, message = None, can_edit = True,
                   unapplied = False, show_patch = False,
@@ -995,10 +1155,9 @@ class Series(PatchSet):
         patch = self.get_patch(name)
         patch.create()
 
-        if not bottom:
-            bottom = head
-        if not top:
-            top = head
+        bottom = bottom or head
+        top = top or head
+        self.__applied_cache.new(name, top, bottom)
 
         patch.set_bottom(bottom)
         patch.set_top(top)
@@ -1042,15 +1201,16 @@ class Series(PatchSet):
     def delete_patch_data(self, name):
         """Deletes the stgit data for a patch."""
         patch = Patch(name, self.__patch_dir, self.__refs_dir)
+        top, bottom = patch.get_top(), patch.get_bottom()
 
         # save the commit id to a trash file
-        write_string(os.path.join(self.__trash_dir, name), patch.get_top())
+        write_string(os.path.join(self.__trash_dir, name), top)
 
         patch.delete()
         if self.patch_hidden(name):
             self.unhide_patch(name)
 
-        return patch
+        self.__applied_cache.delete(name, top, bottom)
 
     def delete_patch(self, name):
         """Deletes a patch
@@ -1109,6 +1269,7 @@ class Series(PatchSet):
 
                     top_tree = git.get_commit(top).get_tree()
 
+                    old_top = top
                     top = git.commit(message = descr, parents = [head],
                                      cache_update = False,
                                      tree_id = top_tree,
@@ -1122,6 +1283,9 @@ class Series(PatchSet):
                     patch.set_bottom(head, backup = True)
                     patch.set_top(top, backup = True)
 
+                    self.__applied_cache.change(
+                        name, old_top = old_top, old_bottom = bottom,
+                        new_top = top, new_bottom = head)
                     self.log_patch(patch, 'push(f)')
                 else:
                     top = head
@@ -1179,6 +1343,7 @@ class Series(PatchSet):
             # need an empty commit
             patch.set_bottom(head, backup = True)
             patch.set_top(head, backup = True)
+            self.__applied_cache.change(name, top, bottom, head, head)
             modified = True
         elif head == bottom:
             # reset the backup information. No need for logging
@@ -1191,6 +1356,7 @@ class Series(PatchSet):
             # The current patch is empty after merge.
             patch.set_bottom(head, backup = True)
             patch.set_top(head, backup = True)
+            self.__applied_cache.change(name, top, bottom, head, head)
 
             # Try the fast applying first. If this fails, fall back to the
             # three-way merge
@@ -1236,6 +1402,8 @@ class Series(PatchSet):
         patch = self.get_patch(name)
         old_bottom = patch.get_old_bottom()
         old_top = patch.get_old_top()
+        curr_bottom = patch.get_bottom()
+        curr_top = patch.get_top()
 
         # the top of the patch is changed by a push operation only
         # together with the bottom (otherwise the top was probably
@@ -1247,6 +1415,8 @@ class Series(PatchSet):
         git.reset()
         self.pop_patch(name)
         ret = patch.restore_old_boundaries()
+        self.__applied_cache.change(name, curr_top, curr_bottom,
+                                    old_top, old_bottom)
         if ret:
             self.log_patch(patch, 'undo')
 

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux