[RFCv4 2/5] 1/2: Add Python support library for CVS remote helper

Johan Herland <johan@xxxxxxxxxxx> · Mon, 17 Aug 2009 05:35:58 +0200

This patch introduces parts of a Python package called "git_remote_cvs"
containing the building blocks of the CVS remote helper.
The CVS remote helper itself is NOT part of this patch.

This patch has been improved by the following contributions:
- David Aguilar: Lots of Python coding style fixes

Cc: David Aguilar <davvid@xxxxxxxxx>
Signed-off-by: Johan Herland <johan@xxxxxxxxxxx>
---

This patch and the next patch belong together in the same commit, but
the 100K limit (?) on the git mailing list forces me to split this
patch in two.

...Johan

 git_remote_cvs/changeset.py        |  126 +++++
 git_remote_cvs/cvs.py              |  998 ++++++++++++++++++++++++++++++++++++
 git_remote_cvs/cvs_symbol_cache.py |  313 +++++++++++
 git_remote_cvs/util.py             |  186 +++++++
 4 files changed, 1623 insertions(+), 0 deletions(-)
 create mode 100644 git_remote_cvs/changeset.py
 create mode 100644 git_remote_cvs/cvs.py
 create mode 100644 git_remote_cvs/cvs_symbol_cache.py
 create mode 100644 git_remote_cvs/util.py

diff --git a/git_remote_cvs/changeset.py b/git_remote_cvs/changeset.py
new file mode 100644
index 0000000..9eea9d2
--- /dev/null
+++ b/git_remote_cvs/changeset.py
@@ -0,0 +1,126 @@
+#!/usr/bin/env python
+
+"""Code for collecting individual CVS revisions into "changesets"
+
+A changeset is a collection of CVSRev objects that belong together in
+the same "commit".  This is a somewhat artificial construct on top of
+CVS, which only stores changes at the per-file level.  Normally, CVS
+users create several CVS revisions simultaneously by applying the
+"cvs commit" command to several files with related changes.  This
+module tries to reconstruct this notion of related revisions.
+
+"""
+
+from git_remote_cvs.util import debug, error, die
+
+
+class Changeset(object):
+
+    """Encapsulate a single changeset/commit."""
+
+    __slots__ = ('revs', 'date', 'author', 'message')
+
+    # The maximum time between the changeset's date, and the date of a
+    # rev to be included in that changeset.
+    MaxSecondsBetweenRevs = 8 * 60 * 60  # 8 hours
+
+    @classmethod
+    def from_rev (cls, rev):
+        """Return a Changeset based on the given CVSRev object."""
+        c = cls(rev.date, rev.author, rev.message)
+        result = c.add(rev)
+        assert result
+        return c
+
+    def __init__ (self, date, author, message):
+        """Create a new Changeset with the given metadata."""
+        self.revs = {}  # dict: path -> CVSRev object
+        self.date = date  # CVSDate object
+        self.author = author
+        self.message = message  # Lines of commit message
+
+    def __str__ (self):
+        """Stringify this Changeset; an empty message is shown as ''."""
+        msg = self.message and self.message[0] or ""  # First line only
+        # Limit message to 25 chars
+        if len(msg) > 25:
+            msg = msg[:22] + "..."
+        return ("<Changeset @(%s) by %s (%s) updating %i files>" %
+                (self.date, self.author, msg, len(self.revs)))
+
+    def __iter__ (self):
+        """Return iterator traversing the CVSRevs in this Changeset."""
+        return self.revs.itervalues()
+
+    def __getitem__ (self, path):
+        """Look up a specific CVSRev in this Changeset."""
+        return self.revs[path]
+
+    def within_time_window (self, rev):
+        """Return True iff the rev is within the time window of self."""
+        return abs(rev.date.diff(self.date)) <= self.MaxSecondsBetweenRevs
+
+    def add (self, rev):
+        """Add the given CVSRev to this Changeset.
+
+        The addition will only succeed if the following holds:
+          - rev.author == self.author
+          - rev.message == self.message
+          - rev.path is not in self.revs
+          - rev.date is within MaxSecondsBetweenRevs of self.date
+        If the addition succeeds, True is returned; otherwise False.
+
+        """
+        if rev.author != self.author or \
+           rev.message != self.message or \
+           rev.path in self.revs or \
+           not self.within_time_window(rev):
+            return False
+
+        self.revs[rev.path] = rev
+        return True
+
+
+def build_changesets_from_revs (cvs_revs):
+    """Organize CVSRev objects into a chronological list of Changesets."""
+    # Construct chronological list of CVSRev objects
+    chron_revs = []
+    for path, d in cvs_revs.iteritems():
+        i = 0  # Current index into chron_revs
+        for revnum, cvsrev in sorted(d.iteritems()):
+            assert path == cvsrev.path
+            assert revnum == cvsrev.num
+            while i < len(chron_revs) and cvsrev.date > chron_revs[i].date:
+                i += 1
+            # Insert cvsrev at position i in chron_revs
+            chron_revs.insert(i, cvsrev)
+            i += 1
+
+    changesets = []  # Chronological list of Changeset objects
+    while len(chron_revs):
+        # There are still more revs to be added to Changesets
+        # Create Changeset based on the first rev in chron_revs
+        changeset = Changeset.from_rev(chron_revs.pop(0))
+        # Keep adding revs chronologically until MaxSecondsBetweenRevs
+        rejects = []  # Revs that cannot be added to this changeset
+        while len(chron_revs):
+            rev = chron_revs.pop(0)
+            reject = False
+            # First, if we have one of rev's parents in rejects, we
+            # must also reject rev
+            for r in rejects:
+                if r.path == rev.path:
+                    reject = True
+                    break
+            # Next, add rev to changeset, reject if add fails
+            if not reject:
+                reject = not changeset.add(rev)
+            if reject:
+                rejects.append(rev)
+                # stop trying when rev is too far in the future
+                if not changeset.within_time_window(rev):
+                    break
+        chron_revs = rejects + chron_revs  # Reconstruct remaining revs
+        changesets.append(changeset)
+
+    return changesets
diff --git a/git_remote_cvs/cvs.py b/git_remote_cvs/cvs.py
new file mode 100644
index 0000000..f870ae0
--- /dev/null
+++ b/git_remote_cvs/cvs.py
@@ -0,0 +1,998 @@
+#!/usr/bin/env python
+
+"""Functionality for interacting with CVS repositories.
+
+This module provides classes for interrogating a CVS repository via a
+CVS working directory (aka. checkout), or via direct queries using the
+"cvs rlog" command.
+
+Also, classes for encapsulating fundamental CVS concepts (like CVS
+revision/branch numbers) are provided.
+"""
+
+import sys
+import os
+import shutil
+import time
+from calendar import timegm
+import unittest
+
+from git_remote_cvs.util import (debug, error, die, ProgressIndicator,
+                                 start_command, run_command,
+                                 file_reader_method, file_writer_method)
+
+
+class CVSNum(object):
+
+    """Encapsulate a single CVS revision/branch number.
+
+    Provides functionality for common operations on CVS numbers.
+
+    A CVS number consists of a list of components separated by periods
+    ('.'), where each component is a decimal number.  Inspecting the
+    components from left to right, the odd-numbered (1st, 3rd, 5th,
+    etc.) components represent branches in the CVS history tree, while
+    the even-numbered (2nd, 4th, 6th, etc.) components represent
+    revisions on the branch specified in the previous position.
+    Thus "1.2" denotes the second revision on the first branch
+    (aka. trunk), while "1.2.4.6" denotes the sixth revision of the
+    fourth branch started from revision "1.2".
+
+    Therefore, in general, a CVS number with an even number of
+    components denotes a revision (we call this a "revision number"),
+    while an odd number of components denotes a branch (called a
+    "branch number").
+
+    There are a few complicating peculiarities: If there is an even
+    number of components, and the second-last component is 0, the
+    number is not a revision number, but is rather equivalent to the
+    branch number we get by removing the 0-component.  I.e. "1.2.0.4"
+    is equivalent to "1.2.4".
+
+    A branch number (except the trunk: "1") always has a "branch point"
+    revision, i.e. the revision from which the branch was started.
+    This revision is found by removing the last component of the branch
+    number.  For example the branch point of "1.2.4" is "1.2".
+
+    Conversely, all revision numbers belong to a corresponding branch,
+    whose branch number is found by removing the last component.
+    Examples: The "1.2.4.6" revision belongs to the "1.2.4" branch,
+    while the "1.2" revision belongs to the "1" branch (the "trunk").
+
+    From this we can programmatically determine the ancestry of any
+    revision number, by decrementing the last revision component until
+    it equals 1, and then trim off the last two components to get to
+    the branch point, and repeat the process from there until we reach
+    the initial revision (typically "1.1").  For example, recursively
+    enumerating the parent revisions of "1.2.4.6" yields the following
+    revisions:
+    "1.2.4.5", "1.2.4.4", "1.2.4.3", "1.2.4.2", "1.2.4.1", "1.2", "1.1"
+
+    """
+
+    __slots__ = ('c',)
+
+    @staticmethod
+    def decompose (cvsnum):
+        """Split the given CVS number into a tuple of int components.
+
+        Branch numbers are normalized to the odd-numbered components
+        form (i.e. removing the second last '0' component).  Raises
+        ValueError for an empty number or one ending in a 0 component.
+        Examples:
+          '1.2.4.8' -> (1, 2, 4, 8)
+          '1.2.3'   -> (1, 2, 3)
+          '1.2.0.5' -> (1, 2, 5)
+
+        """
+        if cvsnum:
+            r = [int(n) for n in cvsnum.split('.')]
+        else:
+            r = []
+        if len(r) >= 2 and r[-2] == 0:
+            del r[-2]
+        if not r or r[-1] == 0:  # Empty input used to raise IndexError
+            raise ValueError(cvsnum)
+        return tuple(r)
+
+    @staticmethod
+    def compose (c):
+        """Join the given sequence of int components into a CVS number.
+
+        E.g.: (1, 2, 4, 8) -> '1.2.4.8'
+        Raises ValueError if c is empty or ends in a 0 component.
+        """
+        if not c or c[-1] == 0:  # Empty c used to raise IndexError
+            raise ValueError(str(c))
+        return ".".join(map(str, c))
+
+    @classmethod
+    def from_components (cls, args):
+        """Create a CVSNum from the given list of numerical components."""
+        return cls(cls.compose(args))
+
+    @classmethod
+    def disjoint (cls, a, b):
+        """Return True iff the CVS numbers are historically disjoint.
+
+        Two CVS numbers are disjoint if they do not share the same
+        historical line back to the initial revision.  In other words:
+        the two numbers are disjoint if the history (i.e. set of parent
+        revisions all the way back to the initial (1.1) revision) of
+        neither number is a superset of the other's history.
+        See test_disjoint() for practical examples.
+
+        """
+        if a.is_branch():
+            a = cls.from_components(a.c + (1,))
+        if b.is_branch():
+            b = cls.from_components(b.c + (1,))
+        if len(a.c) > len(b.c):
+            a, b = b, a  # a is now shortest
+        pairs = zip(a.c, b.c)
+        for pa, pb in pairs[:-1]:
+            if pa != pb:
+                return True
+        if len(a) == len(b):
+            return False
+        common_len = len(a)
+        if a.c[common_len - 1] <= b.c[common_len - 1]:
+            return False
+        return True
+
+
+    def __init__ (self, cvsnum):
+        """Create a CVSNum object from the given CVS number string."""
+        self.c = self.decompose(str(cvsnum))
+
+    def __repr__ (self):
+        """Return a string representation of this object."""
+        return self.compose(self.c)
+
+    def __str__ (self):
+        """Return a string representation of this object."""
+        return repr(self)
+
+    def __hash__ (self):
+        """Create a hash value for this CVS number."""
+        return hash(repr(self))
+
+    def __len__ (self):
+        """Return number of components in this CVS number."""
+        return len(self.c)
+
+    def __cmp__ (self, other):
+        """Comparison method for CVS numbers."""
+        try:
+            return cmp(self.c, other.c)
+        except AttributeError:
+            return 1
+
+    def __getitem__ (self, key):
+        """Return the Xth component of this CVS number."""
+        return self.c[key]
+
+    def is_rev (self):
+        """Return True iff this number is a CVS revision number."""
+        return len(self.c) % 2 == 0 and len(self.c) >= 2 and self.c[-2] != 0
+
+    def is_branch (self):
+        """Return True iff this number is a CVS branch number."""
+        return len(self.c) % 2 != 0 or (len(self.c) >= 2 and self.c[-2] == 0)
+
+    def components (self):
+        """Return a list of integer components in this CVS number."""
+        return list(self.c)
+
+    def branch (self):
+        """Return the branch on which the given number lives.
+
+        Revisions: chop the last component to find the branch, e.g.:
+            1.2.4.6 -> 1.2.4
+            1.1 -> 1
+        Branches: unchanged
+
+        """
+        if self.is_rev():
+            return self.from_components(self.c[:-1])
+        return self
+
+    def parent (self):
+        """Return the parent/previous revision number to this number.
+
+        For revisions, this is the previous revision, e.g.:
+            1.2.4.6 -> 1.2.4.5
+            1.2.4.1 -> 1.2
+            1.1 -> None
+            2.1 -> None
+        For branches, this is the branch point, e.g.:
+            1.2.4 -> 1.2
+            1 -> None
+            2 -> None
+
+        """
+        if len(self.c) < 2:
+            return None
+        elif len(self.c) % 2:  # Branch number
+            return self.from_components(self.c[:-1])
+        else:  # Revision number
+            assert self.c[-1] > 0
+            result = self.components()
+            result[-1] -= 1  # Decrement final component
+            if result[-1] == 0:  # We're at the start of the branch
+                del result[-2:]  # Make into branch point
+                if not result:
+                    return None
+            return self.from_components(result)
+
+    def follows (self, other):
+        """Return True iff self historically follows the given rev.
+
+        This iterates through the parents of self, and returns True iff
+        any of them equals the given rev.  Otherwise, it returns False.
+
+        """
+        assert other.is_rev()
+        cur = self
+        while cur:
+            if cur == other:
+                return True
+            cur = cur.parent()
+        return False
+
+    def on_branch (self, branch):
+        """Return True iff this rev is on the given branch.
+
+        The revs considered to be "on" a branch X also includes the
+        branch point of branch X.
+
+        """
+        return branch == self.branch() or branch.parent() == self
+
+
+class TestCVSNum(unittest.TestCase):
+
+    """CVSNum selftests."""
+
+    def test_basic (self):
+        """CVSNum basic selftests."""
+        self.assertEqual(CVSNum("1.2.4"), CVSNum("1.2.0.4"))
+        self.assert_(CVSNum("1.2.4").is_branch())
+        self.assert_(CVSNum("1.2").is_rev())
+        self.assert_(CVSNum("1").is_branch())
+        self.assert_(CVSNum("1.2.4.6").is_rev())
+        self.assertEqual(CVSNum("1.2.4.6").components(), [1, 2, 4, 6])
+        self.assertEqual(CVSNum.from_components([1, 2, 4, 6]),
+                         CVSNum("1.2.4.6"))
+        self.assertEqual(str(CVSNum.from_components([1, 2, 4, 6])), "1.2.4.6")
+        self.assertEqual(len(CVSNum("1.2.4.6")), 4)
+        self.assertEqual(CVSNum("1.2.4.6").branch(), CVSNum("1.2.4"))
+        self.assertEqual(CVSNum("1.2.4").branch(), CVSNum("1.2.4"))
+        self.assertEqual(CVSNum("1.1").branch(), CVSNum("1"))
+        self.assertEqual(CVSNum("1").branch(), CVSNum("1"))
+        self.assertEqual(CVSNum("1.2.4.6").parent(), CVSNum("1.2.4.5"))
+        self.assertEqual(CVSNum("1.2.4.1").parent(), CVSNum("1.2"))
+        self.assertEqual(CVSNum("1.2").parent(), CVSNum("1.1"))
+        self.assert_(CVSNum("1.1").parent() is None)
+        self.assert_(CVSNum("2.1").parent() is None)
+        self.assertEqual(CVSNum("1.2.4").parent(), CVSNum("1.2"))
+        self.assert_(CVSNum("1").parent() is None)
+        self.assert_(CVSNum("2").parent() is None)
+
+    def test_follows (self):
+        """CVSNum.follows() selftests."""
+        self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.1")))
+        self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2")))
+        self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.1")))
+        self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.2")))
+        self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.3")))
+        self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.4")))
+        self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.5")))
+        self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.6")))
+        self.assertFalse(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.7")))
+        self.assertFalse(CVSNum("1.2.4.6").follows(CVSNum("1.3")))
+        self.assertFalse(CVSNum("1.1").follows(CVSNum("1.2.4.6")))
+
+    def test_disjoint (self):
+        """CVSNum.disjoint() selftests."""
+        tests = [
+            ("1.2", "1.1", False),
+            ("1.2", "1.2", False),
+            ("1.2", "1.3", False),
+            ("1.2", "1.1.2", True),
+            ("1.2", "1.1.2.3", True),
+            ("1.2.4", "1.1", False),
+            ("1.2.4", "1.2", False),
+            ("1.2.4", "1.3", True),
+            ("1.2.4", "1.2.2", True),
+            ("1.2.4", "1.2.4", False),
+            ("1.2.4", "1.2.6", True),
+            ("1.2.4", "1.2.2.4", True),
+            ("1.2.4", "1.2.4.4", False),
+            ("1.2.4", "1.2.6.4", True),
+            ("1.2.4.6", "1.1", False),
+            ("1.2.4.6", "1.2", False),
+            ("1.2.4.6", "1.3", True),
+            ("1.2.4.6", "1.2.2", True),
+            ("1.2.4.6", "1.2.2.1", True),
+            ("1.2.4.6", "1.2.4", False),
+            ("1.2.4.6", "1.2.4.5", False),
+            ("1.2.4.6", "1.2.4.6", False),
+            ("1.2.4.6", "1.2.4.7", False),
+            ("1.2.4.6.8.10", "1.2.4.5", False),
+            ("1.2.4.6.8.10", "1.2.4.6", False),
+            ("1.2.4.6.8.10", "1.2.4.7", True),
+        ]
+        for a, b, result in tests:
+            self.assertEqual(CVSNum.disjoint(CVSNum(a), CVSNum(b)), result)
+            self.assertEqual(CVSNum.disjoint(CVSNum(b), CVSNum(a)), result)
+
+
+class CVSState(object):
+
+    """Encapsulate a historical state in CVS (a set of paths and nums).
+
+    This class is a container of CVS pathnames and associated CVSNum
+    objects.
+
+    No communication with a CVS working directory or repository is done
+    in this class, hence only basic sanity checks are performed:
+      - A path may only appear once in a CVSState.
+      - When adding a path:num pair, path may not already exist in self
+      - When replacing a path:num pair, path must already exist in self
+      - When removing a path:num pair, both path and num must be given
+
+    IMPORTANT: Objects of this class are hash()able (to support being
+    used as keys in a dict), but they are also mutable.  It is
+    therefore up to the caller to make sure that the object is not
+    changed after being stored in a data structure indexed by its hash
+    value.
+
+    """
+
+    __slots__ = ('revs', '_hash')
+
+    def __init__ (self):
+        """Create a new, empty CVSState."""
+        self.revs = {}  # dict: path -> CVSNum object
+        self._hash = None
+
+    def __iter__ (self):
+        """Return iterator traversing the (path, CVSNum)s in this CVSState."""
+        return self.revs.iteritems()
+
+    def __cmp__ (self, other):
+        """Comparison method for CVSState objects."""
+        return cmp(self.revs, other.revs)
+
+    def __str__ (self):
+        """Stringify this CVSState by listing the contained revisions."""
+        return "".join(["%s:%s\n" % (p, n) for p, n in sorted(self)])
+
+    def __hash__ (self):
+        """Create a hash value for this CVSState."""
+        if self._hash is None:
+            self._hash = hash(str(self))
+        return self._hash
+
+    def __getitem__ (self, path):
+        """Return the CVSNum associated with the given path in self."""
+        return self.revs[path]
+
+    def get (self, path, default = None):
+        """Return the CVSNum associated with the given path in self."""
+        return self.revs.get(path, default)
+
+    def paths (self):
+        """Return the path names contained within this CVSState."""
+        return self.revs.iterkeys()
+
+    def add (self, path, revnum):
+        """Add the given path:revnum to this CVSState."""
+        assert path not in self.revs
+        self._hash = None
+        self.revs[path] = revnum
+
+    def replace (self, path, revnum):
+        """Replace the revnum associated with the given path."""
+        assert path in self.revs
+        self._hash = None
+        self.revs[path] = revnum
+
+    def remove (self, path, revnum):
+        """Remove the given path:revnum association from this CVSState."""
+        assert path in self.revs and self.revs[path] == revnum
+        self._hash = None
+        del self.revs[path]
+
+    def copy (self):
+        """Create and return a copy of this object."""
+        ret = CVSState()
+        ret.revs = self.revs.copy()
+        ret._hash = self._hash
+        return ret
+
+    def load_data (self, note_data):
+        """Load note data as formatted by self.__str__()."""
+        for line in note_data.split("\n"):
+            line = line.strip()
+            if not line:
+                continue
+            path, num = line.rsplit(':', 1)
+            self.add(path, CVSNum(num))
+        self._hash = None  # Rehash lazily; note_data may not equal str(self)
+
+    def print_members (self, f = sys.stdout, prefix = ""):
+        """Write the members of this CVSState to the given file object."""
+        for path, num in sorted(self):
+            print >> f, "%s%s:%s" % (prefix, path, num)
+
+    @file_reader_method(missing_ok = True)
+    def load (self, f):
+        """Load CVS state from the given file name/object."""
+        if f:
+            self.load_data(f.read())
+
+    @file_writer_method
+    def save (self, f):
+        """Save CVS state to the given file name/object."""
+        assert f
+        print >> f, str(self),
+
+
+class CVSDate(object):
+
+    """Encapsulate a timestamp, as reported by CVS.
+
+    The internal representation of a timestamp is two integers, the
+    first representing the timestamp as #seconds since epoch (UTC),
+    and the second representing the timezone as #minutes offset from
+    UTC.
+
+    Example: "2007-09-05 17:26:28 -0200" (local time) is converted to
+             (1189020388, -120)
+
+    """
+
+    __slots__ = ('ts', 'tz')
+
+    def __init__ (self, date_str = None, in_utc = False):
+        """Convert CVS date string into a CVSDate object.
+
+        A CVS timestamp string has one of the following forms:
+          - "YYYY-MM-DD hh:mm:ss SZZZZ"
+          - "YYYY/MM/DD hh:mm:ss" (with timezone assumed to be UTC)
+        The in_utc parameter determines whether the timestamp part of
+        the given string (the "YYYY-MM-DD hh:mm:ss" part) is given in
+        local time or UTC (normally CVS dates are given in local time).
+        If given in local time, the timezone offset is subtracted from
+        the timestamp in order to make the time in UTC format.
+
+        """
+        if date_str is None:
+            self.ts, self.tz = 0, 0
+            return
+        if date_str == "now":
+            self.ts, self.tz = time.time(), 0
+            return
+        date_str = date_str.strip()
+        # Set up self.ts and self.tz
+        if date_str.count(" ") == 2:
+            # Assume format "YYYY-MM-DD hh:mm:ss SZZZZ"
+            t, z = date_str.rsplit(" ", 1)
+            # Convert timestamp to #secs since epoch (UTC)
+            self.ts = timegm(time.strptime(t, "%Y-%m-%d %H:%M:%S"))
+            # Convert timezone into #mins offset from UTC
+            self.tz = int(z[1:3]) * 60 + int(z[3:5])
+            # Incorporate timezone sign
+            if z[0] == '-':
+                self.tz *= -1
+        else:
+            assert date_str.count(" ") == 1
+            # Assume format "YYYY/MM/DD hh:mm:ss"
+            self.ts = timegm(time.strptime(date_str, "%Y/%m/%d %H:%M:%S"))
+            self.tz = 0
+        # Adjust timestamp if not already in UTC
+        if not in_utc:
+            self.ts -= self.tz * 60
+
+    def tz_str (self):
+        """Return timezone part of self in string format."""
+        sign = '+'
+        if self.tz < 0:
+            sign = '-'
+        hours, minutes = divmod(abs(self.tz), 60)
+        return "%s%02d%02d" % (sign, hours, minutes)
+
+    def __str__ (self):
+        """Reconstruct date string from members."""
+        s = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(self.ts))
+        return "%s %s" % (s, self.tz_str())
+
+    def __repr__ (self):
+        """Create a string representation of self."""
+        return "CVSDate('%s')" % (str(self))
+
+    def __hash__ (self):
+        """Create a hash value from self."""
+        return hash((self.ts, self.tz))
+
+    def __nonzero__ (self):
+        """Provide interpretation of self in a boolean context."""
+        return bool(self.ts or self.tz)
+
+    def __cmp__ (self, other):
+        """Comparison method for CVSDate objects."""
+        return cmp(self.ts, other.ts) or cmp(self.tz, other.tz)
+
+    def __eq__ (self, other):
+        """Return True iff self and other are considered equal."""
+        return self.ts == other.ts and self.tz == other.tz
+
+    def diff (self, other):
+        """Return difference between self and other in #seconds.
+
+        Invariant: self.ts == other.ts + self.diff(other)
+
+        """
+        return self.ts - other.ts
+
+
+class TestCVSDate(unittest.TestCase):
+
+    """CVSDate selftests."""
+
+    def test_basic (self):
+        """CVSDate basic selftests."""
+        a = CVSDate("2009-05-10 14:34:56 +0200")
+        b = CVSDate("2009/05/10 12:34:56")
+        self.assert_(a)
+        self.assert_(b)
+        self.assertEqual(str(a), "2009-05-10 12:34:56 +0200", str(a))
+        self.assertEqual(str(b), "2009-05-10 12:34:56 +0000", str(b))
+        self.assertNotEqual(a, b)
+        self.assertEqual(a.diff(b), 0)
+        c = CVSDate("2009-05-10 16:34:56 +0200")
+        self.assert_(c)
+        self.assertEqual(str(c), "2009-05-10 14:34:56 +0200", str(c))
+        self.assertNotEqual(c, a)
+        self.assertEqual(c.diff(a), 2 * 60 * 60)
+        self.assertEqual(a.diff(c), -2 * 60 * 60)
+
+
+class CVSRev(object):
+
+    """Encapsulate metadata on a CVS revision."""
+
+    __slots__ = ('path', 'num', 'date', 'author', 'deleted', 'message')
+
+    def __init__ (self, path, num):
+        """Create a CVSRev object for the given path:num revision."""
+        self.path = path
+        self.num = num
+        self.date = None  # CVSDate object
+        self.author = ""
+        self.deleted = None  # True or False
+        self.message = []  # Lines of commit message
+
+    def __str__ (self):
+        """Return a string listing the metadata in this CVS revision."""
+        return ("<%s:%s on %s by %s%s>" %
+                (self.path, self.num, self.date, self.author,
+                 self.deleted and ", deleted" or ""))
+
+    def __cmp__ (self, other):
+        """Comparison method for CVSRev objects."""
+        return cmp(self.path, other.path) or cmp(self.num, other.num)
+
+
+class CVSWorkDir(object):
+
+    """Encapsulate a CVS working directory.
+
+    This class auto-creates a CVS workdir/checkout in the directory
+    given to the constructor, and provides various methods for
+    interacting with this workdir.
+
+    """
+
+    def __init__ (self, workdir, cvs_repo):
+        """Create a new CVSWorkDir.
+
+        The cvs_repo argument must be a (cvs_root, cvs_module) tuple
+
+        """
+        self.d = workdir
+        self.cvs_root, self.cvs_module = cvs_repo
+        parent_dir = os.path.dirname(self.d)
+        if not os.path.isdir(parent_dir):
+            os.makedirs(parent_dir)
+        self._valid = None
+
+    def makepath(self, *args):
+        """Create path relative to working directory."""
+        return os.path.join(self.d, *args)
+
+    def valid (self):
+        """Return True iff this workdir is present and valid (cached)."""
+        if self._valid is None:
+            self._valid = True
+            try:  # No assert here: it would be stripped by "python -O"
+                for name, value in (("Root", self.cvs_root),
+                                    ("Repository", self.cvs_module)):
+                    f = open(self.makepath("CVS", name), 'r')
+                    try:
+                        self._valid &= f.read().strip() == value
+                    finally:
+                        f.close()  # Closed even if read() fails
+            except IOError:
+                self._valid = False
+        return self._valid
+
+    def remove (self):
+        """Remove this checkout."""
+        shutil.rmtree(self.d, True)
+        assert not os.path.exists(self.d)
+        self._valid = False
+
+    def checkout (self, revision = "HEAD"):
+        """Create a checkout of the given revision."""
+        self.remove()
+        parent_dir, co_dir = os.path.split(self.d)
+        args = ["cvs", "-f", "-Q", "-d", self.cvs_root, "checkout"]
+        if str(revision) != "HEAD":
+            args.extend(["-r", str(revision)])
+        args.extend(["-d", co_dir, self.cvs_module])
+        exit_code, output, errors = run_command(args, cwd = parent_dir)
+        if exit_code:
+            die("Failed to checkout CVS working directory")
+        assert not errors
+        assert not output, "output = '%s'" % (output)
+        self._valid = None
+        assert self.valid()
+
+    def update (self, revision = "HEAD", paths = None):
+        """Update the given paths to the given revision; die() on failure."""
+        if not self.valid():
+            self.checkout()
+        args = ["cvs", "-f", "-Q", "update", "-kk"]
+        if str(revision) == "HEAD":
+            args.append("-A")  # HEAD: pass -A instead of -r <revision>
+        else:
+            args.extend(["-r", str(revision)])
+        if paths is not None:
+            args.extend(paths)
+        exit_code, output, errors = run_command(args, cwd = self.d)
+        if exit_code:
+            die("Failed to update CVS working directory")
+        assert not errors
+        assert not output, "output = '%s'" % (output)
+
+    def get_revision_data (self, path, revision):
+        """Return the contents of the given CVS path:revision."""
+        if not self.valid():
+            self.checkout()
+        args = ["cvs", "-f", "-Q", "update", "-p", "-kk"]
+        if str(revision) == "HEAD":
+            args.append("-A")  # HEAD: pass -A instead of -r <revision>
+        else:
+            args.extend(["-r", str(revision)])
+        args.append(path)
+        exit_code, output, errors = run_command(args, cwd = self.d)
+        if exit_code:
+            die("Failed to retrieve CVS revision data")
+        assert not errors
+        return output
+
+    def get_modeinfo (self, paths = None):
+        """Return mode information for the given paths.
+
+        Returns a dict of path -> mode number (decimal int 755 if the
+        file is executable, 644 otherwise).  If paths is None, all
+        files in the checkout are used.  No checkout/update is done.
+
+        """
+        result = {}
+        if paths is not None:
+            for path in paths:
+                fullpath = os.path.join(self.d, path)
+                mode = 644  # Decimal int, not an octal literal
+                if os.access(fullpath, os.X_OK):
+                    mode = 755
+                assert path not in result
+                result[path] = mode
+        else:  # Return mode information for all paths
+            for dirpath, dirnames, filenames in os.walk(self.d):
+                # Don't descend into CVS subdirs
+                try:
+                    dirnames.remove('CVS')
+                except ValueError:
+                    pass  # This directory has no CVS subdir
+                assert dirpath.startswith(self.d)
+                directory = dirpath[len(self.d):].lstrip("/")
+                for fname in filenames:
+                    path = os.path.join(directory, fname)
+                    fullpath = os.path.join(dirpath, fname)
+                    mode = 644  # Decimal int, not an octal literal
+                    if os.access(fullpath, os.X_OK):
+                        mode = 755
+                    assert path not in result
+                    result[path] = mode
+        return result
+
+    @classmethod
+    def parse_entries (cls, entries, prefix, directory = ""):
+        """Recursively parse CVS/Entries files.
+
+        Fill the given entries dict in place (nothing is returned) with
+        path -> (revnum, date, options, tag) from below prefix/directory.
+
+        See http://ximbiot.com/cvs/manual/feature/cvs_2.html#SEC19 for
+        information on the format of the CVS/Entries file.
+
+        """
+        fname = os.path.join(prefix, directory, "CVS", "Entries")
+        subdirs = []
+        f = open(fname, 'r')
+        for line in f:
+            line = line.strip()
+            if line == "D":
+                continue  # There are no subdirectories
+            t, path, revnum, date, options, tag = line.split("/")
+            if t == "D":
+                subdirs.append(path)  # Subdirectory; parsed after this file
+                continue
+            assert line.startswith("/")
+            path = os.path.join(directory, path)  # Relative to prefix
+            revnum = CVSNum(revnum)
+            assert path not in entries
+            entries[path] = (revnum, date, options, tag)
+        f.close()
+        for d in subdirs:
+            d = os.path.join(directory, d)
+            cls.parse_entries(entries, prefix, d)
+
+    def get_state (self):
+        """Return CVSState reflecting current state of this checkout.
+
+        Note that the resulting CVSState will never contain any
+        deleted/dead files.  Other CVSStates to be compared to the one
+        returned from here should remove deleted/dead entries first.
+
+        """
+        assert self.valid()
+        entries = {}  # Filled in place by parse_entries()
+        result = CVSState()
+        self.parse_entries(entries, self.d)
+        for path, info in entries.iteritems():
+            result.add(path, info[0])  # info[0] is the CVSNum revnum
+        return result
+
+
+class CVSLogParser(object):
+
+    """Encapsulate the execution of a "cvs rlog" command."""
+
+    def __init__ (self, cvs_repo):
+        """Create a new CVSLogParser.
+
+        The cvs_repo argument must be a (cvs_root, cvs_module) tuple
+
+        """
+        self.cvs_root, self.cvs_module = cvs_repo
+
+    def cleanup_path (self, cvs_path):
+        """Turn an RCS file path from the CVS log into a module path."""
+        cvsprefix = "/".join((self.cvs_root[self.cvs_root.index("/"):],
+                              self.cvs_module))
+        assert cvs_path.startswith(cvsprefix)
+        assert cvs_path.endswith(",v")
+        # Drop cvsprefix and ,v-extension
+        cvs_path = cvs_path[len(cvsprefix):-2]
+        # Split the remaining path into (non-empty) components
+        path_comps = [c for c in cvs_path.strip().split('/') if c]
+        # Remove 'Attic' from CVS paths
+        if len(path_comps) >= 2 and path_comps[-2] == "Attic":
+            del path_comps[-2]
+        # Reconstruct resulting "cleaned" path
+        return "/".join(path_comps)
+
+    def __call__ (self, line):
+        """Parse the given line from the CVS log.
+
+        Must be reimplemented by subclass
+
+        """
+        pass
+
+    def finish (self):
+        """This method is invoked after the last line has been parsed.
+
+        May be reimplemented by subclass
+
+        """
+        pass
+
+    def run (self, paths = None, no_symbols = False, revisions = None):
+        """Execute "cvs rlog" with the given arguments.
+
+        self.__call__() is invoked once for each line in the CVS log,
+        then self.finish() exactly once.  Returns the cvs exit code.
+
+        """
+        args = ["cvs", "-f", "-q", "-d", self.cvs_root, "rlog"]
+        if no_symbols:
+            args.append("-N")
+        if revisions:
+            args.append("-r%s" % (revisions))
+        if paths is not None:
+            for p in paths:
+                args.append("%s/%s" % (self.cvs_module, p))
+        else:
+            args.append(self.cvs_module)
+        proc = start_command(args)
+        proc.stdin.close()
+        for line in proc.stdout:
+            self(line.rstrip())  # Call self's line parser
+        # stdout is at EOF, so block until cvs exits; polling in a loop
+        # here would only burn CPU while waiting for the exit code.
+        proc.wait()
+        assert proc.stdout.read() == ""
+        self.finish()  # Notify subclass that parsing is finished
+        exit_code = proc.returncode
+        if exit_code:
+            error("'%s' returned exit code %i, and errors:\n---\n%s---",
+                  " ".join(args), exit_code, proc.stderr.read())
+        return exit_code
+
+
+class CVSRevLister(CVSLogParser):
+
+    """Extract CVSRev objects (with revision metadata) from a CVS log."""
+
+    def __init__ (self, cvs_repo, show_progress = False):
+        """Create a new CVSRevLister.
+
+        The cvs_repo argument must be a (cvs_root, cvs_module) tuple
+        show_progress determines whether progress indication is shown.
+
+        """
+        super(CVSRevLister, self).__init__(cvs_repo)
+        self.cur_file = None  # Current CVS file being processed
+        self.cur_file_numrevs = 0  # #revs in current CVS file
+        self.cur_rev = None  # Current CVSRev under construction
+        self.progress = None
+        if show_progress:
+            self.progress = ProgressIndicator("\t", sys.stderr)
+        # Store found revs in a two-level dict structure:
+        # filename -> revnum -> CVSRev
+        self.revs = {}
+        # Possible states:
+        # - BeforeRevs  - waiting for "total revisions:"
+        # - BetweenRevs - waiting for "----------------------------"
+        # - ReadingRev  - reading CVS revision details
+        self.state = 'BeforeRevs'
+
+    def __call__ (self, line):
+        """Line parser; this method is invoked for each line in the log."""
+        if self.state == 'BeforeRevs':
+            if line.startswith("RCS file: "):
+                self.cur_file = self.cleanup_path(line[10:])
+                assert self.cur_file not in self.revs
+                self.revs[self.cur_file] = {}
+            elif line.startswith("total revisions: "):
+                assert self.cur_file
+                totalrevs_unused, selectedrevs = line.split(";")
+                self.cur_file_numrevs = int(selectedrevs.split(":")[1].strip())
+                self.state = 'BetweenRevs'
+        elif self.state == 'BetweenRevs':
+            if (line == "----------------------------" or
+                line == "======================================"
+                        "======================================="):
+                if self.cur_rev:
+                    # Finished current revision
+                    f = self.revs[self.cur_file]
+                    assert self.cur_rev.num not in f
+                    f[self.cur_rev.num] = self.cur_rev
+                    self.cur_rev = None
+                    if self.progress:
+                        self.progress()
+                if line == "----------------------------":
+                    self.state = 'ReadingRev'  # Another revision follows
+                else:
+                    # Finalize current CVS file
+                    assert len(self.revs[self.cur_file]) == \
+                           self.cur_file_numrevs
+                    self.cur_file = None
+                    self.state = 'BeforeRevs'
+            elif self.cur_rev:
+                # Currently in the middle of a revision.
+                if line.startswith("branches:  %s" % (self.cur_rev.num)) and \
+                   line.endswith(";"):
+                    return  # Skip 'branches:' lines
+                # This line is part of the commit message.
+                self.cur_rev.message.append(line)
+        elif self.state == 'ReadingRev':
+            if line.startswith("revision "):
+                self.cur_rev = CVSRev(self.cur_file, CVSNum(line.split()[1]))
+            else:
+                date, author, state, dummy = line.split(";", 3)
+                assert date.startswith("date: ")
+                self.cur_rev.date = CVSDate(date[6:])  # Strip "date: "
+                assert author.strip().startswith("author: ")
+                self.cur_rev.author = author.strip()[8:]  # Strip "author: "
+                assert state.strip().startswith("state: ")
+                state = state.strip()[7:]  # Strip "state: "
+                self.cur_rev.deleted = state == "dead"
+                self.state = 'BetweenRevs'  # Commit message lines follow
+
+    def finish (self):
+        """Check that parsing ended between files; finish progress output."""
+        assert self.state == 'BeforeRevs'
+        if self.progress:
+            self.progress.finish("Parsed %i revs in %i files" %
+                                 (self.progress.n, len(self.revs)))
+
+
+def fetch_revs (path, from_rev, to_rev, symbol, cvs_repo):
+    """Fetch CVSRevs for each rev in <path:from_rev, path:symbol].
+
+    Return a dict of CVSRev objects (revnum -> CVSRev), where each
+    CVSRev encapsulates a CVS revision in the range from
+    path:from_rev to path:symbol (inclusive).  If symbol currently
+    refers to from_rev (i.e. nothing has happened since the last
+    import), the returned dict will have exactly one entry (from_rev).
+    If there is no valid revision range between from_rev and symbol,
+    the returned dict will be empty.  Situations in which an empty dict
+    is returned, include:
+    - symbol is no longer defined on this path
+    - symbol refers to a revision that is disjoint from from_rev
+
+    from_rev may be None, meaning that all revisions from the initial
+    version of path up to the revision currently referenced by symbol
+    should be fetched.
+
+    If the revision currently referenced by symbol is disjoint from
+    from_rev, the returned dict will be empty.
+
+    Note that there is lots of unexpected behaviour in the handling of
+    the 'cvs rlog -r' parameter: Say you have a branch, called
+    'my_branch', that points to branch number 1.1.2 of a file.  Say
+    there are 3 revisions on this branch: 1.1.2.1 -> 1.1.2.3 (in
+    addition to the branch point 1.1).  Now, observe the following
+    'cvs rlog' executions:
+    - cvs rlog -r0:my_branch ... returns 1.1, 1.1.2.1, 1.1.2.2, 1.1.2.3
+    - cvs rlog -r1.1:my_branch ... returns the same revs
+    - cvs rlog -rmy_branch ... returns 1.1.2.1, 1.1.2.2, 1.1.2.3
+    - cvs rlog -rmy_branch: ... returns the same revs
+    - cvs rlog -r:my_branch ... returns the same revs
+    - cvs rlog -r::my_branch ... returns the same revs
+    - cvs rlog -r1.1.2.1: ... returns the same revs
+    Here is where it gets really weird:
+    - cvs rlog -r1.1.2.1:my_branch ... returns 1.1.2.1 only
+    - cvs rlog -r1.1.2.2:my_branch ... returns 1.1.2.1, 1.1.2.2
+    - cvs rlog -r1.1.2.3:my_branch ... returns 1.1.2.1, 1.1.2.2, 1.1.2.3
+
+    In other words the 'cvs rlog -rfrom_rev:symbol' scheme that we
+    normally use will not work in the case where from_rev is _on_ the
+    branch pointed at by the symbol.
+
+    Therefore, we need an extra parameter, to_rev, which we can use to:
+    1. Detect when this situation is present.
+    2. Work around by using 'cvs rlog -rfrom_rev:to_rev' instead.
+
+    """
+    if from_rev is None:  # Initial import
+        from_rev = "0"  # "cvs rlog -r0:X" fetches from initial revision
+    elif to_rev and to_rev.branch() == from_rev.branch():
+        symbol = to_rev  # Use to_rev instead of given symbol
+    # Run 'cvs rlog' on range [from_rev, symbol] and parse CVSRev objects
+    parser = CVSRevLister(cvs_repo)
+    parser.run((path,), True, "%s:%s" % (from_rev, symbol))  # no_symbols
+    assert len(parser.revs) == 1
+    assert path in parser.revs
+    return parser.revs[path]
+
+
+if __name__ == '__main__':
+    unittest.main()  # Run this module's unit tests when run as a script
diff --git a/git_remote_cvs/cvs_symbol_cache.py b/git_remote_cvs/cvs_symbol_cache.py
new file mode 100644
index 0000000..cc8d88b
--- /dev/null
+++ b/git_remote_cvs/cvs_symbol_cache.py
@@ -0,0 +1,313 @@
+#!/usr/bin/env python
+
+"""Implementation of a local CVS symbol cache.
+
+A CVS symbol cache stores a list of CVS symbols and the CVS state
+associated with each of those CVS symbols at some point in time.
+
+Keeping a local cache of CVS symbols is often needed because the
+design of CVS makes it potentially very expensive to query the CVS
+server directly for CVS symbols and associated states.
+
+In these cases, a local CVS symbol cache can provide equivalent
+(although possibly out-of-date) information immediately.
+
+Synchronization with the current state on the CVS server can be
+done on a symbol-by-symbol basis (by checking out a given symbol
+and extracting the CVS state from the CVS work tree), or by
+synchronizing _all_ CVS symbols in one operation (by executing
+'cvs rlog' and parsing CVS states from its output).
+
+"""
+
+import sys
+import os
+
+from git_remote_cvs.util import debug, error, die, ProgressIndicator
+from git_remote_cvs.cvs import CVSNum, CVSState, CVSLogParser
+
+
+class CVSSymbolStateLister(CVSLogParser):
+
+    """Extract current CVSStates for all CVS symbols from a CVS log."""
+
+    def __init__ (self, cvs_repo, show_progress = False):
+        """Create a new CVSSymbolStateLister.
+
+        The cvs_repo argument must be a (cvs_root, cvs_module) tuple
+        show_progress determines whether a progress indicator should
+        be displayed.
+
+        """
+        super(CVSSymbolStateLister, self).__init__(cvs_repo)
+        self.symbols = {}  # CVS symbol name -> CVSState object
+        self.cur_file = None  # current CVS file being processed
+        self.cur_file_numrevs = 0  # #revs in current CVS file
+        self.cur_revnum = None  # current revision number
+        self.rev2syms = {}  # CVSNum -> [CVS symbol names]
+        self.cur_revs = {}  # CVSNum -> True/False (deleted)
+        self.head_num = None  # CVSNum of the HEAD rev or branch
+
+        # Possible states:
+        # - BeforeSymbols - waiting for "symbolic names:"
+        # - WithinSymbols - reading CVS symbol names
+        # - BeforeRevs  - waiting for "total revisions:"
+        # - BetweenRevs - waiting for "----------------------------"
+        # - ReadingRev  - reading CVS revision details
+        self.state = 'BeforeSymbols'
+
+        self.progress = None
+        if show_progress:
+            self.progress = ProgressIndicator("\t", sys.stderr)
+
+    def finalize_symbol_states (self):
+        """Adjust CVSStates in self.symbols based on revision data.
+
+        Based on the information found in self.rev2syms and
+        self.cur_revs, remove deleted revisions and turn branch numbers
+        into corresponding revisions in the CVSStates found in
+        self.symbols.
+
+        """
+        # Create a mapping from branch numbers to the last existing
+        # revision number on those branches
+        branch2lastrev = {}  # branch number -> revision number
+        for revnum in self.cur_revs.iterkeys():
+            branchnum = revnum.branch()
+            if (branchnum not in branch2lastrev) or \
+               (revnum > branch2lastrev[branchnum]):
+                branch2lastrev[branchnum] = revnum
+
+        for cvsnum, symbols in self.rev2syms.iteritems():
+            if cvsnum.is_branch():
+                # Turn into corresponding revision number
+                revnum = branch2lastrev.get(cvsnum, cvsnum.parent())
+                for s in symbols:
+                    state = self.symbols[s]
+                    assert state[self.cur_file] == cvsnum
+                    state.replace(self.cur_file, revnum)
+                cvsnum = revnum
+            assert cvsnum.is_rev()
+            assert cvsnum in self.cur_revs
+            if self.cur_revs[cvsnum]:  # cvsnum is a deleted rev
+                # Remove from CVSStates
+                for s in symbols:
+                    state = self.symbols[s]
+                    state.remove(self.cur_file, cvsnum)
+
+        self.rev2syms = {}  # Reset the per-file data for the next file
+        self.cur_revs = {}
+        self.cur_file = None
+
+    def __call__ (self, line):
+        """Line parser; this method is invoked for each line in the log."""
+        if self.state == 'BeforeSymbols':
+            if line.startswith("RCS file: "):
+                self.cur_file = self.cleanup_path(line[10:])
+                if self.progress:
+                    self.progress("%5i symbols found - Parsing CVS file #%i: "
+                                  "%s " % (len(self.symbols), self.progress.n,
+                                           self.cur_file,))
+            if line.startswith("head: "):
+                self.head_num = CVSNum(line[6:])  # Strip "head: "
+            if line.startswith("branch: "):
+                self.head_num = CVSNum(line[8:])  # Replaces any head_num
+            elif line == "symbolic names:":
+                assert self.head_num
+                s = self.symbols.setdefault("HEAD", CVSState())
+                s.add(self.cur_file, self.head_num)
+                r = self.rev2syms.setdefault(self.head_num, [])
+                r.append("HEAD")
+                self.head_num = None
+                self.state = 'WithinSymbols'
+        elif self.state == 'WithinSymbols':
+            if line.startswith("\t"):
+                symbol, cvsnum = line.split(":", 1)
+                symbol = symbol.strip()
+                cvsnum = CVSNum(cvsnum)
+                s = self.symbols.setdefault(symbol, CVSState())
+                s.add(self.cur_file, cvsnum)
+                r = self.rev2syms.setdefault(cvsnum, [])
+                r.append(symbol)
+            else:
+                self.state = 'BeforeRevs'  # First line not starting with tab
+        elif self.state == 'BeforeRevs':
+            if line.startswith("total revisions: "):
+                assert self.cur_file
+                totalrevs_unused, selectedrevs = line.split(";")
+                self.cur_file_numrevs = int(selectedrevs.split(":")[1].strip())
+                self.state = 'BetweenRevs'
+        elif self.state == 'BetweenRevs':
+            if (line == "----------------------------" or
+                line == "======================================"
+                        "======================================="):
+                if self.cur_revnum:
+                    assert self.cur_revnum in self.cur_revs
+                    self.cur_revnum = None
+                if line == "----------------------------":
+                    self.state = 'ReadingRev'  # Another revision follows
+                else:
+                    # Finalize current CVS file
+                    assert len(self.cur_revs) == self.cur_file_numrevs
+                    self.finalize_symbol_states()
+                    self.state = 'BeforeSymbols'
+        elif self.state == 'ReadingRev':
+            if line.startswith("revision "):
+                self.cur_revnum = CVSNum(line.split()[1])
+            else:
+                date, author, state, dummy = line.split(";", 3)
+                assert date.startswith("date: ")
+                assert author.strip().startswith("author: ")
+                assert state.strip().startswith("state: ")
+                state = state.strip()[7:]  # Strip "state: "
+                assert self.cur_revnum not in self.cur_revs
+                deleted = state == "dead"
+                self.cur_revs[self.cur_revnum] = deleted
+                self.state = 'BetweenRevs'
+
+    def finish (self):
+        """Check that parsing ended between files; finish progress output."""
+        assert self.state == 'BeforeSymbols'
+        if self.progress:
+            self.progress.finish("Parsed %i symbols in %i files" %
+                                 (len(self.symbols), self.progress.n))
+
+
+class CVSSymbolCache(object):
+
+    """Local cache of the current CVSState of CVS symbols.
+
+    Simulates a dictionary of CVS symbol -> CVSState mappings.
+
+    """
+
+    def __init__ (self, symbols_dir):
+        """Create a new CVS symbol cache, located in the given directory."""
+        self.symbols_dir = symbols_dir
+        if not os.path.isdir(self.symbols_dir):
+            os.makedirs(self.symbols_dir)
+
+    def __len__ (self):
+        """Return the number of CVS symbols stored in this cache."""
+        return len(os.listdir(self.symbols_dir))
+
+    def __iter__ (self):
+        """Return an iterator traversing symbol names stored in this cache."""
+        for filename in os.listdir(self.symbols_dir):
+            yield filename
+
+    def __contains__ (self, symbol):
+        """Return True if the given symbol is present in this cache."""
+        return os.access(os.path.join(self.symbols_dir, symbol),
+                         os.F_OK | os.R_OK)
+
+    def __getitem__ (self, symbol):
+        """Return the cached CVSState of the given CVS symbol."""
+        try:
+            f = open(os.path.join(self.symbols_dir, symbol), 'r')
+        except IOError:
+            raise KeyError("'%s'" % (symbol))
+        ret = CVSState()
+        ret.load(f)
+        f.close()
+        return ret
+
+    def __setitem__ (self, symbol, cvs_state):
+        """Store the given CVS symbol and CVSState into the cache."""
+        cvs_state.save(os.path.join(self.symbols_dir, symbol))
+
+    def __delitem__ (self, symbol):
+        """Remove the given CVS symbol from the cache."""
+        os.remove(os.path.join(self.symbols_dir, symbol))
+
+    def get (self, symbol, default = None):
+        """Return the cached CVSState of symbol, or default if missing."""
+        try:
+            return self[symbol]
+        except KeyError:
+            return default
+
+    def items (self):
+        """Yield the (CVS symbol, CVSState) tuples saved in this cache."""
+        for filename in self:
+            yield (filename, self[filename])
+
+    def clear (self):
+        """Remove all entries from this CVS symbol cache."""
+        for filename in os.listdir(self.symbols_dir):
+            os.remove(os.path.join(self.symbols_dir, filename))
+
+    def sync_symbol (self, symbol, cvs, progress):
+        """Synchronize the given CVS symbol with the CVS server.
+
+        The given CVS workdir is used for the synchronization.
+        The retrieved CVSState is also returned.
+
+        """
+        progress("Retrieving state of CVS symbol '%s'..." % (symbol))
+        cvs.update(symbol)
+        state = cvs.get_state()
+        progress("Saving state of '%s' to symbol cache..." % (symbol))
+        self[symbol] = state
+        return state
+
+    def sync_all_symbols (self, cvs_repo, progress, symbol_filter = None):
+        """Synchronize this entire CVS symbol cache with the CVS server.
+
+        This may be very expensive if the CVS repository is large, or
+        has many symbols.  After this method returns, the symbol cache
+        will be in sync with the current state on the server.
+
+        This method returns a dict with the keys 'unchanged',
+        'changed', 'added', and 'deleted', each mapping to a list of
+        CVS symbols.  Each synchronized CVS symbol appears in exactly
+        one of these lists.  Raises EnvironmentError if cvs rlog fails.
+
+        If symbol_filter is given, it must be a function that takes
+        one parameter - a CVS symbol name - and returns True if that
+        symbol should be synchronized, and False if that symbol should
+        be skipped.  Otherwise all CVS symbols are synchronized.
+
+        """
+        if symbol_filter is None:
+            symbol_filter = lambda symbol: True
+
+        # Run cvs rlog to fetch current CVSState for all CVS symbols
+        progress("Retrieving current state of all CVS symbols from CVS "
+                 "server...", lf = True)
+        parser = CVSSymbolStateLister(cvs_repo, True)
+        retcode = parser.run()
+        if retcode:
+            raise EnvironmentError(retcode, "cvs rlog exit code %i" % retcode)
+
+        # Update symbol cache with new states from the CVS server
+        progress("Updating symbol cache with current CVS state...")
+        results = {}
+        result_keys = ("unchanged", "changed", "added", "deleted")
+        for k in result_keys:
+            results[k] = []
+        # Classify existing symbols as unchanged, changed, or deleted
+        for symbol in filter(symbol_filter, self):
+            if symbol not in parser.symbols:  # Deleted
+                results["deleted"].append(symbol)
+                del self[symbol]
+            elif self[symbol] != parser.symbols[symbol]:  # Changed
+                results["changed"].append(symbol)
+                self[symbol] = parser.symbols[symbol]
+            else:  # Unchanged
+                results["unchanged"].append(symbol)
+            progress()
+        # Add symbols that are not in self
+        for symbol, state in parser.symbols.iteritems():
+            if not symbol_filter(symbol):
+                debug("Skipping CVS symbol '%s'...", symbol)
+            elif symbol in self:
+                assert state == self[symbol]  # Synced by the loop above
+            else:  # Added
+                results["added"].append(symbol)
+                self[symbol] = state
+            progress()
+        progress("Synchronized local symbol cache (%s)" %
+                 (", ".join(["%i %s" % (len(results[k]), k)
+                             for k in result_keys])), True)
+        return results
diff --git a/git_remote_cvs/util.py b/git_remote_cvs/util.py
new file mode 100644
index 0000000..7d6adb4
--- /dev/null
+++ b/git_remote_cvs/util.py
@@ -0,0 +1,186 @@
+#!/usr/bin/env python
+
+"""Misc. useful functionality used by the rest of this package.
+
+This module provides common functionality used by the other modules in
+this package.
+
+"""
+
+import sys
+import os
+import subprocess
+
+
+# Whether or not to show debug messages
+DEBUG = False
+
+def debug (msg, *args):
+    """Print msg % args to stderr, but only when DEBUG is enabled."""
+    if DEBUG:
+        print >> sys.stderr, msg % args
+
+def error (msg, *args):
+    """Print msg % args to stderr, prefixed with "ERROR:"."""
+    print >> sys.stderr, "ERROR:", msg % args
+
+def die (msg, *args):
+    """Print an error message to stderr and exit with status 1."""
+    error(msg, *args)
+    sys.exit(1)
+
+
+class ProgressIndicator(object):
+
+    """Simple progress indicator.
+
+    Displayed as a spinning character by default, but can be customized
+    by passing custom messages that overrides the spinning character.
+
+    """
+
+    States = ("|", "/", "-", "\\")  # Frames of the spinning character
+
+    def __init__ (self, prefix = "", f = sys.stdout):
+        """Create a new ProgressIndicator, bound to the given file object."""
+        self.n = 0  # Simple progress counter
+        self.f = f  # Progress is written to this file object
+        self.prev_len = 0  # Length of previous msg (to be overwritten)
+        self.prefix = prefix  # Prefix prepended to each progress message
+        self.prefix_lens = [] # Stack of prefix string lengths
+
+    def pushprefix (self, prefix):
+        """Append the given prefix onto the prefix stack."""
+        self.prefix_lens.append(len(self.prefix))  # Remember old length
+        self.prefix += prefix
+
+    def popprefix (self):
+        """Remove the last prefix from the prefix stack."""
+        prev_len = self.prefix_lens.pop()
+        self.prefix = self.prefix[:prev_len]  # Cut back to the old length
+
+    def __call__ (self, msg = None, lf = False):
+        """Overwrite the progress line with msg (default: next spinner)."""
+        if msg is None:
+            msg = self.States[self.n % len(self.States)]
+        msg = self.prefix + msg
+        print >> self.f, "\r%-*s" % (self.prev_len, msg),
+        self.prev_len = len(msg.expandtabs())
+        if lf:
+            print >> self.f  # End the line; the next msg starts afresh
+            self.prev_len = 0
+        self.n += 1  # Counts calls; also selects the next spinner frame
+
+    def finish (self, msg = "done", noprefix = False):
+        """Finalize progress indication with the given message."""
+        if noprefix:
+            self.prefix = ""
+        self(msg, True)  # True: terminate the progress line
+
+
+def start_command (args, cwd = None, shell = False, add_env = None,
+                   stdin = subprocess.PIPE, stdout = subprocess.PIPE,
+                   stderr = subprocess.PIPE):
+    """Start the given command, and return a subprocess object.
+
+    This provides a simpler interface to the subprocess module.
+
+    """
+    env = None  # Passed to Popen unchanged unless add_env is given
+    if add_env is not None:
+        env = os.environ.copy()  # Copy, so os.environ is not modified
+        env.update(add_env)
+    return subprocess.Popen(args, bufsize = 1, stdin = stdin, stdout = stdout,
+                            stderr = stderr, cwd = cwd, shell = shell,
+                            env = env, universal_newlines = True)
+
+
+def run_command (args, cwd = None, shell = False, add_env = None,
+                 flag_error = True):
+    """Run the given command to completion, and return its results.
+
+    This provides a simpler interface to the subprocess module.
+
+    The results are formatted as a 3-tuple: (exit_code, output, errors)
+
+    If flag_error is enabled, Error messages will be produced if the
+    subprocess terminated with a non-zero exit code and/or stderr
+    output.
+
+    The other arguments are passed on to start_command().
+
+    """
+    process = start_command(args, cwd, shell, add_env)
+    (output, errors) = process.communicate()  # Reads all output, then waits
+    exit_code = process.returncode
+    if flag_error and errors:
+        error("'%s' returned errors:\n---\n%s---", " ".join(args), errors)
+    if flag_error and exit_code:
+        error("'%s' returned exit code %i", " ".join(args), exit_code)
+    return (exit_code, output, errors)
+
+
+def file_reader_method (missing_ok = False):
+    """Decorator for simplifying reading of files.
+
+    If missing_ok is True, a failure to open a file for reading will
+    not raise the usual IOError, but instead the wrapped method will be
+    called with f == None.  The method must in this case properly
+    handle f == None.
+
+    """
+    def _wrap (method):
+        """Teach given method to handle both filenames and file objects.
+
+        The given method must take a file object as its second argument
+        (the first argument being 'self', of course).  This decorator
+        will take a filename given as the second argument and promote
+        it to a file object.
+
+        """
+        def _wrapped_method (self, filename, *args, **kwargs):
+            if isinstance(filename, file):
+                f = filename  # Already a file object; left open for caller
+            else:
+                try:
+                    f = open(filename, 'r')
+                except IOError:
+                    if missing_ok:
+                        f = None  # The method must cope with f == None
+                    else:
+                        raise
+            try:
+                return method(self, f, *args, **kwargs)
+            finally:
+                if not isinstance(filename, file) and f:
+                    f.close()  # Only close files that were opened here
+        return _wrapped_method
+    return _wrap
+
+
+def file_writer_method (method):
+    """Decorator for simplifying writing of files.
+
+    Enables the given method to handle both filenames and file objects.
+
+    The given method must take a file object as its second argument
+    (the first argument being 'self', of course).  This decorator will
+    take a filename given as the second argument and promote it to a
+    file object.
+
+    """
+    def _new_method (self, filename, *args, **kwargs):
+        if isinstance(filename, file):
+            f = filename
+        else:
+            # Make sure the containing directory (if any) exists
+            parent_dir = os.path.dirname(filename)
+            if parent_dir and not os.path.isdir(parent_dir):
+                os.makedirs(parent_dir)
+            f = open(filename, 'w')
+        try:
+            return method(self, f, *args, **kwargs)
+        finally:
+            if not isinstance(filename, file):
+                f.close()
+    return _new_method
-- 
1.6.4.262.gca66a.dirty

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html