From: Johan Herland <johan@xxxxxxxxxxx> This patch introduces parts of a Python package called "git_remote_cvs" containing the building blocks of the CVS remote helper. The CVS remote helper itself is NOT part of this patch. This patch has been improved by the following contributions: - David Aguilar: Lots of Python coding style fixes Cc: David Aguilar <davvid@xxxxxxxxx> Signed-off-by: Johan Herland <johan@xxxxxxxxxxx> Signed-off-by: Sverre Rabbelier <srabbelier@xxxxxxxxx> --- This has my patch to util.py squashed in. git_remote_cvs/changeset.py | 126 +++++ git_remote_cvs/cvs.py | 998 ++++++++++++++++++++++++++++++++++++ git_remote_cvs/cvs_symbol_cache.py | 313 +++++++++++ git_remote_cvs/util.py | 194 +++++++ 4 files changed, 1631 insertions(+), 0 deletions(-) create mode 100644 git_remote_cvs/changeset.py create mode 100644 git_remote_cvs/cvs.py create mode 100644 git_remote_cvs/cvs_symbol_cache.py create mode 100644 git_remote_cvs/util.py diff --git a/git_remote_cvs/changeset.py b/git_remote_cvs/changeset.py new file mode 100644 index 0000000..9eea9d2 --- /dev/null +++ b/git_remote_cvs/changeset.py @@ -0,0 +1,126 @@ +#!/usr/bin/env python + +"""Code for collecting individual CVS revisions into "changesets" + +A changeset is a collection of CVSRev objects that belong together in +the same "commit". This is a somewhat artificial construct on top of +CVS, which only stores changes at the per-file level. Normally, CVS +users create several CVS revisions simultaneously by applying the +"cvs commit" command to several files with related changes. This +module tries to reconstruct this notion of related revisions. + +""" + +from git_remote_cvs.util import debug, error, die + + +class Changeset(object): + + """Encapsulate a single changeset/commit.""" + + __slots__ = ('revs', 'date', 'author', 'message') + + # The maximum time between the changeset's date, and the date of a + # rev to included in that changeset. + MaxSecondsBetweenRevs = 8 * 60 * 60 # 8 hours + + @classmethod + def from_rev (cls, rev): + """Return a Changeset based on the given CVSRev object.""" + c = cls(rev.date, rev.author, rev.message) + result = c.add(rev) + assert result + return c + + def __init__ (self, date, author, message): + """Create a new Changeset with the given metadata.""" + self.revs = {} # dict: path -> CVSRev object + self.date = date # CVSDate object + self.author = author + self.message = message # Lines of commit message + + def __str__ (self): + """Stringify this Changeset object.""" + msg = self.message[0] # First line only + # Limit message to 25 chars + if len(msg) > 25: + msg = msg[:22] + "..." + return ("<Changeset @(%s) by %s (%s) updating %i files>" % + (self.date, self.author, msg, len(self.revs))) + + def __iter__ (self): + """Return iterator traversing the CVSRevs in this Changeset.""" + return self.revs.itervalues() + + def __getitem__ (self, path): + """Look up a specific CVSRev in this Changeset.""" + return self.revs[path] + + def within_time_window (self, rev): + """Return True iff the rev is within the time window of self.""" + return abs(rev.date.diff(self.date)) <= self.MaxSecondsBetweenRevs + + def add (self, rev): + """Add the given CVSRev to this Changeset. + + The addition will only succeed if the following holds: + - rev.author == self.author + - rev.message == self.message + - rev.path is not in self.revs + - rev.date is within MaxSecondsBetweenRevs of self.date + If the addition succeeds, True is returned; otherwise False. + + """ + if rev.author != self.author or \ + rev.message != self.message or \ + rev.path in self.revs or \ + not self.within_time_window(rev): + return False + + self.revs[rev.path] = rev + return True + + +def build_changesets_from_revs (cvs_revs): + """Organize CVSRev objects into a chronological list of Changesets.""" + # Construct chronological list of CVSRev objects + chron_revs = [] + for path, d in cvs_revs.iteritems(): + i = 0 # Current index into chronRevs + for revnum, cvsrev in sorted(d.iteritems()): + assert path == cvsrev.path + assert revnum == cvsrev.num + while i < len(chron_revs) and cvsrev.date > chron_revs[i].date: + i += 1 + # Insert cvsRev at position i in chronRevs + chron_revs.insert(i, cvsrev) + i += 1 + + changesets = [] # Chronological list of Changeset objects + while len(chron_revs): + # There are still more revs to be added to Changesets + # Create Changeset based on the first rev in chronRevs + changeset = Changeset.from_rev(chron_revs.pop(0)) + # Keep adding revs chronologically until MaxSecondsBetweenRevs + rejects = [] # Revs that cannot be added to this changeset + while len(chron_revs): + rev = chron_revs.pop(0) + reject = False + # First, if we have one of rev's parents in rejects, we + # must also reject rev + for r in rejects: + if r.path == rev.path: + reject = True + break + # Next, add rev to changeset, reject if add fails + if not reject: + reject = not changeset.add(rev) + if reject: + rejects.append(rev) + # stop trying when rev is too far in the future + if not changeset.within_time_window(rev): + break + chron_revs = rejects + chron_revs # Reconstruct remaining revs + changesets.append(changeset) + + return changesets diff --git a/git_remote_cvs/cvs.py b/git_remote_cvs/cvs.py new file mode 100644 index 0000000..f870ae0 --- /dev/null +++ b/git_remote_cvs/cvs.py @@ -0,0 +1,998 @@ +#!/usr/bin/env python + +"""Functionality for interacting with CVS repositories. + +This module provides classes for interrogating a CVS repository via a +CVS working directory (aka. checkout), or via direct queries using the +"cvs rlog" command. + +Also, classes for encapsulating fundamental CVS concepts (like CVS +revision/branch numbers) are provided. +""" + +import sys +import os +import shutil +import time +from calendar import timegm +import unittest + +from git_remote_cvs.util import (debug, error, die, ProgressIndicator, + start_command, run_command, + file_reader_method, file_writer_method) + + +class CVSNum(object): + + """Encapsulate a single CVS revision/branch number. + + Provides functionality for common operations on CVS numbers. + + A CVS number consists of a list of components separated by periods + ('.'), where each component is a decimal number. Inspecting the + components from left to right, the odd-numbered (1st, 3rd, 5th, + etc.) components represent branches in the CVS history tree, while + the even-numbered (2nd, 4th, 6th, etc.) components represent + revisions on the branch specified in the previous position. + Thus "1.2" denotes the second revision on the first branch + (aka. trunk), while "1.2.4.6" denotes the sixth revision of the + fourth branch started from revision "1.2". + + Therefore, in general, a CVS number with an even number of + components denotes a revision (we call this a "revision number"), + while an odd number of components denotes a branch (called a + "branch number"). + + There are a few complicating peculiarities: If there is an even + number of components, and the second-last component is 0, the + number is not a revision number, but is rather equivalent to the + branch number we get by removing the 0-component. I.e. "1.2.0.4" + is equivalent to "1.2.4". + + A branch number (except the trunk: "1") always has a "branch point" + revision, i.e. the revision from which the branch was started. + This revision is found by removing the last component of the branch + number. For example the branch point of "1.2.4" is "1.2". + + Conversely, all revision numbers belong to a corresponding branch, + whose branch number is found by removing the last component. + Examples: The "1.2.4.6" revision belong to the "1.2.4" branch, + while the "1.2" revision belongs to the "1" branch (the "trunk"). + + From this we can programatically determine the ancestry of any + revision number, by decrementing the last revision component until + it equals 1, and then trim off the last two components to get to + the branch point, and repeat the process from there until we reach + the initial revision (typically "1.1"). For example, recursively + enumerating the parent revisions of "1.2.4.6" yields the following + revisions: + "1.2.4.5", "1.2.4.4", "1.2.4.3", "1.2.4.2", "1.2.4.1", "1.2", "1.1" + + """ + + __slots__ = ('c',) + + @staticmethod + def decompose (cvsnum): + """Split the given CVS number into a list of int components. + + Branch numbers are normalized to the odd-numbered components + form (i.e. removing the second last '0' component) + + Examples: + '1.2.4.8' -> [1, 2, 4, 8] + '1.2.3' -> [1, 2, 3] + '1.2.0.5' -> [1, 2, 5] + + """ + if cvsnum: + r = map(int, cvsnum.split('.')) + else: + r = [] + if len(r) >= 2 and r[-2] == 0: + del r[-2] + if r[-1] == 0: + raise ValueError(cvsnum) + return tuple(r) + + @staticmethod + def compose (c): + """Join the given list of integer components into a CVS number. + + E.g.: (1, 2, 4, 8) -> '1.2.4.8' + + """ + if c[-1] == 0: + raise ValueError(str(c)) + return ".".join(map(str, c)) + + @classmethod + def from_components (cls, args): + """Create a CVSNum from the given list of numerical components.""" + return cls(cls.compose(args)) + + @classmethod + def disjoint (cls, a, b): + """Return True iff the CVS numbers are historically disjoint. + + Two CVS numbers are disjoint if they do not share the same + historical line back to the initial revision. In other words: + the two numbers are disjoint if the history (i.e. set of parent + revisions all the way back to the intial (1.1) revision) of + neither number is a superset of the other's history. + See test_disjoint() for practical examples: + + """ + if a.is_branch(): + a = cls.from_components(a.c + (1,)) + if b.is_branch(): + b = cls.from_components(b.c + (1,)) + if len(a.c) > len(b.c): + a, b = b, a # a is now shortest + pairs = zip(a.c, b.c) + for pa, pb in pairs[:-1]: + if pa != pb: + return True + if len(a) == len(b): + return False + common_len = len(a) + if a.c[common_len - 1] <= b.c[common_len - 1]: + return False + return True + + + def __init__ (self, cvsnum): + """Create a CVSNum object from the given CVS number string.""" + self.c = self.decompose(str(cvsnum)) + + def __repr__ (self): + """Return a string representation of this object.""" + return self.compose(self.c) + + def __str__ (self): + """Return a string representation of this object.""" + return repr(self) + + def __hash__ (self): + """Create a hash value for this CVS number.""" + return hash(repr(self)) + + def __len__ (self): + """Return number of components in this CVS number.""" + return len(self.c) + + def __cmp__ (self, other): + """Comparison method for CVS numbers.""" + try: + return cmp(self.c, other.c) + except AttributeError: + return 1 + + def __getitem__ (self, key): + """Return the Xth component of this CVS number.""" + return self.c[key] + + def is_rev (self): + """Return True iff this number is a CVS revision number.""" + return len(self.c) % 2 == 0 and len(self.c) >= 2 and self.c[-2] != 0 + + def is_branch (self): + """Return True iff this number is a CVS branch number.""" + return len(self.c) % 2 != 0 or (len(self.c) >= 2 and self.c[-2] == 0) + + def components (self): + """Return a list of integer components in this CVS number.""" + return list(self.c) + + def branch (self): + """Return the branch on which the given number lives. + + Revisions: chop the last component to find the branch, e.g.: + 1.2.4.6 -> 1.2.4 + 1.1 -> 1 + Branches: unchanged + + """ + if self.is_rev(): + return self.from_components(self.c[:-1]) + return self + + def parent (self): + """Return the parent/previous revision number to this number. + + For revisions, this is the previous revision, e.g.: + 1.2.4.6 -> 1.2.4.5 + 1.2.4.1 -> 1.2 + 1.1 -> None + 2.1 -> None + For branches, this is the branch point, e.g.: + 1.2.4 -> 1.2 + 1 -> None + 2 -> None + + """ + if len(self.c) < 2: + return None + elif len(self.c) % 2: # Branch number + return self.from_components(self.c[:-1]) + else: # Revision number + assert self.c[-1] > 0 + result = self.components() + result[-1] -= 1 # Decrement final component + if result[-1] == 0: # We're at the start of the branch + del result[-2:] # Make into branch point + if not result: + return None + return self.from_components(result) + + def follows (self, other): + """Return True iff self historically follows the given rev. + + This iterates through the parents of self, and returns True iff + any of them equals the given rev. Otherwise, it returns False. + + """ + assert other.is_rev() + cur = self + while cur: + if cur == other: + return True + cur = cur.parent() + return False + + def on_branch (self, branch): + """Return True iff this rev is on the given branch. + + The revs considered to be "on" a branch X also includes the + branch point of branch X. + + """ + return branch == self.branch() or branch.parent() == self + + +class TestCVSNum(unittest.TestCase): + + """CVSNum selftests.""" + + def test_basic (self): + """CVSNum basic selftests.""" + self.assertEqual(CVSNum("1.2.4"), CVSNum("1.2.0.4")) + self.assert_(CVSNum("1.2.4").is_branch()) + self.assert_(CVSNum("1.2").is_rev()) + self.assert_(CVSNum("1").is_branch()) + self.assert_(CVSNum("1.2.4.6").is_rev()) + self.assertEqual(CVSNum("1.2.4.6").components(), [1, 2, 4, 6]) + self.assertEqual(CVSNum.from_components([1, 2, 4, 6]), + CVSNum("1.2.4.6")) + self.assertEqual(str(CVSNum.from_components([1, 2, 4, 6])), "1.2.4.6") + self.assertEqual(len(CVSNum("1.2.4.6")), 4) + self.assertEqual(CVSNum("1.2.4.6").branch(), CVSNum("1.2.4")) + self.assertEqual(CVSNum("1.2.4").branch(), CVSNum("1.2.4")) + self.assertEqual(CVSNum("1.1").branch(), CVSNum("1")) + self.assertEqual(CVSNum("1").branch(), CVSNum("1")) + self.assertEqual(CVSNum("1.2.4.6").parent(), CVSNum("1.2.4.5")) + self.assertEqual(CVSNum("1.2.4.1").parent(), CVSNum("1.2")) + self.assertEqual(CVSNum("1.2").parent(), CVSNum("1.1")) + self.assert_(CVSNum("1.1").parent() is None) + self.assert_(CVSNum("2.1").parent() is None) + self.assertEqual(CVSNum("1.2.4").parent(), CVSNum("1.2")) + self.assert_(CVSNum("1").parent() is None) + self.assert_(CVSNum("2").parent() is None) + + def test_follows (self): + """CVSNum.follows() selftests.""" + self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.1"))) + self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2"))) + self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.1"))) + self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.2"))) + self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.3"))) + self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.4"))) + self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.5"))) + self.assert_(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.6"))) + self.assertFalse(CVSNum("1.2.4.6").follows(CVSNum("1.2.4.7"))) + self.assertFalse(CVSNum("1.2.4.6").follows(CVSNum("1.3"))) + self.assertFalse(CVSNum("1.1").follows(CVSNum("1.2.4.6"))) + + def test_disjoint (self): + """CVSNum.disjoint() selftests.""" + tests = [ + ("1.2", "1.1", False), + ("1.2", "1.2", False), + ("1.2", "1.3", False), + ("1.2", "1.1.2", True), + ("1.2", "1.1.2.3", True), + ("1.2.4", "1.1", False), + ("1.2.4", "1.2", False), + ("1.2.4", "1.3", True), + ("1.2.4", "1.2.2", True), + ("1.2.4", "1.2.4", False), + ("1.2.4", "1.2.6", True), + ("1.2.4", "1.2.2.4", True), + ("1.2.4", "1.2.4.4", False), + ("1.2.4", "1.2.6.4", True), + ("1.2.4.6", "1.1", False), + ("1.2.4.6", "1.2", False), + ("1.2.4.6", "1.3", True), + ("1.2.4.6", "1.2.2", True), + ("1.2.4.6", "1.2.2.1", True), + ("1.2.4.6", "1.2.4", False), + ("1.2.4.6", "1.2.4.5", False), + ("1.2.4.6", "1.2.4.6", False), + ("1.2.4.6", "1.2.4.7", False), + ("1.2.4.6.8.10", "1.2.4.5", False), + ("1.2.4.6.8.10", "1.2.4.6", False), + ("1.2.4.6.8.10", "1.2.4.7", True), + ] + for a, b, result in tests: + self.assertEqual(CVSNum.disjoint(CVSNum(a), CVSNum(b)), result) + self.assertEqual(CVSNum.disjoint(CVSNum(b), CVSNum(a)), result) + + +class CVSState(object): + + """Encapsulate a historical state in CVS (a set of paths and nums). + + This class is a container of CVS pathnames and associated CVSNum + objects. + + No communication with a CVS working directory or repository is done + in this class, hence only basic sanity checks are performed: + - A path may only appear once in a CVSState. + - When adding a path:num pair, path may not already exist in self + - When replacing a path:num pair, path must already exist in self + - When removing a path:num pair, both path and num must be given + + IMPORTANT: Objects of this class are hash()able (to support being + used as keys in a dict), but they are also mutable. It is + therefore up to the caller to make sure that the object is not + changed after being stored in a data structure indexed by its hash + value. + + """ + + __slots__ = ('revs', '_hash') + + def __init__ (self): + """Create a new, empty CVSState.""" + self.revs = {} # dict: path -> CVSNum object + self._hash = None + + def __iter__ (self): + """Return iterator traversing the (path, CVSNum)s in this CVSState.""" + return self.revs.iteritems() + + def __cmp__ (self, other): + """Comparison method for CVSState objects.""" + return cmp(self.revs, other.revs) + + def __str__ (self): + """Stringify this CVSState by listing the contained revisions.""" + return "".join(["%s:%s\n" % (p, n) for p, n in sorted(self)]) + + def __hash__ (self): + """Create a hash value for this CVSState.""" + if self._hash is None: + self._hash = hash(str(self)) + return self._hash + + def __getitem__ (self, path): + """Return the CVSNum associated with the given path in self.""" + return self.revs[path] + + def get (self, path, default = None): + """Return the CVSNum associated with the given path in self.""" + return self.revs.get(path, default) + + def paths (self): + """Return the path names contained within this CVSState.""" + return self.revs.iterkeys() + + def add (self, path, revnum): + """Add the given path:revnum to this CVSState.""" + assert path not in self.revs + self._hash = None + self.revs[path] = revnum + + def replace (self, path, revnum): + """Replace the revnum associated with the given path.""" + assert path in self.revs + self._hash = None + self.revs[path] = revnum + + def remove (self, path, revnum): + """Remove the given path:revnum association from this CVSState.""" + assert path in self.revs and self.revs[path] == revnum + self._hash = None + del self.revs[path] + + def copy (self): + """Create and return a copy of this object.""" + ret = CVSState() + ret.revs = self.revs.copy() + ret._hash = self._hash + return ret + + def load_data (self, note_data): + """Load note data as formatted by self.__str__().""" + for line in note_data.split("\n"): + line = line.strip() + if not line: + continue + path, num = line.rsplit(':', 1) + self.add(path, CVSNum(num)) + self._hash = hash(note_data) + + def print_members (self, f = sys.stdout, prefix = ""): + """Write the members of this CVSState to the given file object.""" + for path, num in sorted(self): + print >> f, "%s%s:%s" % (prefix, path, num) + + @file_reader_method(missing_ok = True) + def load (self, f): + """Load CVS state from the given file name/object.""" + if f: + self.load_data(f.read()) + + @file_writer_method + def save (self, f): + """Save CVS state to the given file name/object.""" + assert f + print >> f, str(self), + + +class CVSDate(object): + + """Encapsulate a timestamp, as reported by CVS. + + The internal representation of a timestamp is two integers, the + first representing the timestamp as #seconds since epoch (UTC), + and the second representing the timezone as #minutes offset from + UTC. + + Example: "2007-09-05 17:26:28 -0200" is converted to + (1189013188, -120) + + """ + + __slots__ = ('ts', 'tz') + + def __init__ (self, date_str = None, in_utc = False): + """Convert CVS date string into a CVSDate object. + + A CVS timestamp string has one of the following forms: + - "YYYY-MM-DD hh:mm:ss SZZZZ" + - "YYYY/MM/DD hh:mm:ss" (with timezone assumed to be UTC) + The in_utc parameter determines whether the timestamp part of + the given string (the "YYYY-MM-DD hh:mm:ss" part) is given in + local time or UTC (normally CVS dates are given in local time. + If given in local time, the timezone offset is subtracted from + the timestamp in order to make the time in UTC format. + + """ + if date_str is None: + self.ts, self.tz = 0, 0 + return + if date_str == "now": + self.ts, self.tz = time.time(), 0 + return + date_str = date_str.strip() + # Set up self.ts and self.tz + if date_str.count(" ") == 2: + # Assume format "YYYY-MM-DD hh:mm:ss SZZZZ" + t, z = date_str.rsplit(" ", 1) + # Convert timestamp to #secs since epoch (UTC) + self.ts = timegm(time.strptime(t, "%Y-%m-%d %H:%M:%S")) + # Convert timezone into #mins offset from UTC + self.tz = int(z[1:3]) * 60 + int(z[3:5]) + # Incorporate timezone sign + if z[0] == '-': + self.tz *= -1 + else: + assert date_str.count(" ") == 1 + # Assume format "YYYY/MM/DD hh:mm:ss" + self.ts = timegm(time.strptime(date_str, "%Y/%m/%d %H:%M:%S")) + self.tz = 0 + # Adjust timestamp if not already in UTC + if not in_utc: + self.ts -= self.tz * 60 + + def tz_str (self): + """Return timezone part of self in string format.""" + sign = '+' + if self.tz < 0: + sign = '-' + hours, minutes = divmod(abs(self.tz), 60) + return "%s%02d%02d" % (sign, hours, minutes) + + def __str__ (self): + """Reconstruct date string from members.""" + s = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime(self.ts)) + return "%s %s" % (s, self.tz_str()) + + def __repr__ (self): + """Create a string representation of self.""" + return "CVSDate('%s')" % (str(self)) + + def __hash__ (self): + """Create a hash value from self.""" + return hash((self.ts, self.tz)) + + def __nonzero__ (self): + """Provide interpretation of self in a boolean context.""" + return bool(self.ts or self.tz) + + def __cmp__ (self, other): + """Comparison method for CVSDate objects.""" + return cmp(self.ts, other.ts) or cmp(self.tz, other.tz) + + def __eq__ (self, other): + """Return True iff self and other is considered equal.""" + return self.ts == other.ts and self.tz == other.tz + + def diff (self, other): + """Return difference between self and other in #seconds. + + Invariant: self == other.add(self.diff(other)) + + """ + return self.ts - other.ts + + +class TestCVSDate(unittest.TestCase): + + """CVSDate selftests.""" + + def test_basic (self): + """CVSDate basic selftests.""" + a = CVSDate("2009-05-10 14:34:56 +0200") + b = CVSDate("2009/05/10 12:34:56") + self.assert_(a) + self.assert_(b) + self.assertEqual(str(a), "2009-05-10 12:34:56 +0200", str(a)) + self.assertEqual(str(b), "2009-05-10 12:34:56 +0000", str(b)) + self.assertNotEqual(a, b) + self.assertEqual(a.diff(b), 0) + c = CVSDate("2009-05-10 16:34:56 +0200") + self.assert_(c) + self.assertEqual(str(c), "2009-05-10 14:34:56 +0200", str(c)) + self.assertNotEqual(c, a) + self.assertEqual(c.diff(a), 2 * 60 * 60) + self.assertEqual(a.diff(c), -2 * 60 * 60) + + +class CVSRev(object): + + """Encapsulate metadata on a CVS revision.""" + + __slots__ = ('path', 'num', 'date', 'author', 'deleted', 'message') + + def __init__ (self, path, num): + """Create a CVSRev object for the given path:num revision.""" + self.path = path + self.num = num + self.date = None # CVSDate object + self.author = "" + self.deleted = None # True or False + self.message = [] # Lines of commit message + + def __str__ (self): + """Return a string listing the metadata in this CVS revision.""" + return ("<%s:%s on %s by %s%s>" % + (self.path, self.num, self.date, self.author, + self.deleted and ", deleted" or "")) + + def __cmp__ (self, other): + """Comparison method for CVSRev objects.""" + return cmp(self.path, other.path) or cmp(self.num, other.num) + + +class CVSWorkDir(object): + + """Encapsulate a CVS working directory. + + This class auto-creates a CVS workdir/checkout in the directory + given to the constructor, and provides various methods for + interacting with this workdir. + + """ + + def __init__ (self, workdir, cvs_repo): + """Create a new CVSWorkDir. + + The cvs_repo argument must be a (cvs_root, cvs_module) tuple + + """ + self.d = workdir + self.cvs_root, self.cvs_module = cvs_repo + parent_dir = os.path.dirname(self.d) + if not os.path.isdir(parent_dir): + os.makedirs(parent_dir) + self._valid = None + + def makepath(self, *args): + """Create path relative to working directory.""" + return os.path.join(self.d, *args) + + def valid (self): + """Return True iff this workdir is present and valid.""" + if self._valid is not None: + return self._valid + try: + f = open(self.makepath("CVS", "Root"), 'r') + assert f.read().strip() == self.cvs_root + f.close() + f = open(self.makepath("CVS", "Repository"), 'r') + assert f.read().strip() == self.cvs_module + f.close() + self._valid = True + except (IOError, AssertionError): + self._valid = False + return self._valid + + def remove (self): + """Remove this checkout.""" + shutil.rmtree(self.d, True) + assert not os.path.exists(self.d) + self._valid = False + + def checkout (self, revision = "HEAD"): + """Create a checkout of the given revision.""" + self.remove() + parent_dir, co_dir = os.path.split(self.d) + args = ["cvs", "-f", "-Q", "-d", self.cvs_root, "checkout"] + if str(revision) != "HEAD": + args.extend(["-r", str(revision)]) + args.extend(["-d", co_dir, self.cvs_module]) + exit_code, output, errors = run_command(args, cwd = parent_dir) + if exit_code: + die("Failed to checkout CVS working directory") + assert not errors + assert not output, "output = '%s'" % (output) + self._valid = None + assert self.valid() + + def update (self, revision = "HEAD", paths = None): + """Update the given paths to the given revision.""" + if not self.valid(): + self.checkout() + args = ["cvs", "-f", "-Q", "update", "-kk"] + if str(revision) == "HEAD": + args.append("-A") + else: + args.extend(["-r", str(revision)]) + if paths is not None: + args.extend(paths) + exit_code, output, errors = run_command(args, cwd = self.d) + if exit_code: + die("Failed to checkout CVS working directory") + assert not errors + assert not output, "output = '%s'" % (output) + + def get_revision_data (self, path, revision): + """Return the contents of the given CVS path:revision.""" + if not self.valid(): + self.checkout() + args = ["cvs", "-f", "-Q", "update", "-p", "-kk"] + if str(revision) == "HEAD": + args.append("-A") + else: + args.extend(["-r", str(revision)]) + args.append(path) + exit_code, output, errors = run_command(args, cwd = self.d) + if exit_code: + die("Failed to checkout CVS working directory") + assert not errors + return output + + def get_modeinfo (self, paths = None): + """Return mode information for the given paths. + + Returns a dict of path -> mode number mappings. If paths are + not specified, mode information for all files in the current + checkout will be returned. No checkout/update will be done. + + """ + result = {} + if paths is not None: + for path in paths: + fullpath = os.path.join(self.d, path) + mode = 644 + if os.access(fullpath, os.X_OK): + mode = 755 + assert path not in result + result[path] = mode + else: # Return mode information for all paths + for dirpath, dirnames, filenames in os.walk(self.d): + # Don't descend into CVS subdirs + try: + dirnames.remove('CVS') + except ValueError: + pass + assert dirpath.startswith(self.d) + directory = dirpath[len(self.d):].lstrip("/") + for fname in filenames: + path = os.path.join(directory, fname) + fullpath = os.path.join(dirpath, fname) + mode = 644 + if os.access(fullpath, os.X_OK): + mode = 755 + assert path not in result + result[path] = mode + return result + + @classmethod + def parse_entries (cls, entries, prefix, directory = ""): + """Recursively parse CVS/Entries files. + + Return a dict of CVS paths found by parsing the CVS/Entries + files rooted at the given directory. + + See http://ximbiot.com/cvs/manual/feature/cvs_2.html#SEC19 for + information on the format of the CVS/Entries file. + + """ + fname = os.path.join(prefix, directory, "CVS", "Entries") + subdirs = [] + f = open(fname, 'r') + for line in f: + line = line.strip() + if line == "D": + continue # There are no subdirectories + t, path, revnum, date, options, tag = line.split("/") + if t == "D": + subdirs.append(path) + continue + assert line.startswith("/") + path = os.path.join(directory, path) + revnum = CVSNum(revnum) + assert path not in entries + entries[path] = (revnum, date, options, tag) + f.close() + for d in subdirs: + d = os.path.join(directory, d) + cls.parse_entries(entries, prefix, d) + + def get_state (self): + """Return CVSState reflecting current state of this checkout. + + Note that the resulting CVSState will never contain any + deleted/dead files. Other CVSStates to be compared to the one + returned from here should remove deleted/dead entries first. + + """ + assert self.valid() + entries = {} + result = CVSState() + self.parse_entries(entries, self.d) + for path, info in entries.iteritems(): + result.add(path, info[0]) + return result + + +class CVSLogParser(object): + + """Encapsulate the execution of a "cvs rlog" command.""" + + def __init__ (self, cvs_repo): + """Create a new CVSLogParser. + + The cvs_repo argument must be a (cvs_root, cvs_module) tuple + + """ + self.cvs_root, self.cvs_module = cvs_repo + + def cleanup_path (self, cvs_path): + """Utility method for parsing CVS paths from CVS log.""" + cvsprefix = "/".join((self.cvs_root[self.cvs_root.index("/"):], + self.cvs_module)) + assert cvs_path.startswith(cvsprefix) + assert cvs_path.endswith(",v") + # Drop cvsprefix and ,v-extension + cvs_path = cvs_path[len(cvsprefix):-2] + # Split the remaining path into components + path_comps = filter(None, cvs_path.strip().split('/')) + # Remove 'Attic' from CVS paths + if len(path_comps) >= 2 and path_comps[-2] == "Attic": + del path_comps[-2] + # Reconstruct resulting "cleaned" path + return "/".join(path_comps) + + def __call__ (self, line): + """Parse the given line from the CVS log. + + Must be reimplemented by subclass + + """ + pass + + def finish (self): + """This method is invoked after the last line has been parsed. + + May be reimplemented by subclass + + """ + pass + + def run (self, paths = None, no_symbols = False, revisions = None): + """Execute "cvs rlog" with the given arguments. + + self.__call__() is invoked once for each line in the CVS log. + self.finish() is invoked exactly once after the CVS log. + + """ + args = ["cvs", "-f", "-q", "-d", self.cvs_root, "rlog"] + if no_symbols: + args.append("-N") + if revisions: + args.append("-r%s" % (revisions)) + if paths is not None: + for p in paths: + args.append("%s/%s" % (self.cvs_module, p)) + else: + args.append(self.cvs_module) + proc = start_command(args) + proc.stdin.close() + while True: + for line in proc.stdout: + self(line.rstrip()) # Call self's line parser + if proc.poll() is not None: + break + assert proc.stdout.read() == "" + self.finish() # Notify subclass that parsing is finished + exit_code = proc.returncode + if exit_code: + error("'%s' returned exit code %i, and errors:\n---\n%s---", + " ".join(args), exit_code, proc.stderr.read()) + return exit_code + + +class CVSRevLister(CVSLogParser): + + """Extract CVSRev objects (with revision metadata) from a CVS log.""" + + def __init__ (self, cvs_repo, show_progress = False): + """Create a new CVSRevLister. + + The cvs_repo argument must be a (cvs_root, cvs_module) tuple + show_progress determines whether progress indication is shown. + + """ + super(CVSRevLister, self).__init__(cvs_repo) + self.cur_file = None # Current CVS file being processed + self.cur_file_numrevs = 0 # #revs in current CVS file + self.cur_rev = None # Current CVSRev under construction + self.progress = None + if show_progress: + self.progress = ProgressIndicator("\t", sys.stderr) + # Store found revs in a two-level dict structure: + # filename -> revnum -> CVSRev + self.revs = {} + # Possible states: + # - BeforeRevs - waiting for "total revisions:" + # - BetweenRevs - waiting for "----------------------------" + # - ReadingRev - reading CVS revision details + self.state = 'BeforeRevs' + + def __call__ (self, line): + """Line parser; this method is invoked for each line in the log.""" + if self.state == 'BeforeRevs': + if line.startswith("RCS file: "): + self.cur_file = self.cleanup_path(line[10:]) + assert self.cur_file not in self.revs + self.revs[self.cur_file] = {} + elif line.startswith("total revisions: "): + assert self.cur_file + totalrevs_unused, selectedrevs = line.split(";") + self.cur_file_numrevs = int(selectedrevs.split(":")[1].strip()) + self.state = 'BetweenRevs' + elif self.state == 'BetweenRevs': + if (line == "----------------------------" or + line == "======================================" + "======================================="): + if self.cur_rev: + # Finished current revision + f = self.revs[self.cur_file] + assert self.cur_rev.num not in f + f[self.cur_rev.num] = self.cur_rev + self.cur_rev = None + if self.progress: + self.progress() + if line == "----------------------------": + self.state = 'ReadingRev' + else: + # Finalize current CVS file + assert len(self.revs[self.cur_file]) == \ + self.cur_file_numrevs + self.cur_file = None + self.state = 'BeforeRevs' + elif self.cur_rev: + # Currently in the middle of a revision. + if line.startswith("branches: %s" % (self.cur_rev.num)) and \ + line.endswith(";"): + return # Skip 'branches:' lines + # This line is part of the commit message. + self.cur_rev.message.append(line) + elif self.state == 'ReadingRev': + if line.startswith("revision "): + self.cur_rev = CVSRev(self.cur_file, CVSNum(line.split()[1])) + else: + date, author, state, dummy = line.split(";", 3) + assert date.startswith("date: ") + self.cur_rev.date = CVSDate(date[6:]) + assert author.strip().startswith("author: ") + self.cur_rev.author = author.strip()[8:] + assert state.strip().startswith("state: ") + state = state.strip()[7:] + self.cur_rev.deleted = state == "dead" + self.state = 'BetweenRevs' + + def finish (self): + """This method is invoked after the last line has been parsed.""" + assert self.state == 'BeforeRevs' + if self.progress: + self.progress.finish("Parsed %i revs in %i files" % + (self.progress.n, len(self.revs))) + + +def fetch_revs (path, from_rev, to_rev, symbol, cvs_repo): + """Fetch CVSRevs for each rev in <path:from_rev, path:symbol]. + + Return a dict of CVSRev objects (revnum -> CVSRev), where each + CVSRev encapsulates a CVS revision in the range from + path:from_rev to path:symbol (inclusive). If symbol currently + refers to from_rev (i.e. nothing has happened since the last + import), the returned dict will have exactly one entry (from_rev). + If there is no valid revision range between from_rev and symbol, + the returned dict will be empty. Situations in which an empty dict + is returned, include: + - symbol is no longer defined on this path + - symbol refers to a revision that is disjoint from from_rev + + from_rev may be None, meaning that all revisions from the initial + version of path up to the revision currently referenced by symbol + should be fetched. + + If the revision currently referenced by symbol is disjoint from + from_rev, the returned dict will be empty. + + Note that there is lots of unexpected behaviour in the handling of + the 'cvs rlog -r' parameter: Say you have a branch, called + 'my_branch', that points to branch number 1.1.2 of a file. Say + there are 3 revisions on this branch: 1.1.2.1 -> 1.1.2.3 (in + additions to the branch point 1.1). Now, observe the following + 'cvs rlog' executions: + - cvs rlog -r0:my_branch ... returns 1.1, 1.1.2.1, 1.1.2.2, 1.1.2.3 + - cvs rlog -r1.1:my_branch ... returns the same revs + - cvs rlog -rmy_branch ... returns 1.1.2.1, 1.1.2.2, 1.1.2.3 + - cvs rlog -rmy_branch: ... returns the same revs + - cvs rlog -r:my_branch ... returns the same revs + - cvs rlog -r::my_branch ... returns the same revs + - cvs rlog -r1.1.2.1: ... returns the same revs + Here is where it gets really weird: + - cvs rlog -r1.1.2.1:my_branch ... returns 1.1.2.1 only + - cvs rlog -r1.1.2.2:my_branch ... returns 1.1.2.1, 1.1.2.2 + - cvs rlog -r1.1.2.3:my_branch ... returns 1.1.2.1, 1.1.2.2, 1.1.2.3 + + In other words the 'cvs rlog -rfrom_rev:symbol' scheme that we + normally use will not work in the case where from_rev is _on_ the + branch pointed at by the symbol. + + Therefore, we need an extra parameter, to_rev, which we can use to: + 1. Detect when this situation is present. + 2. Work around by using 'cvs rlog -rfrom_ref:to_rev' instead. + + """ + if from_rev is None: # Initial import + from_rev = "0" # "cvs rlog -r0:X" fetches from initial revision + elif to_rev and to_rev.branch() == from_rev.branch(): + symbol = to_rev # Use to_rev instead of given symbol + # Run 'cvs rlog' on range [from_rev, symbol] and parse CVSRev objects + parser = CVSRevLister(cvs_repo) + parser.run((path,), True, "%s:%s" % (from_rev, symbol)) + assert len(parser.revs) == 1 + assert path in parser.revs + return parser.revs[path] + + +if __name__ == '__main__': + unittest.main() diff --git a/git_remote_cvs/cvs_symbol_cache.py b/git_remote_cvs/cvs_symbol_cache.py new file mode 100644 index 0000000..cc8d88b --- /dev/null +++ b/git_remote_cvs/cvs_symbol_cache.py @@ -0,0 +1,313 @@ +#!/usr/bin/env python + +"""Implementation of a local CVS symbol cache. + +A CVS symbol cache stores a list of CVS symbols and the CVS state +associated with each of those CVS symbols at some point in time. + +Keeping a local cache of CVS symbols is often needed because the +design of CVS makes it potentially very expensive to query the CVS +server directly for CVS symbols and associated states. + +In these cases, a local CVS symbol cache can provide equivalent +(although possibly out-of-date) information immediatele. + +Synchronization with the current state on the CVS server can be +done on a symbol-by-symbol basis (by checking out a given symbol +and extracting the CVS state from the CVS work tree), or by +synchronizing _all_ CVS symbols in one operation (by executing +'cvs rlog' and parsing CVS states from its output). + +""" + +import sys +import os + +from git_remote_cvs.util import debug, error, die, ProgressIndicator +from git_remote_cvs.cvs import CVSNum, CVSState, CVSLogParser + + +class CVSSymbolStateLister(CVSLogParser): + + """Extract current CVSStates for all CVS symbols from a CVS log.""" + + def __init__ (self, cvs_repo, show_progress = False): + """Create a new CVSSymbolStateLister. + + The cvs_repo argument must be a (cvs_root, cvs_module) tuple + show_progress determines whether a progress indicator should + be displayed. + + """ + super(CVSSymbolStateLister, self).__init__(cvs_repo) + self.symbols = {} # CVS symbol name -> CVSState object + self.cur_file = None # current CVS file being processed + self.cur_file_numrevs = 0 # #revs in current CVS file + self.cur_revnum = None # current revision number + self.rev2syms = {} # CVSNum -> [CVS symbol names] + self.cur_revs = {} # CVSNum -> True/False (deleted) + self.head_num = None # CVSNum of the HEAD rev or branch + + # Possible states: + # - BeforeSymbols - waiting for "symbolic names:" + # - WithinSymbols - reading CVS symbol names + # - BeforeRevs - waiting for "total revisions:" + # - BetweenRevs - waiting for "----------------------------" + # - ReadingRev - reading CVS revision details + self.state = 'BeforeSymbols' + + self.progress = None + if show_progress: + self.progress = ProgressIndicator("\t", sys.stderr) + + def finalize_symbol_states (self): + """Adjust CVSStates in self.symbols based on revision data. + + Based on the information found in self.rev2syms and + self.cur_revs, remove deleted revisions and turn branch numbers + into corresponding revisions in the CVSStates found in + self.symbols. + + """ + # Create a mapping from branch numbers to the last existing + # revision number on those branches + branch2lastrev = {} # branch number -> revision number + for revnum in self.cur_revs.iterkeys(): + branchnum = revnum.branch() + if (branchnum not in branch2lastrev) or \ + (revnum > branch2lastrev[branchnum]): + branch2lastrev[branchnum] = revnum + + for cvsnum, symbols in self.rev2syms.iteritems(): + if cvsnum.is_branch(): + # Turn into corresponding revision number + revnum = branch2lastrev.get(cvsnum, cvsnum.parent()) + for s in symbols: + state = self.symbols[s] + assert state[self.cur_file] == cvsnum + state.replace(self.cur_file, revnum) + cvsnum = revnum + assert cvsnum.is_rev() + assert cvsnum in self.cur_revs + if self.cur_revs[cvsnum]: # cvsnum is a deleted rev + # Remove from CVSStates + for s in symbols: + state = self.symbols[s] + state.remove(self.cur_file, cvsnum) + + self.rev2syms = {} + self.cur_revs = {} + self.cur_file = None + + def __call__ (self, line): + """Line parser; this method is invoked for each line in the log.""" + if self.state == 'BeforeSymbols': + if line.startswith("RCS file: "): + self.cur_file = self.cleanup_path(line[10:]) + if self.progress: + self.progress("%5i symbols found - Parsing CVS file #%i: " + "%s " % (len(self.symbols), self.progress.n, + self.cur_file,)) + if line.startswith("head: "): + self.head_num = CVSNum(line[6:]) + if line.startswith("branch: "): + self.head_num = CVSNum(line[8:]) + elif line == "symbolic names:": + assert self.head_num + s = self.symbols.setdefault("HEAD", CVSState()) + s.add(self.cur_file, self.head_num) + r = self.rev2syms.setdefault(self.head_num, []) + r.append("HEAD") + self.head_num = None + self.state = 'WithinSymbols' + elif self.state == 'WithinSymbols': + if line.startswith("\t"): + symbol, cvsnum = line.split(":", 1) + symbol = symbol.strip() + cvsnum = CVSNum(cvsnum) + s = self.symbols.setdefault(symbol, CVSState()) + s.add(self.cur_file, cvsnum) + r = self.rev2syms.setdefault(cvsnum, []) + r.append(symbol) + else: + self.state = 'BeforeRevs' + elif self.state == 'BeforeRevs': + if line.startswith("total revisions: "): + assert self.cur_file + totalrevs_unused, selectedrevs = line.split(";") + self.cur_file_numrevs = int(selectedrevs.split(":")[1].strip()) + self.state = 'BetweenRevs' + elif self.state == 'BetweenRevs': + if (line == "----------------------------" or + line == "======================================" + "======================================="): + if self.cur_revnum: + assert self.cur_revnum in self.cur_revs + self.cur_revnum = None + if line == "----------------------------": + self.state = 'ReadingRev' + else: + # Finalize current CVS file + assert len(self.cur_revs) == self.cur_file_numrevs + self.finalize_symbol_states() + self.state = 'BeforeSymbols' + elif self.state == 'ReadingRev': + if line.startswith("revision "): + self.cur_revnum = CVSNum(line.split()[1]) + else: + date, author, state, dummy = line.split(";", 3) + assert date.startswith("date: ") + assert author.strip().startswith("author: ") + assert state.strip().startswith("state: ") + state = state.strip()[7:] + assert self.cur_revnum not in self.cur_revs + deleted = state == "dead" + self.cur_revs[self.cur_revnum] = deleted + self.state = 'BetweenRevs' + + def finish (self): + """This method is invoked after the last line has been parsed.""" + assert self.state == 'BeforeSymbols' + if self.progress: + self.progress.finish("Parsed %i symbols in %i files" % + (len(self.symbols), self.progress.n)) + + +class CVSSymbolCache(object): + + """Local cache of the current CVSState of CVS symbols. + + Simulates a dictionary of CVS symbol -> CVSState mappings. + + """ + + def __init__ (self, symbols_dir): + """Create a new CVS symbol cache, located in the given directory.""" + self.symbols_dir = symbols_dir + if not os.path.isdir(self.symbols_dir): + os.makedirs(self.symbols_dir) + + def __len__ (self): + """Return the number of CVS symbols stored in this cache.""" + return len(os.listdir(self.symbols_dir)) + + def __iter__ (self): + """Return an iterator traversing symbol names stored in this cache.""" + for filename in os.listdir(self.symbols_dir): + yield filename + + def __contains__ (self, symbol): + """Return True if the given symbol is present in this cache.""" + return os.access(os.path.join(self.symbols_dir, symbol), + os.F_OK | os.R_OK) + + def __getitem__ (self, symbol): + """Return the cached CVSState of the given CVS symbol.""" + try: + f = open(os.path.join(self.symbols_dir, symbol), 'r') + except IOError: + raise KeyError("'%s'" % (symbol)) + ret = CVSState() + ret.load(f) + f.close() + return ret + + def __setitem__ (self, symbol, cvs_state): + """Store the given CVS symbol and CVSState into the cache.""" + cvs_state.save(os.path.join(self.symbols_dir, symbol)) + + def __delitem__ (self, symbol): + """Remove the the given CVS symbol from the cache.""" + os.remove(os.path.join(self.symbols_dir, symbol)) + + def get (self, symbol, default = None): + """Return the cached CVSState of the given CVS symbol.""" + try: + return self[symbol] + except KeyError: + return default + + def items (self): + """Return list of (CVS symbol, CVSState) tuples saved in this cache.""" + for filename in self: + yield (filename, self[filename]) + + def clear (self): + """Remove all entries from this CVS symbol cache.""" + for filename in os.listdir(self.symbols_dir): + os.remove(os.path.join(self.symbols_dir, filename)) + + def sync_symbol (self, symbol, cvs, progress): + """Synchronize the given CVS symbol with the CVS server. + + The given CVS workdir is used for the synchronization. + The retrieved CVSState is also returned + + """ + progress("Retrieving state of CVS symbol '%s'..." % (symbol)) + cvs.update(symbol) + state = cvs.get_state() + + progress("Saving state of '%s' to symbol cache..." % (symbol)) + self[symbol] = state + + def sync_all_symbols (self, cvs_repo, progress, symbol_filter = None): + """Synchronize this entire CVS symbol cache with the CVS server. + + This may be very expensive if the CVS repository is large, or + has many symbols. After this method returns, the symbol cache + will be in sync with the current state on the server. + + This method returns a dict with the keys 'unchanged', + 'changed', 'added', and 'deleted', where each map to a list of + CVS symbols. Each CVS symbol appears in exactly one of these + lists. + + If symbol_filter is given, it specifies functions that takes + one parameter - a CVS symbol name - and returns True if that + symbol should be synchronized, and False if that symbol should + be skipped. Otherwise all CVS symbols are synchronized. + + """ + if symbol_filter is None: + symbol_filter = lambda symbol: True + + # Run cvs rlog to fetch current CVSState for all CVS symbols + progress("Retrieving current state of all CVS symbols from CVS " + "server...", lf = True) + parser = CVSSymbolStateLister(cvs_repo, True) + retcode = parser.run() + if retcode: + raise EnvironmentError(retcode, "cvs rlog exit code %i" % retcode) + + # Update symbol cache with new states from the CVS server + progress("Updating symbol cache with current CVS state...") + results = {} + result_keys = ("unchanged", "changed", "added", "deleted") + for k in result_keys: + results[k] = [] + # Classify existing symbols as unchanged, changed, or deleted + for symbol in filter(symbol_filter, self): + if symbol not in parser.symbols: # Deleted + results["deleted"].append(symbol) + del self[symbol] + elif self[symbol] != parser.symbols[symbol]: # Changed + results["changed"].append(symbol) + self[symbol] = parser.symbols[symbol] + else: # Unchanged + results["unchanged"].append(symbol) + progress() + # Add symbols that are not in self + for symbol, state in parser.symbols.iteritems(): + if not symbol_filter(symbol): + debug("Skipping CVS symbol '%s'...", symbol) + elif symbol in self: + assert state == self[symbol] + else: # Added + results["added"].append(symbol) + self[symbol] = state + progress() + progress("Synchronized local symbol cache (%s)" % + (", ".join(["%i %s" % (len(results[k]), k) + for k in result_keys])), True) + return results diff --git a/git_remote_cvs/util.py b/git_remote_cvs/util.py new file mode 100644 index 0000000..d3ca487 --- /dev/null +++ b/git_remote_cvs/util.py @@ -0,0 +1,194 @@ +#!/usr/bin/env python + +"""Misc. useful functionality used by the rest of this package. + +This module provides common functionality used by the other modules in +this package. + +""" + +import sys +import os +import subprocess + + +# Whether or not to show debug messages +DEBUG = False + +def notify(msg, *args): + """Print a message to stderr.""" + print >> sys.stderr, msg % args + +def debug (msg, *args): + """Print a debug message to stderr when DEBUG is enabled.""" + if DEBUG: + print >> sys.stderr, msg % args + +def error (msg, *args): + """Print an error message to stderr.""" + print >> sys.stderr, "ERROR:", msg % args + +def warn(msg, *args): + """Print a warning message to stderr.""" + print >> sys.stderr, "warning:", msg % args + +def die (msg, *args): + """Print as error message to stderr and exit the program.""" + error(msg, *args) + sys.exit(1) + + +class ProgressIndicator(object): + + """Simple progress indicator. + + Displayed as a spinning character by default, but can be customized + by passing custom messages that overrides the spinning character. + + """ + + States = ("|", "/", "-", "\\") + + def __init__ (self, prefix = "", f = sys.stdout): + """Create a new ProgressIndicator, bound to the given file object.""" + self.n = 0 # Simple progress counter + self.f = f # Progress is written to this file object + self.prev_len = 0 # Length of previous msg (to be overwritten) + self.prefix = prefix # Prefix prepended to each progress message + self.prefix_lens = [] # Stack of prefix string lengths + + def pushprefix (self, prefix): + """Append the given prefix onto the prefix stack.""" + self.prefix_lens.append(len(self.prefix)) + self.prefix += prefix + + def popprefix (self): + """Remove the last prefix from the prefix stack.""" + prev_len = self.prefix_lens.pop() + self.prefix = self.prefix[:prev_len] + + def __call__ (self, msg = None, lf = False): + """Indicate progress, possibly with a custom message.""" + if msg is None: + msg = self.States[self.n % len(self.States)] + msg = self.prefix + msg + print >> self.f, "\r%-*s" % (self.prev_len, msg), + self.prev_len = len(msg.expandtabs()) + if lf: + print >> self.f + self.prev_len = 0 + self.n += 1 + + def finish (self, msg = "done", noprefix = False): + """Finalize progress indication with the given message.""" + if noprefix: + self.prefix = "" + self(msg, True) + + +def start_command (args, cwd = None, shell = False, add_env = None, + stdin = subprocess.PIPE, stdout = subprocess.PIPE, + stderr = subprocess.PIPE): + """Start the given command, and return a subprocess object. + + This provides a simpler interface to the subprocess module. + + """ + env = None + if add_env is not None: + env = os.environ.copy() + env.update(add_env) + return subprocess.Popen(args, bufsize = 1, stdin = stdin, stdout = stdout, + stderr = stderr, cwd = cwd, shell = shell, + env = env, universal_newlines = True) + + +def run_command (args, cwd = None, shell = False, add_env = None, + flag_error = True): + """Run the given command to completion, and return its results. + + This provides a simpler interface to the subprocess module. + + The results are formatted as a 3-tuple: (exit_code, output, errors) + + If flag_error is enabled, Error messages will be produced if the + subprocess terminated with a non-zero exit code and/or stderr + output. + + The other arguments are passed on to start_command(). + + """ + process = start_command(args, cwd, shell, add_env) + (output, errors) = process.communicate() + exit_code = process.returncode + if flag_error and errors: + error("'%s' returned errors:\n---\n%s---", " ".join(args), errors) + if flag_error and exit_code: + error("'%s' returned exit code %i", " ".join(args), exit_code) + return (exit_code, output, errors) + + +def file_reader_method (missing_ok = False): + """Decorator for simplifying reading of files. + + If missing_ok is True, a failure to open a file for reading will + not raise the usual IOError, but instead the wrapped method will be + called with f == None. The method must in this case properly + handle f == None. + + """ + def _wrap (method): + """Teach given method to handle both filenames and file objects. + + The given method must take a file object as its second argument + (the first argument being 'self', of course). This decorator + will take a filename given as the second argument and promote + it to a file object. + + """ + def _wrapped_method (self, filename, *args, **kwargs): + if isinstance(filename, file): + f = filename + else: + try: + f = open(filename, 'r') + except IOError: + if missing_ok: + f = None + else: + raise + try: + return method(self, f, *args, **kwargs) + finally: + if not isinstance(filename, file) and f: + f.close() + return _wrapped_method + return _wrap + + +def file_writer_method (method): + """Decorator for simplifying writing of files. + + Enables the given method to handle both filenames and file objects. + + The given method must take a file object as its second argument + (the first argument being 'self', of course). This decorator will + take a filename given as the second argument and promote it to a + file object. + + """ + def _new_method (self, filename, *args, **kwargs): + if isinstance(filename, file): + f = filename + else: + # Make sure the containing directory exists + parent_dir = os.path.dirname(filename) + if not os.path.isdir(parent_dir): + os.makedirs(parent_dir) + f = open(filename, 'w') + try: + return method(self, f, *args, **kwargs) + finally: + if not isinstance(filename, file): + f.close() + return _new_method -- 1.6.5.2.291.gf76a3 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html