[PATCH 3/3] git-p4: no longer keep all file contents while cloning

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



- Factor out the pipe creation part of p4CmdList() as p4CmdListPipe()

- Factor readP4Files() into openP4Files() and readP4File() and
  change P4Sync to use them.

The upshot is that git-p4 now reads only one file at a time and
immediately passes it on to fast-import. This massively reduces the
memory requirement of git-p4.

Note, git-p4 still reads each whole file into memory -- this can and
should be fixed in the future.

Signed-off-by: Tommy Thorn <tommy-git@xxxxxxxx>
---
 contrib/fast-import/git-p4 |  102 ++++++++++++++++++++++++++++++-------------
 1 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/contrib/fast-import/git-p4 b/contrib/fast-import/git-p4
index 2340876..78a5d02 100755
--- a/contrib/fast-import/git-p4
+++ b/contrib/fast-import/git-p4
@@ -144,7 +144,8 @@ def isModeExec(mode):
 def isModeExecChanged(src_mode, dst_mode):
     return isModeExec(src_mode) != isModeExec(dst_mode)
 
-def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
+# p4CmdListPipe returns a pipe delivering the result of the p4 command
+def p4CmdListPipe(cmd, stdin=None, stdin_mode='w+b'):
     cmd = "p4 -G %s" % cmd
     if verbose:
         sys.stderr.write("Opening pipe: %s\n" % cmd)
@@ -162,7 +163,11 @@ def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
     p4 = subprocess.Popen(cmd, shell=True,
                           stdin=stdin_file,
                           stdout=subprocess.PIPE)
+    return p4
 
+# p4CmdList returns the stdout result of the p4 command
+def p4CmdList(cmd, stdin=None, stdin_mode='w+b'):
+    p4 = p4CmdListPipe(cmd, stdin, stdin_mode)
     result = []
     try:
         while True:
@@ -950,42 +955,62 @@ class P4Sync(Command):
         return branches
 
     ## Should move this out, doesn't use SELF.
-    def readP4Files(self, files):
+    def openP4Files(self, files):
         files = [f for f in files
                  if f['action'] != 'delete']
 
         if not files:
             return
 
-        filedata = p4CmdList('-x - print',
-                             stdin='\n'.join(['%s#%s' % (f['path'], f['rev'])
-                                              for f in files]),
-                             stdin_mode='w+')
-        if "p4ExitCode" in filedata[0]:
-            die("Problems executing p4. Error: [%d]."
-                % (filedata[0]['p4ExitCode']));
-
-        j = 0;
-        contents = {}
-        while j < len(filedata):
-            stat = filedata[j]
-            j += 1
-            text = ''
-            while j < len(filedata) and filedata[j]['code'] in ('text',
-                                                                'binary'):
-                text += filedata[j]['data']
-                j += 1
+        p4 = p4CmdListPipe('-x - print',
+                           stdin='\n'.join(['%s#%s' % (f['path'], f['rev'])
+                                            for f in files]),
+                           stdin_mode='w+')
 
+        self.curDepotFile = None
+        return p4
 
-            if not stat.has_key('depotFile'):
-                sys.stderr.write("p4 print fails with: %s\n" % repr(stat))
-                continue
+    # Using the pipe handle provided by openP4Files, read in a file and
+    # return the pair of (depot file name, contents)
+    def readP4File(self, p4):
+        text = ''
 
-            contents[stat['depotFile']] = text
+        try:
+            while True:
+                entry = marshal.load(p4.stdout)
+
+                if entry['code'] in ('text', 'binary'):
+                    text += entry['data']
+                elif entry['code'] == 'stat':
+                    # We are done with the previous file
+                    if self.curDepotFile is not None:
+                        if not entry.has_key('depotFile'):
+                            sys.stderr.write("p4 print fails with: %s\n" % repr(entry))
+                            self.curDepotFile = None
+                            continue
+
+                        depotFile = self.curDepotFile
+                        self.curDepotFile = entry['depotFile']
+                        if verbose: sys.stderr.write('Read %s\n' % depotFile)
+                        return (depotFile, text)
+
+                    text = ''
+                    self.curDepotFile = entry['depotFile']
+                else:
+                    sys.stderr.write("p4 print returned unexpected code: %s\n" % entry['code'])
+
+        except EOFError:
+            pass
+
+        exitCode = p4.wait()
+        if exitCode != 0:
+            die("Problems executing p4. Error: [%d]." % exitCode);
+
+        depotFile = self.curDepotFile
+        self.curDepotFile = None
+        if verbose: sys.stderr.write('Read %s\n' % depotFile)
+        return (depotFile, text)
 
-        for f in files:
-            assert not f.has_key('data')
-            f['data'] = contents[f['path']]
 
     def commit(self, details, files, branch, branchPrefixes, parent = ""):
         epoch = details["time"]
@@ -996,6 +1021,10 @@ class P4Sync(Command):
 
         # start with reading files; if that fails, we should not
         # create a commit.
+
+        # XXX No, reading all file contents into memory is too tall a
+        # price to pay. Right now if an error is found, it will abort,
+        # leaving cruft for git gc to prune.
         new_files = []
         for f in files:
             if [p for p in branchPrefixes if f['path'].startswith(p)]:
@@ -1003,7 +1032,6 @@ class P4Sync(Command):
             else:
                 sys.stderr.write("Ignoring file outside of prefix: %s\n" % path)
         files = new_files
-        self.readP4Files(files)
 
 
 
@@ -1034,17 +1062,26 @@ class P4Sync(Command):
                 print "parent %s" % parent
             self.gitStream.write("from %s\n" % parent)
 
-        for file in files:
+        # Create a depotFileName -> file mapping
+        revFile = {}
+        for f in files:
+            revFile[f['path']] = f
+
+        p4 = self.openP4Files(files)
+        (depotFile, data) = self.readP4File(p4)
+
+        while depotFile is not None:
+
+            file = revFile[depotFile]
             if file["type"] == "apple":
                 print "\nfile %s is a strange apple file that forks. Ignoring!" % file['path']
+                (depotFile, data) = self.readP4File(p4)
                 continue
 
             relPath = self.stripRepoPath(file['path'], branchPrefixes)
             if file["action"] == "delete":
                 self.gitStream.write("D %s\n" % relPath)
             else:
-                data = file['data']
-
                 mode = "644"
                 if isP4Exec(file["type"]):
                     mode = "755"
@@ -1061,6 +1098,9 @@ class P4Sync(Command):
                 self.gitStream.write(data)
                 self.gitStream.write("\n")
 
+            (depotFile, data) = self.readP4File(p4)
+
+
         self.gitStream.write("\n")
 
         change = int(details["change"])
-- 
1.5.4.rc5.17.g22b645

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux