Re: [RFC PATCH] git-p4: add option to store files in Git LFS on import

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Comments below.

>
> diff --git a/Documentation/git-p4.txt b/Documentation/git-p4.txt
> index 82aa5d6..a188840 100644
> --- a/Documentation/git-p4.txt
> +++ b/Documentation/git-p4.txt
> @@ -252,6 +252,18 @@ Git repository:
>         Use a client spec to find the list of interesting files in p4.
>         See the "CLIENT SPEC" section below.
>

<snip>

>
> +def mkdir_p(path):
> +    # Copied from http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
> +    try:
> +        os.makedirs(path)
> +    except OSError as exc: # Python >2.5
> +        if exc.errno == errno.EEXIST and os.path.isdir(path):
> +            pass
> +        else:
> +            raise

Sigh. We need to upgrade to Python 3....

Coujld also just do:

   if not path.isdir(path) os.makedirs(path)

(Although there is a tiny race hazard if something else creates the
same path between isdir and makedir, but the way you're using it,
seems unlikely.

> +
>  def die(msg):
>      if verbose:
>          raise Exception(msg)
> @@ -1994,6 +2005,11 @@ class P4Sync(Command, P4UserMap):
>                  optparse.make_option("-/", dest="cloneExclude",
>                                       action="append", type="string",
>                                       help="exclude depot path"),
> +                optparse.make_option("--use-lfs-if-size-exceeds", dest="lfsMinimumFileSize", type="int",
> +                                     help="Use LFS to store files bigger than the given threshold in bytes."),
> +                optparse.make_option("--use-lfs-for-extension", dest="lfsFileExtensions",
> +                                     action="append", type="string",
> +                                     help="Use LFS to store files with the given file extension(s)."),
>          ]
>          self.description = """Imports from Perforce into a git repository.\n
>      example:
> @@ -2025,6 +2041,9 @@ class P4Sync(Command, P4UserMap):
>          self.clientSpecDirs = None
>          self.tempBranches = []
>          self.tempBranchLocation = "git-p4-tmp"
> +        self.lfsFiles = []
> +        self.lfsMinimumFileSize = None
> +        self.lfsFileExtensions = []
>
>          if gitConfig("git-p4.syncFromOrigin") == "false":
>              self.syncWithOrigin = False
> @@ -2145,6 +2164,63 @@ class P4Sync(Command, P4UserMap):
>
>          return branches
>
> +    def writeToGitStream(self, gitMode, relPath, contents):
> +        self.gitStream.write('M %s inline %s\n' % (gitMode, relPath))
> +        self.gitStream.write('data %d\n' % sum(len(d) for d in contents))
> +        for d in contents:
> +            self.gitStream.write(d)
> +        self.gitStream.write('\n')
> +
> +    def writeGitAttributesToStream(self):
> +        gitAttributes = [f + ' filter=lfs -text\n' for f in self.lfsFiles if not self.hasFileLFSExtension(f)]
> +        self.writeToGitStream(
> +            '100644',
> +            '.gitattributes',
> +            ['*.' + f + ' filter=lfs -text\n' for f in self.lfsFileExtensions] +
> +            [f + ' filter=lfs -text\n' for f in self.lfsFiles if not self.hasFileLFSExtension(f)]
> +        )
> +
> +    def hasFileLFSExtension(self, relPath):
> +        return reduce(
> +            lambda a, b: a or b,
> +            [relPath.endswith('.' + e) for e in self.lfsFileExtensions],
> +            False
> +        )
> +
> +    def isFileLargerThanLFSTreshold(self, relPath, contents):
> +        return self.lfsMinimumFileSize and sum(len(d) for d in contents) >= self.lfsMinimumFileSize

Could have a command-line option "--try-compress-first" (or some such)
which compresses the file, and it it's very compressible, leaves it
alone. It would trade speed of cloning with not-using-LFS-needlessly.

> +
> +    def generateLFSPointerFile(self, relPath, contents):
> +        # Write P4 content to temp file
> +        p4ContentTempFile = tempfile.NamedTemporaryFile(prefix='git-lfs', delete=False)
> +        for d in contents:
> +            p4ContentTempFile.write(d)
> +        p4ContentTempFile.flush()
> +
> +        # Generate LFS pointer file based on P4 content
> +        lfsProcess = subprocess.Popen(
> +            ['git', 'lfs', 'pointer', '--file=' + p4ContentTempFile.name],
> +            stdout=subprocess.PIPE
> +        )
> +        lfsPointerFile = lfsProcess.stdout.read()
> +        if lfsProcess.wait():
> +            die('git-lfs command failed. Did you install the extension?')

We're going to leave the P4 content file lying around undeleted here;
is there any way to avoid that and cleanup nicely?

> +        contents = [i+'\n' for i in lfsPointerFile.split('\n')[2:][:-1]]
> +
> +        # Write P4 content to LFS
> +        oid = contents[1].split(' ')[1].split(':')[1][:-1]
> +        oidPath = os.path.join(self.cloneDestination, '.git', 'lfs', 'objects', oid[:2], oid[2:4])
> +        mkdir_p(oidPath)
> +        shutil.move(p4ContentTempFile.name, os.path.join(oidPath, oid))
> +
> +        # Update Git attributes
> +        self.lfsFiles.append(relPath)
> +        self.writeGitAttributesToStream()
> +
> +        # LFS Spec states that pointer files should not have the executable bit set.
> +        gitMode = '100644'
> +        return (gitMode, contents)
> +
>      # output one file from the P4 stream
>      # - helper for streamP4Files
>
> @@ -2213,17 +2289,13 @@ class P4Sync(Command, P4UserMap):
>              text = regexp.sub(r'$\1$', text)
>              contents = [ text ]
>
> -        self.gitStream.write("M %s inline %s\n" % (git_mode, relPath))
> +        if relPath == '.gitattributes':
> +            die('.gitattributes already exists in P4.')
>
> -        # total length...
> -        length = 0
> -        for d in contents:
> -            length = length + len(d)
> +        if self.isFileLargerThanLFSTreshold(relPath, contents) or self.hasFileLFSExtension(relPath):

s/Treshold/Threshold/g


> +            (git_mode, contents) = self.generateLFSPointerFile(relPath, contents)
>
> -        self.gitStream.write("data %d\n" % length)
> -        for d in contents:
> -            self.gitStream.write(d)
> -        self.gitStream.write("\n")
> +        self.writeToGitStream(git_mode, relPath, contents)
>
>      def streamOneP4Deletion(self, file):
>          relPath = self.stripRepoPath(file['path'], self.branchPrefixes)
> @@ -2231,6 +2303,10 @@ class P4Sync(Command, P4UserMap):
>              sys.stderr.write("delete %s\n" % relPath)
>          self.gitStream.write("D %s\n" % relPath)
>
> +        if relPath in self.lfsFiles:
> +            self.lfsFiles.remove(relPath)
> +            self.writeGitAttributesToStream()
> +
>      # handle another chunk of streaming data
>      def streamP4FilesCb(self, marshalled):
>
> diff --git a/t/t9822-git-p4-lfs.sh b/t/t9822-git-p4-lfs.sh
> new file mode 100755
> index 0000000..b27bf29
> --- /dev/null
> +++ b/t/t9822-git-p4-lfs.sh
> @@ -0,0 +1,277 @@
> +#!/bin/sh
> +
> +test_description='Clone repositories and store files in LFS'
> +
> +( git lfs help ) >/dev/null 2>&1 || {

Does this need to be in a subshell?

> +       skip_all='skipping git p4 LFS tests; no git lfs'
> +       test_done
> +}
> +
> +. ./lib-git-p4.sh
> +
> +test_expect_success 'start p4d' '
> +       start_p4d
> +'
> +
> +test_expect_success 'Create repo with binary files' '
> +       client_view "//depot/... //client/..." &&
> +       (
> +               cd "$cli" &&
> +               echo "text" >file.txt &&
> +               echo "bin 13 bytes" >file.dat &&
> +               p4 add file.txt &&
> +               p4 add file.dat &&
> +               p4 submit -d "Add text and binary file" &&
> +               echo "bin 13 bytes" >file2.bin &&
> +               p4 add file2.bin &&
> +               p4 submit -d "Add another binary file with same content"
> +               echo "bin 14 bytess" >file3.bin &&
> +               p4 add file3.bin &&
> +               p4 submit -d "Add another binary file with different content"
> +       )
> +'
> +
> +test_expect_success 'Store files in LFS based on size (10 bytes)' '
> +       client_view "//depot/... //client/..." &&
> +       git p4 clone --use-client-spec --use-lfs-if-size-exceeds=10 --destination="$git" //depot@all &&
> +       test_when_finished cleanup_git &&
> +       (
> +               cd "$git" &&
> +
> +               cat >expect <<-\EOF &&
> +               .git/lfs/objects/d4/43/d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               .git/lfs/objects/e5/fe/e5fec48503cd7b85eb9ffaea3311cde2fe9542078b9640369032b26bb5403fff

This feels like it could be very fragile. Every time a new file gets
added to the tests we'll end up having to mess around with SHA1
digests. Surely all we care about is that LFS can recreate the files
we gave it originally?

Plus it makes it quite hard to understand what's going on!

> +               EOF
> +               find ".git/lfs/objects" -type f >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               version https://git-lfs.github.com/spec/v1
> +               oid sha256:d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               size 13
> +               EOF
> +               cat file.dat >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               version https://git-lfs.github.com/spec/v1
> +               oid sha256:d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               size 13
> +               EOF
> +               cat file2.bin >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               version https://git-lfs.github.com/spec/v1
> +               oid sha256:e5fec48503cd7b85eb9ffaea3311cde2fe9542078b9640369032b26bb5403fff
> +               size 14
> +               EOF
> +               cat file3.bin >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               file.dat filter=lfs -text
> +               file2.bin filter=lfs -text
> +               file3.bin filter=lfs -text
> +               EOF
> +               cat .gitattributes >actual &&
> +               test_cmp expect actual
> +       )
> +'
> +
> +test_expect_success 'Store files in LFS based on size (14 bytes)' '
> +       client_view "//depot/... //client/..." &&
> +       git p4 clone --use-client-spec --use-lfs-if-size-exceeds=14 --destination="$git" //depot@all &&
> +       test_when_finished cleanup_git &&
> +       (
> +               cd "$git" &&
> +
> +               cat >expect <<-\EOF &&
> +               .git/lfs/objects/e5/fe/e5fec48503cd7b85eb9ffaea3311cde2fe9542078b9640369032b26bb5403fff
> +               EOF
> +               find ".git/lfs/objects" -type f >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               bin 13 bytes
> +               EOF
> +               cat file.dat >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               bin 13 bytes
> +               EOF
> +               cat file2.bin >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               version https://git-lfs.github.com/spec/v1
> +               oid sha256:e5fec48503cd7b85eb9ffaea3311cde2fe9542078b9640369032b26bb5403fff
> +               size 14
> +               EOF
> +               cat file3.bin >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               file3.bin filter=lfs -text
> +               EOF
> +               cat .gitattributes >actual &&
> +               test_cmp expect actual
> +       )
> +'
> +
> +test_expect_success 'Store files in LFS based on extension (dat)' '
> +       client_view "//depot/... //client/..." &&
> +       git p4 clone --use-client-spec --use-lfs-for-extension=dat --destination="$git" //depot@all &&
> +       test_when_finished cleanup_git &&
> +       (
> +               cd "$git" &&
> +
> +               cat >expect <<-\EOF &&
> +               .git/lfs/objects/d4/43/d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               EOF
> +               find ".git/lfs/objects" -type f >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               version https://git-lfs.github.com/spec/v1
> +               oid sha256:d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               size 13
> +               EOF
> +               cat file.dat >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               bin 13 bytes
> +               EOF
> +               cat file2.bin >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               bin 14 bytess
> +               EOF
> +               cat file3.bin >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               *.dat filter=lfs -text
> +               EOF
> +               cat .gitattributes >actual &&
> +               test_cmp expect actual
> +       )
> +'
> +
> +test_expect_success 'Store files in LFS based on size (14 bytes) and extension (dat)' '
> +       client_view "//depot/... //client/..." &&
> +       git p4 clone \
> +               --use-client-spec \
> +               --use-lfs-if-size-exceeds=14 \
> +               --use-lfs-for-extension=dat \
> +               --destination="$git" //depot@all &&
> +       test_when_finished cleanup_git &&
> +       (
> +               cd "$git" &&
> +
> +               cat >expect <<-\EOF &&
> +               .git/lfs/objects/d4/43/d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               .git/lfs/objects/e5/fe/e5fec48503cd7b85eb9ffaea3311cde2fe9542078b9640369032b26bb5403fff
> +               EOF
> +               find ".git/lfs/objects" -type f >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               version https://git-lfs.github.com/spec/v1
> +               oid sha256:d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               size 13
> +               EOF
> +               cat file.dat >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               bin 13 bytes
> +               EOF
> +               cat file2.bin >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               version https://git-lfs.github.com/spec/v1
> +               oid sha256:e5fec48503cd7b85eb9ffaea3311cde2fe9542078b9640369032b26bb5403fff
> +               size 14
> +               EOF
> +               cat file3.bin >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               *.dat filter=lfs -text
> +               file3.bin filter=lfs -text
> +               EOF
> +               cat .gitattributes >actual &&
> +               test_cmp expect actual
> +       )
> +'
> +
> +test_expect_success 'Remove file from repo and store files in LFS based on size (10 bytes)' '
> +       client_view "//depot/... //client/..." &&
> +       (
> +               cd "$cli" &&
> +               p4 delete file3.bin &&
> +               p4 submit -d "Remove file"
> +       ) &&
> +
> +       git p4 clone --use-client-spec --use-lfs-if-size-exceeds=10 --destination="$git" //depot@all &&
> +       test_when_finished cleanup_git &&
> +       (
> +               cd "$git" &&
> +
> +               # Note that file3 remains here as it referenced in the history
> +               cat >expect <<-\EOF &&
> +               .git/lfs/objects/d4/43/d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               .git/lfs/objects/e5/fe/e5fec48503cd7b85eb9ffaea3311cde2fe9542078b9640369032b26bb5403fff
> +               EOF
> +               find ".git/lfs/objects" -type f >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               version https://git-lfs.github.com/spec/v1
> +               oid sha256:d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               size 13
> +               EOF
> +               cat file.dat >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               version https://git-lfs.github.com/spec/v1
> +               oid sha256:d443795c1aa3ff7e62afd89d6a86bb84ceba0305f6c22151aa8ee95077a39101
> +               size 13
> +               EOF
> +               cat file2.bin >actual &&
> +               test_cmp expect actual &&
> +
> +               cat >expect <<-\EOF &&
> +               file.dat filter=lfs -text
> +               file2.bin filter=lfs -text
> +               EOF
> +               cat .gitattributes >actual &&
> +               test_cmp expect actual
> +       )
> +'
> +
> +test_expect_success 'Clone repo with existing .gitattributes file' '
> +       client_view "//depot/... //client/..." &&
> +       (
> +               cd "$cli" &&
> +
> +               echo "*.txt text" >.gitattributes &&
> +               p4 add .gitattributes &&
> +               p4 submit -d "Add .gitattributes"
> +       ) &&
> +
> +       test_must_fail git p4 clone --use-client-spec --destination="$git" //depot 2>errs &&
> +       grep ".gitattributes already exists in P4." errs
> +'
> +
> +test_expect_success 'kill p4d' '
> +       kill_p4d
> +'
> +
> +test_done
> --
> 1.9.5 (Apple Git-50.3)
>
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]