Nicolas Pitre wrote: > I wouldn't mind a _separate_ tool that would load a pack index, > determine object sizes from it, and then extract big objects to write > them as loose objects ... Add two new options to git-unpack-objects: --force:: Loose objects will be created even if they already exist in the repository packed. --min-blob-size=<n>:: Unpacking is only done for objects larger than or equal to n kB (uncompressed size). Passes the tests in "t" and tested on big objects. Based on "next" but should apply to "master" as well. Signed-off-by: Dana L. How <danahow@xxxxxxxxx> --- Documentation/git-unpack-objects.txt | 23 +++++++++++++++++++---- builtin-unpack-objects.c | 29 +++++++++++++++++++++++++++-- cache.h | 2 ++ sha1_file.c | 16 ++++++++++++---- 4 files changed, 60 insertions(+), 10 deletions(-) diff --git a/Documentation/git-unpack-objects.txt b/Documentation/git-unpack-objects.txt index ff6184b..3df2641 100644 --- a/Documentation/git-unpack-objects.txt +++ b/Documentation/git-unpack-objects.txt @@ -8,7 +8,7 @@ git-unpack-objects - Unpack objects from a packed archive SYNOPSIS -------- -'git-unpack-objects' [-n] [-q] [-r] <pack-file +'git-unpack-objects' [-n] [-q] [-r] [-f] [--min-blob-size=N] <pack-file DESCRIPTION @@ -17,9 +17,12 @@ Read a packed archive (.pack) from the standard input, expanding the objects contained within and writing them into the repository in "loose" (one object per file) format. -Objects that already exist in the repository will *not* be unpacked -from the pack-file. Therefore, nothing will be unpacked if you use -this command on a pack-file that exists within the target repository. +By default, objects that already exist in the repository will *not* +be unpacked from the pack-file. Therefore, nothing will be unpacked +if you use this command on a pack-file that exists within the target +repository, unless you specify -f. 
If an object already exists +unpacked in the repository, it will not be replaced with the copy +from the pack, with or without -f. Please see the `git-repack` documentation for options to generate new packs and replace existing ones. @@ -40,6 +43,18 @@ OPTIONS and make the best effort to recover as many objects as possible. +-f:: + Allow loose objects to be created in the same repository that + contains the packfile. + +--min-blob-size=<n>:: + Smallest loose object to create, expressed in kB. + Blobs smaller than this will not be unpacked. Default is 0. + If you specify this option with a deltified source packfile, + the source packfile should reside in the current repository + so delta bases too small to unpack are still accessible, and + therefore -f will be needed for anything to be written. + Author ------ diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c index a6ff62f..b8ee7b5 100644 --- a/builtin-unpack-objects.c +++ b/builtin-unpack-objects.c @@ -10,13 +10,16 @@ #include "progress.h" static int dry_run, quiet, recover, has_errors; -static const char unpack_usage[] = "git-unpack-objects [-n] [-q] [-r] < pack-file"; +static const char unpack_usage[] = +"git-unpack-objects [-n] [-q] [-r] [-f] [--min-blob-size=N] < pack-file"; /* We always read in 4kB chunks. */ static unsigned char buffer[4096]; static unsigned int offset, len; static off_t consumed_bytes; static SHA_CTX ctx; +static int force = 0; +uint32_t min_blob_size; /* * Make sure at least "min" bytes are available in the buffer, and @@ -131,7 +134,18 @@ static void added_object(unsigned nr, enum object_type type, static void write_object(unsigned nr, enum object_type type, void *buf, unsigned long size) { - if (write_sha1_file(buf, size, typename(type), obj_list[nr].sha1) < 0) + /* + * We never need to write it when it's too small. + * Otherwise, without -f, we write it only when + * it does not exist in the repository in any form. 
+ * Finally, with -f, we write it only when it does + * not exist in the local repository as a loose object. + * In all cases we fill in obj_list[nr].sha1 . + */ + if (size < min_blob_size) + hash_sha1_file(buf, size, typename(type), obj_list[nr].sha1); + else if (write_sha1_file_maybe(buf, size, typename(type), + force, obj_list[nr].sha1) < 0) die("failed to write object"); added_object(nr, type, buf, size); } @@ -361,6 +375,17 @@ int cmd_unpack_objects(int argc, const char **argv, const char *prefix) recover = 1; continue; } + if (!strcmp(arg, "-f")) { + force = 1; + continue; + } + if (!prefixcmp(arg, "--min-blob-size=")) { + char *end; + min_blob_size = strtoul(arg+16, &end, 0) * 1024; + if (!arg[16] || *end) + usage(unpack_usage); + continue; + } if (!prefixcmp(arg, "--pack_header=")) { struct pack_header *hdr; char *c; diff --git a/cache.h b/cache.h index ec85d93..4994d03 100644 --- a/cache.h +++ b/cache.h @@ -343,6 +343,8 @@ extern int sha1_object_info(const unsigned char *, unsigned long *); extern void * read_sha1_file(const unsigned char *sha1, enum object_type *type, unsigned long *size); extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *sha1); extern int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *return_sha1); +extern int write_sha1_file_maybe(void *buf, unsigned long len, const char *type, + int dup_ok, unsigned char *return_sha1); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); extern int check_sha1_signature(const unsigned char *sha1, void *buf, unsigned long size, const char *type); diff --git a/sha1_file.c b/sha1_file.c index 12d2ef2..e4c3288 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -1979,7 +1979,8 @@ int hash_sha1_file(const void *buf, unsigned long len, const char *type, return 0; } -int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned char *returnsha1) +int write_sha1_file_maybe(void *buf, unsigned 
long len, const char *type, + int dup_ok, unsigned char *returnsha1) { int size, ret; unsigned char *compressed; @@ -1990,14 +1991,15 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha char hdr[32]; int fd, hdrlen; - /* Normally if we have it in the pack then we do not bother writing - * it out into .git/objects/??/?{38} file. + /* Normally if in a pack (or anywhere else) then we do not write + * it out into .git/objects/??/?{38} file, but with dup_ok != 0 + * we only avoid overwriting a loose blob in the local repo. */ write_sha1_file_prepare(buf, len, type, sha1, hdr, &hdrlen); filename = sha1_file_name(sha1); if (returnsha1) hashcpy(returnsha1, sha1); - if (has_sha1_file(sha1)) + if (!dup_ok && has_sha1_file(sha1)) return 0; fd = open(filename, O_RDONLY); if (fd >= 0) { @@ -2062,6 +2064,12 @@ int write_sha1_file(void *buf, unsigned long len, const char *type, unsigned cha return move_temp_to_file(tmpfile, filename); } +int write_sha1_file(void *buf, unsigned long len, const char *type, + unsigned char *returnsha1) +{ + return write_sha1_file_maybe(buf, len, type, 0, returnsha1); +} + /* * We need to unpack and recompress the object for writing * it out to a different file. -- 1.5.2.762.gd8c6-dirty - To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html