Current code peaks into the transfered pack's header, if the number of objects is under a limit, unpack-objects is called to handle the rest, otherwise index-pack is. This patch makes fetch-pack use index-pack unconditionally, then turn objects loose and remove the pack at the end. unpack-objects is deprecated and may be removed in future. There are a few reasons for this: - down to two code paths to maintain regarding pack reading (sha1_file.c and index-pack.c). When .pack v4 comes, we don't need to duplicate work in index-pack and unpack-objects. - the number of objects should not be the only indicator for unpacking. If there are a few large objects in the pack, the pack should be kept anyway. Keeping the pack lets us examine the whole pack later and make a better decision. - by going through index-pack first, then unpack, we pay extra cost for completing a thin pack into a full one. But compared to fetch's total time, it should not be noticeable because unpack-objects is only called when the pack contains a small number of objects. - unpack-objects does not support streaming large blobs. Granted force_object_loose(), which is used by this patch, does not either. But it'll be easier to do so because sha1_file.c interface supports streaming. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> --- builtin/receive-pack.c | 120 +++++++++++++------------------------------------ cache.h | 1 + fetch-pack.c | 77 +++++++++++-------------------- sha1_file.c | 70 ++++++++++++++++++++++++++++- 4 files changed, 128 insertions(+), 140 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index e3eb5fc..6eb4ffb 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -792,105 +792,49 @@ static struct command *read_head_info(void) return commands; } -static const char *parse_pack_header(struct pack_header *hdr) -{ - switch (read_pack_header(0, hdr)) { - case PH_ERROR_EOF: - return "eof before pack header was fully read"; - - case PH_ERROR_PACK_SIGNATURE: - return "protocol error (pack signature mismatch detected)"; - - case PH_ERROR_PROTOCOL: - return "protocol error (pack version unsupported)"; - - default: - return "unknown error in parse_pack_header"; - - case 0: - return NULL; - } -} - static const char *pack_lockfile; static const char *unpack(int err_fd) { - struct pack_header hdr; - const char *hdr_err; - char hdr_arg[38]; int fsck_objects = (receive_fsck_objects >= 0 ? receive_fsck_objects : transfer_fsck_objects >= 0 ? transfer_fsck_objects : 0); - hdr_err = parse_pack_header(&hdr); - if (hdr_err) { - if (err_fd > 0) - close(err_fd); - return hdr_err; - } - snprintf(hdr_arg, sizeof(hdr_arg), - "--pack_header=%"PRIu32",%"PRIu32, - ntohl(hdr.hdr_version), ntohl(hdr.hdr_entries)); - - if (ntohl(hdr.hdr_entries) < unpack_limit) { - int code, i = 0; - struct child_process child; - const char *unpacker[5]; - unpacker[i++] = "unpack-objects"; - if (quiet) - unpacker[i++] = "-q"; - if (fsck_objects) - unpacker[i++] = "--strict"; - unpacker[i++] = hdr_arg; - unpacker[i++] = NULL; - memset(&child, 0, sizeof(child)); - child.argv = unpacker; - child.no_stdout = 1; - child.err = err_fd; - child.git_cmd = 1; - code = run_command(&child); - if (!code) - return NULL; - return "unpack-objects abnormal exit"; - } else { - const char *keeper[7]; - int s, status, i = 0; - char keep_arg[256]; - struct child_process ip; - - s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) getpid()); - if (gethostname(keep_arg + s, sizeof(keep_arg) - s)) - strcpy(keep_arg + s, "localhost"); - - keeper[i++] = "index-pack"; - keeper[i++] = "--stdin"; - if (fsck_objects) - keeper[i++] = "--strict"; - keeper[i++] = "--fix-thin"; - keeper[i++] = hdr_arg; - keeper[i++] = keep_arg; - keeper[i++] = NULL; - memset(&ip, 0, sizeof(ip)); - ip.argv = keeper; - ip.out = -1; - ip.err = err_fd; - ip.git_cmd = 1; - status = start_command(&ip); - if (status) { - return "index-pack fork failed"; - } - pack_lockfile = index_pack_lockfile(ip.out); - close(ip.out); - status = finish_command(&ip); - if (!status) { - reprepare_packed_git(); - return NULL; - } + const char *keeper[7]; + int s, status, i = 0; + char keep_arg[256]; + struct child_process ip; + + s = sprintf(keep_arg, "--keep=receive-pack %"PRIuMAX" on ", (uintmax_t) getpid()); + if (gethostname(keep_arg + s, sizeof(keep_arg) - s)) + strcpy(keep_arg + s, "localhost"); + + keeper[i++] = "index-pack"; + keeper[i++] = "--stdin"; + if (fsck_objects) + keeper[i++] = "--strict"; + keeper[i++] = "--fix-thin"; + keeper[i++] = keep_arg; + keeper[i++] = NULL; + memset(&ip, 0, sizeof(ip)); + ip.argv = keeper; + ip.out = -1; + ip.err = err_fd; + ip.git_cmd = 1; + status = start_command(&ip); + if (status) { + return "index-pack fork failed"; + } + pack_lockfile = index_pack_lockfile(ip.out); + close(ip.out); + status = finish_command(&ip); + if (status) return "index-pack abnormal exit"; - } + if (!maybe_unpack_objects(pack_lockfile, unpack_limit)) + pack_lockfile = NULL; + return NULL; } static const char *unpack_with_sideband(void) diff --git a/cache.h b/cache.h index 85b544f..0fff958 100644 --- a/cache.h +++ b/cache.h @@ -789,6 +789,7 @@ extern int hash_sha1_file(const void *buf, unsigned long len, const char *type, extern int write_sha1_file(const void *buf, unsigned long len, const char *type, unsigned char *return_sha1); extern int pretend_sha1_file(void *, unsigned long, enum object_type, unsigned char *); extern int force_object_loose(const unsigned char *sha1, time_t mtime); +extern int maybe_unpack_objects(const char *pack_lockfile, int limit); extern void *map_sha1_file(const unsigned char *sha1, unsigned long *size); extern int unpack_sha1_header(git_zstream *stream, unsigned char *map, unsigned long mapsize, void *buffer, unsigned long bufsiz); extern int parse_sha1_header(const char *hdr, unsigned long *sizep); diff --git a/fetch-pack.c b/fetch-pack.c index f5d99c1..9c81a15 100644 --- a/fetch-pack.c +++ b/fetch-pack.c @@ -687,9 +687,7 @@ static int get_pack(struct fetch_pack_args *args, struct async demux; const char *argv[22]; char keep_arg[256]; - char hdr_arg[256]; const char **av; - int do_keep = args->keep_pack; struct child_process cmd; int ret; @@ -711,54 +709,29 @@ static int get_pack(struct fetch_pack_args *args, memset(&cmd, 0, sizeof(cmd)); cmd.argv = argv; + cmd.out = -1; av = argv; - *hdr_arg = 0; - if (!args->keep_pack && unpack_limit) { - struct pack_header header; - - if (read_pack_header(demux.out, &header)) - die("protocol error: bad pack header"); - snprintf(hdr_arg, sizeof(hdr_arg), - "--pack_header=%"PRIu32",%"PRIu32, - ntohl(header.hdr_version), ntohl(header.hdr_entries)); - if (ntohl(header.hdr_entries) < unpack_limit) - do_keep = 0; - else - do_keep = 1; - } if (alternate_shallow_file) { *av++ = "--shallow-file"; *av++ = alternate_shallow_file; } - if (do_keep) { - if (pack_lockfile) - cmd.out = -1; - *av++ = "index-pack"; - *av++ = "--stdin"; - if (!args->quiet && !args->no_progress) - *av++ = "-v"; - if (args->use_thin_pack) - *av++ = "--fix-thin"; - if (args->lock_pack || unpack_limit) { - int s = sprintf(keep_arg, - "--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid()); - if (gethostname(keep_arg + s, sizeof(keep_arg) - s)) - strcpy(keep_arg + s, "localhost"); - *av++ = keep_arg; - } - if (args->check_self_contained_and_connected) - *av++ = "--check-self-contained-and-connected"; - } - else { - *av++ = "unpack-objects"; - if (args->quiet || args->no_progress) - *av++ = "-q"; - args->check_self_contained_and_connected = 0; - } - if (*hdr_arg) - *av++ = hdr_arg; + *av++ = "index-pack"; + *av++ = "--stdin"; + if (!args->quiet && !args->no_progress) + *av++ = "-v"; + if (args->use_thin_pack) + *av++ = "--fix-thin"; + if (args->lock_pack || unpack_limit) { + int s = sprintf(keep_arg, + "--keep=fetch-pack %"PRIuMAX " on ", (uintmax_t) getpid()); + if (gethostname(keep_arg + s, sizeof(keep_arg) - s)) + strcpy(keep_arg + s, "localhost"); + *av++ = keep_arg; + } + if (args->check_self_contained_and_connected) + *av++ = "--check-self-contained-and-connected"; if (fetch_fsck_objects >= 0 ? fetch_fsck_objects : transfer_fsck_objects >= 0 @@ -771,10 +744,8 @@ static int get_pack(struct fetch_pack_args *args, cmd.git_cmd = 1; if (start_command(&cmd)) die("fetch-pack: unable to fork off %s", argv[0]); - if (do_keep && pack_lockfile) { - *pack_lockfile = index_pack_lockfile(cmd.out); - close(cmd.out); - } + *pack_lockfile = index_pack_lockfile(cmd.out); + close(cmd.out); ret = finish_command(&cmd); if (!ret || (args->check_self_contained_and_connected && ret == 1)) @@ -820,11 +791,12 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, int fd[2], const struct ref *orig_ref, struct ref **sought, int nr_sought, - char **pack_lockfile) + char **pack_lockfile_p) { struct ref *ref = copy_ref_list(orig_ref); unsigned char sha1[20]; const char *agent_feature; + char *pack_lockfile = NULL; int agent_len; sort_ref_list(&ref, ref_compare_name); @@ -899,9 +871,15 @@ static struct ref *do_fetch_pack(struct fetch_pack_args *args, setup_alternate_shallow(); else alternate_shallow_file = NULL; - if (get_pack(args, fd, pack_lockfile)) + if (get_pack(args, fd, &pack_lockfile)) die("git fetch-pack: fetch failed."); + if (!maybe_unpack_objects(pack_lockfile, + args->keep_pack ? 0 : unpack_limit)) + pack_lockfile = NULL; + if (pack_lockfile_p) + *pack_lockfile_p = pack_lockfile; + all_done: return ref; } @@ -997,6 +975,5 @@ struct ref *fetch_pack(struct fetch_pack_args *args, commit_lock_file(&shallow_lock); } - reprepare_packed_git(); return ref_cpy; } diff --git a/sha1_file.c b/sha1_file.c index 8e27db1..a0cdeae 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -61,6 +61,8 @@ static struct cached_object empty_tree = { }; static struct packed_git *last_found_pack; +/* temporarily skip one pack in find_pack_entry() */ +static struct packed_git *skip_this_pack; static struct cached_object *find_cached_object(const unsigned char *sha1) { @@ -2376,11 +2378,15 @@ static int find_pack_entry(const unsigned char *sha1, struct pack_entry *e) if (!packed_git) return 0; - if (last_found_pack && fill_pack_entry(sha1, e, last_found_pack)) + if (last_found_pack && + last_found_pack != skip_this_pack && + fill_pack_entry(sha1, e, last_found_pack)) return 1; for (p = packed_git; p; p = p->next) { - if (p == last_found_pack || !fill_pack_entry(sha1, e, p)) + if (p == last_found_pack || + p == skip_this_pack || + !fill_pack_entry(sha1, e, p)) continue; last_found_pack = p; @@ -3133,3 +3139,63 @@ void assert_sha1_type(const unsigned char *sha1, enum object_type expect) die("%s is not a valid '%s' object", sha1_to_hex(sha1), typename(expect)); } + +static int has_sha1_except_in(struct packed_git *p, + const unsigned char *sha1) +{ + int has; + skip_this_pack = p; + has = has_sha1_file(sha1); + skip_this_pack = NULL; + return has; +} + +/* + * Unpack objects if the number of objects in the pack is lower than + * specified limit. Otherwise make sure the new pack is registered + * (including when pack_lockfile is NULL). Return 0 is the pack is + * removed. + */ +int maybe_unpack_objects(const char *pack_lockfile, int limit) +{ + const char *ext[] = { ".pack", ".idx", ".keep", NULL }; + struct packed_git *p; + char *path; + int len, ret = 0; + uint32_t i; + + reprepare_packed_git(); + if (!pack_lockfile) + return -1; + + /* must be enough for any extensions in ext[] */ + path = xstrdup(pack_lockfile); + len = strlen(pack_lockfile) - strlen(".keep"); + strcpy(path + len, ".pack"); + for (p = packed_git; p; p = p->next) + if (!strcmp(p->pack_name, path)) + break; + if (!p || open_pack_index(p)) + die("unable to find pack %s", path); + + if (p->num_objects >= limit) { + free(path); + return -1; + } + + for (i = 0; i < p->num_objects; i++) { + const unsigned char *sha1 = nth_packed_object_sha1(p, i); + if (!has_sha1_except_in(p, sha1)) /* skip --fix-thin objects */ + ret |= force_object_loose(sha1, p->mtime); + } + + if (!ret) { + free_pack_by_name(p->pack_name); + for (i = 0; ext[i]; i++) { + strcpy(path + len, ext[i]); + unlink_or_warn(path); + } + } + free(path); + return ret; +} -- 1.8.2.83.gc99314b -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html