Here's v2. I think I've addressed all the review comments, including passing the index-pack args as separate arguments (to avoid the necessity to somehow encode in order to get rid of spaces), and by using a custom error function instead of a specific option in fsck. This applies on master. I mentioned earlier [1] that I was planning to implement this on Ævar's fsck API improvements, but after looking at the latest v2, I see that it omits patch 11 from v1 (which is the one I need), so what I've done is to use a string check in the meantime. [1] https://lore.kernel.org/git/20210219004612.1181920-1-jonathantanmy@xxxxxxxxxx/ Jonathan Tan (4): http: allow custom index-pack args http-fetch: allow custom index-pack args fetch-pack: with packfile URIs, use index-pack arg fetch-pack: print and use dangling .gitmodules Documentation/git-http-fetch.txt | 10 ++- Documentation/git-index-pack.txt | 7 ++- builtin/index-pack.c | 25 +++++++- builtin/receive-pack.c | 2 +- fetch-pack.c | 103 ++++++++++++++++++++++++++----- fsck.c | 5 ++ fsck.h | 2 + http-fetch.c | 20 +++++- http.c | 15 ++--- http.h | 10 +-- pack-write.c | 8 ++- pack.h | 2 +- t/t5550-http-fetch-dumb.sh | 5 +- t/t5702-protocol-v2.sh | 58 +++++++++++++++-- 14 files changed, 227 insertions(+), 45 deletions(-) Range-diff against v1: -: ---------- > 1: b7e376be16 http: allow custom index-pack args 1: 9fba6c9bcc ! 2: 57220ceb84 http-fetch: allow custom index-pack args @@ Documentation/git-http-fetch.txt: commit-id:: --packfile=<hash>:: - Instead of a commit id on the command line (which is not expected in -+ For internal use only. Instead of a commit id on the command line (which is not expected in ++ For internal use only. Instead of a commit id on the command ++ line (which is not expected in this case), 'git http-fetch' fetches the packfile directly at the given URL and uses index-pack to generate corresponding .idx and .keep files. The hash is used to determine the name of the temporary file and is @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, strvec_pushf(&cmd.args, "--packfile=%.*s", (int) the_hash_algo->hexsz, packfile_uris.items[i].string); -+ strvec_push(&cmd.args, "--index-pack-args=index-pack --stdin --keep"); ++ strvec_push(&cmd.args, "--index-pack-arg=index-pack"); ++ strvec_push(&cmd.args, "--index-pack-arg=--stdin"); ++ strvec_push(&cmd.args, "--index-pack-arg=--keep"); strvec_push(&cmd.args, uri); cmd.git_cmd = 1; cmd.no_stdin = 1; @@ http-fetch.c: int cmd_main(int argc, const char **argv) int packfile = 0; int nongit; struct object_id packfile_hash; -+ const char *index_pack_args = NULL; ++ struct strvec index_pack_args = STRVEC_INIT; setup_git_directory_gently(&nongit); @@ http-fetch.c: int cmd_main(int argc, const char **argv) packfile = 1; if (parse_oid_hex(p, &packfile_hash, &end) || *end) die(_("argument to --packfile must be a valid hash (got '%s')"), p); -+ } else if (skip_prefix(argv[arg], "--index-pack-args=", &p)) { -+ index_pack_args = p; ++ } else if (skip_prefix(argv[arg], "--index-pack-arg=", &p)) { ++ strvec_push(&index_pack_args, p); } arg++; } @@ http-fetch.c: int cmd_main(int argc, const char **argv) if (packfile) { - fetch_single_packfile(&packfile_hash, argv[arg]); -+ struct strvec encoded = STRVEC_INIT; -+ char **raw; -+ int i; -+ -+ if (!index_pack_args) ++ if (!index_pack_args.nr) + die(_("--packfile requires --index-pack-args")); + -+ strvec_split(&encoded, index_pack_args); -+ -+ CALLOC_ARRAY(raw, encoded.nr + 1); -+ for (i = 0; i < encoded.nr; i++) -+ raw[i] = url_percent_decode(encoded.v[i]); -+ + fetch_single_packfile(&packfile_hash, argv[arg], -+ (const char **) raw); -+ -+ for (i = 0; i < encoded.nr; i++) -+ free(raw[i]); -+ free(raw); -+ strvec_clear(&encoded); ++ index_pack_args.v); + return 0; } -+ if (index_pack_args) ++ if (index_pack_args.nr) + die(_("--index-pack-args can only be used with --packfile")); + if (commits_on_stdin) { @@ t/t5550-http-fetch-dumb.sh: test_expect_success 'http-fetch --packfile' ' p=$(cd "$HTTPD_DOCUMENT_ROOT_PATH"/repo_pack.git && ls objects/pack/pack-*.pack) && - git -C packfileclient http-fetch --packfile=$ARBITRARY "$HTTPD_URL"/dumb/repo_pack.git/$p >out && + git -C packfileclient http-fetch --packfile=$ARBITRARY \ -+ --index-pack-args="index-pack --stdin --keep" "$HTTPD_URL"/dumb/repo_pack.git/$p >out && ++ --index-pack-arg=index-pack --index-pack-arg=--stdin \ ++ --index-pack-arg=--keep \ ++ "$HTTPD_URL"/dumb/repo_pack.git/$p >out && grep "^keep.[0-9a-f]\{16,\}$" out && cut -c6- out >packhash && 2: 7c3244e79f ! 3: aa87335464 fetch-pack: with packfile URIs, use index-pack arg @@ fetch-pack.c: static void write_promisor_file(const char *keep_name, - * Pass 1 as "only_packfile" if the pack received is the only pack in this - * fetch request (that is, if there were no packfile URIs provided). + * If packfile URIs were provided, pass a non-NULL pointer to index_pack_args. -+ * The string to pass as the --index-pack-args argument to http-fetch will be ++ * The strings to pass as the --index-pack-arg arguments to http-fetch will be + * stored there. (It must be freed by the caller.) */ static int get_pack(struct fetch_pack_args *args, int xd[2], struct string_list *pack_lockfiles, - int only_packfile, -+ char **index_pack_args, ++ struct strvec *index_pack_args, struct ref **sought, int nr_sought) { struct async demux; @@ fetch-pack.c: static int get_pack(struct fetch_pack_args *args, } + if (index_pack_args) { -+ struct strbuf joined = STRBUF_INIT; + int i; + -+ for (i = 0; i < cmd.args.nr; i++) { -+ if (i) -+ strbuf_addch(&joined, ' '); -+ strbuf_addstr_urlencode(&joined, cmd.args.v[i], -+ is_rfc3986_unreserved); -+ } -+ *index_pack_args = strbuf_detach(&joined, NULL); ++ for (i = 0; i < cmd.args.nr; i++) ++ strvec_push(index_pack_args, cmd.args.v[i]); + } + cmd.in = demux.out; @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, int seen_ack = 0; struct string_list packfile_uris = STRING_LIST_INIT_DUP; int i; -+ char *index_pack_args = NULL; ++ struct strvec index_pack_args = STRVEC_INIT; negotiator = &negotiator_alloc; fetch_negotiator_init(r, negotiator); @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, die(_("git fetch-pack: fetch failed.")); do_check_stateless_delimiter(args, &reader); +@@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, + } + + for (i = 0; i < packfile_uris.nr; i++) { ++ int j; + struct child_process cmd = CHILD_PROCESS_INIT; + char packname[GIT_MAX_HEXSZ + 1]; + const char *uri = packfile_uris.items[i].string + @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, strvec_pushf(&cmd.args, "--packfile=%.*s", (int) the_hash_algo->hexsz, packfile_uris.items[i].string); -- strvec_push(&cmd.args, "--index-pack-args=index-pack --stdin --keep"); -+ strvec_pushf(&cmd.args, "--index-pack-args=%s", index_pack_args); +- strvec_push(&cmd.args, "--index-pack-arg=index-pack"); +- strvec_push(&cmd.args, "--index-pack-arg=--stdin"); +- strvec_push(&cmd.args, "--index-pack-arg=--keep"); ++ for (j = 0; j < index_pack_args.nr; j++) ++ strvec_pushf(&cmd.args, "--index-pack-arg=%s", ++ index_pack_args.v[j]); strvec_push(&cmd.args, uri); cmd.git_cmd = 1; cmd.no_stdin = 1; @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, packname)); } string_list_clear(&packfile_uris, 0); -+ FREE_AND_NULL(index_pack_args); ++ strvec_clear(&index_pack_args); if (negotiator) negotiator->release(negotiator); 3: 384c9d1c73 ! 4: e8b18d02e6 fetch-pack: print and use dangling .gitmodules @@ Documentation/git-index-pack.txt: OPTIONS Specifies the number of threads to spawn when resolving ## builtin/index-pack.c ## +@@ builtin/index-pack.c: static void show_pack_info(int stat_only) + } + } + ++static int print_dangling_gitmodules(struct fsck_options *o, ++ const struct object_id *oid, ++ enum object_type object_type, ++ int msg_type, const char *message) ++{ ++ /* ++ * NEEDSWORK: Plumb the MSG_ID (from fsck.c) here and use it ++ * instead of relying on this string check. ++ */ ++ if (starts_with(message, "gitmodulesMissing")) { ++ printf("%s\n", oid_to_hex(oid)); ++ return 0; ++ } ++ return fsck_error_function(o, oid, object_type, msg_type, message); ++} ++ + int cmd_index_pack(int argc, const char **argv, const char *prefix) + { + int i, fix_thin_pack = 0, verify = 0, stat_only = 0; @@ builtin/index-pack.c: int cmd_index_pack(int argc, const char **argv, const char *prefix) else close(input_fd); @@ builtin/index-pack.c: int cmd_index_pack(int argc, const char **argv, const char - if (do_fsck_object && fsck_finish(&fsck_options)) - die(_("fsck error in pack objects")); + if (do_fsck_object) { -+ struct fsck_options fo = FSCK_OPTIONS_STRICT; ++ struct fsck_options fo = fsck_options; + -+ fo.print_dangling_gitmodules = 1; ++ fo.error_func = print_dangling_gitmodules; + if (fsck_finish(&fo)) + die(_("fsck error in pack objects")); + } @@ fetch-pack.c: static void write_promisor_file(const char *keep_name, + /* * If packfile URIs were provided, pass a non-NULL pointer to index_pack_args. - * The string to pass as the --index-pack-args argument to http-fetch will be + * The strings to pass as the --index-pack-arg arguments to http-fetch will be @@ fetch-pack.c: static void write_promisor_file(const char *keep_name, static int get_pack(struct fetch_pack_args *args, int xd[2], struct string_list *pack_lockfiles, - char **index_pack_args, + struct strvec *index_pack_args, - struct ref **sought, int nr_sought) + struct ref **sought, int nr_sought, + struct oidset *gitmodules_oids) @@ fetch-pack.c: static struct ref *do_fetch_pack(struct fetch_pack_args *args, @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, struct string_list packfile_uris = STRING_LIST_INIT_DUP; int i; - char *index_pack_args = NULL; + struct strvec index_pack_args = STRVEC_INIT; + struct oidset gitmodules_oids = OIDSET_INIT; negotiator = &negotiator_alloc; @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, if (finish_command(&cmd)) @@ fetch-pack.c: static struct ref *do_fetch_pack_v2(struct fetch_pack_args *args, string_list_clear(&packfile_uris, 0); - FREE_AND_NULL(index_pack_args); + strvec_clear(&index_pack_args); + fsck_gitmodules_oids(&gitmodules_oids); + @@ fsck.c: int fsck_error_function(struct fsck_options *o, int fsck_finish(struct fsck_options *options) { int ret = 0; -@@ fsck.c: int fsck_finish(struct fsck_options *options) - if (!buf) { - if (is_promisor_object(oid)) - continue; -- ret |= report(options, -- oid, OBJ_BLOB, -- FSCK_MSG_GITMODULES_MISSING, -- "unable to read .gitmodules blob"); -+ if (options->print_dangling_gitmodules) -+ printf("%s\n", oid_to_hex(oid)); -+ else -+ ret |= report(options, -+ oid, OBJ_BLOB, -+ FSCK_MSG_GITMODULES_MISSING, -+ "unable to read .gitmodules blob"); - continue; - } - ## fsck.h ## -@@ fsck.h: struct fsck_options { - int *msg_type; - struct oidset skiplist; - kh_oid_map_t *object_names; -+ -+ /* -+ * If 1, print the hashes of missing .gitmodules blobs instead of -+ * considering them to be errors. -+ */ -+ unsigned print_dangling_gitmodules:1; - }; - - #define FSCK_OPTIONS_DEFAULT { NULL, fsck_error_function, 0, NULL, OIDSET_INIT } @@ fsck.h: int fsck_walk(struct object *obj, void *data, struct fsck_options *options); int fsck_object(struct object *obj, void *data, unsigned long size, struct fsck_options *options); @@ pack.h: int verify_pack_index(struct packed_git *); * The "hdr" output buffer should be at least this big, which will handle sizes ## t/t5702-protocol-v2.sh ## +@@ t/t5702-protocol-v2.sh: test_expect_success 'part of packfile response provided as URI' ' + test -f hfound && + test -f h2found && + +- # Ensure that there are exactly 6 files (3 .pack and 3 .idx). +- ls http_child/.git/objects/pack/* >filelist && ++ # Ensure that there are exactly 3 packfiles with associated .idx ++ ls http_child/.git/objects/pack/*.pack \ ++ http_child/.git/objects/pack/*.idx >filelist && + test_line_count = 6 filelist + ' + +@@ t/t5702-protocol-v2.sh: test_expect_success 'packfile-uri with transfer.fsckobjects' ' + -c fetch.uriprotocols=http,https \ + clone "$HTTPD_URL/smart/http_parent" http_child && + +- # Ensure that there are exactly 4 files (2 .pack and 2 .idx). +- ls http_child/.git/objects/pack/* >filelist && ++ # Ensure that there are exactly 2 packfiles with associated .idx ++ ls http_child/.git/objects/pack/*.pack \ ++ http_child/.git/objects/pack/*.idx >filelist && + test_line_count = 4 filelist + ' + @@ t/t5702-protocol-v2.sh: test_expect_success 'packfile-uri with transfer.fsckobjects fails on bad object' test_i18ngrep "invalid author/committer line - missing email" error ' @@ t/t5702-protocol-v2.sh: test_expect_success 'packfile-uri with transfer.fsckobje + -c fetch.uriprotocols=http,https \ + clone "$HTTPD_URL/smart/http_parent" http_child && + -+ # Ensure that there are exactly 4 files (2 .pack and 2 .idx). -+ ls http_child/.git/objects/pack/* >filelist && ++ # Ensure that there are exactly 2 packfiles with associated .idx ++ ls http_child/.git/objects/pack/*.pack \ ++ http_child/.git/objects/pack/*.idx >filelist && + test_line_count = 4 filelist +' + 4: da0d7b38ae < -: ---------- SQUASH??? test fix -- 2.30.0.617.g56c4b15f3c-goog