Jonathan Tan <jonathantanmy@xxxxxxxxxx> writes: > When Git fetches a pack using dumb HTTP, it does at least 2 things > differently from when it fetches using fetch-pack or receive-pack: (1) > it reuses the server's name for the packfile (which incorporates a hash) > for the packfile, and (2) it does not create a .keep file to avoid race > conditions with repack. > > A subsequent patch will allow downloading packs over HTTP(S) as part of > a fetch. These downloads will not necessarily be from a Git repository, > and thus may not have a hash as part of its name. Also, generating a > .keep file will be necessary to avoid race conditions with repack (until > the fetch has successfully written the new refs). > > Thus, teach http to pass --stdin and --keep to index-pack, the former so > that we have no reliance on the server's name for the packfile, and the > latter so that we have the .keep file. Makes sense. Can a malicious dumb server mount a passive attack that serves misnamed packfiles and wait for victims to come? > Signed-off-by: Jonathan Tan <jonathantanmy@xxxxxxxxxx> > --- > This is part of the work of CDN offloading of fetch responses. > > I have plans to use the http_pack_request suite of functions to > implement the part where we download from CDN over HTTP(S), but need > this change to be able to do so. I think it's better from the code > quality perspective to reuse these functions, but this necessitates a > behavior change in that we no longer use the filename as declared by the > server, so I'm sending this as RFC to see what the community thinks. > --- > http-push.c | 7 ++++++- > http-walker.c | 5 ++++- > http.c | 42 ++++++++++++++++++++---------------------- > http.h | 2 +- > 4 files changed, 31 insertions(+), 25 deletions(-) > > diff --git a/http-push.c b/http-push.c > index b22c7caea0..409b266b0c 100644 > --- a/http-push.c > +++ b/http-push.c > @@ -586,11 +586,16 @@ static void finish_request(struct transfer_request *request) > fprintf(stderr, "Unable to get pack file %s\n%s", > request->url, curl_errorstr); > } else { > + char *lockfile; > + > preq = (struct http_pack_request *)request->userData; > > if (preq) { > - if (finish_http_pack_request(preq) == 0) > + if (finish_http_pack_request(preq, > + &lockfile) == 0) { > + unlink(lockfile); > fail = 0; > + } > release_http_pack_request(preq); > } > } > diff --git a/http-walker.c b/http-walker.c > index 8ae5d76c6a..804dc82304 100644 > --- a/http-walker.c > +++ b/http-walker.c > @@ -425,6 +425,7 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne > int ret; > struct slot_results results; > struct http_pack_request *preq; > + char *lockfile; > > if (fetch_indices(walker, repo)) > return -1; > @@ -457,7 +458,9 @@ static int http_fetch_pack(struct walker *walker, struct alt_base *repo, unsigne > goto abort; > } > > - ret = finish_http_pack_request(preq); > + ret = finish_http_pack_request(preq, &lockfile); > + if (!ret) > + unlink(lockfile); > release_http_pack_request(preq); > if (ret) > return ret; > diff --git a/http.c b/http.c > index a32ad36ddf..5f8e602cd2 100644 > --- a/http.c > +++ b/http.c > @@ -2200,13 +2200,13 @@ void release_http_pack_request(struct http_pack_request *preq) > free(preq); > } > > -int finish_http_pack_request(struct http_pack_request *preq) > +int finish_http_pack_request(struct http_pack_request *preq, char **lockfile) > { > struct packed_git **lst; > struct packed_git *p = preq->target; > - char *tmp_idx; > - size_t len; > struct child_process ip = CHILD_PROCESS_INIT; > + int tmpfile_fd; > + int ret = 0; > > close_pack_index(p); > > @@ -2218,35 +2218,33 @@ int finish_http_pack_request(struct http_pack_request *preq) > lst = &((*lst)->next); > *lst = (*lst)->next; > > - if (!strip_suffix(preq->tmpfile.buf, ".pack.temp", &len)) > - BUG("pack tmpfile does not end in .pack.temp?"); > - tmp_idx = xstrfmt("%.*s.idx.temp", (int)len, preq->tmpfile.buf); > + tmpfile_fd = xopen(preq->tmpfile.buf, O_RDONLY); > > argv_array_push(&ip.args, "index-pack"); > - argv_array_pushl(&ip.args, "-o", tmp_idx, NULL); > - argv_array_push(&ip.args, preq->tmpfile.buf); > + argv_array_push(&ip.args, "--stdin"); > + argv_array_pushf(&ip.args, "--keep=git %"PRIuMAX, (uintmax_t)getpid()); > ip.git_cmd = 1; > - ip.no_stdin = 1; > - ip.no_stdout = 1; > + ip.in = tmpfile_fd; > + ip.out = -1; > > - if (run_command(&ip)) { > - unlink(preq->tmpfile.buf); > - unlink(tmp_idx); > - free(tmp_idx); > - return -1; > + if (start_command(&ip)) { > + ret = -1; > + goto cleanup; > } > > - unlink(sha1_pack_index_name(p->sha1)); > + *lockfile = index_pack_lockfile(ip.out); > + close(ip.out); > > - if (finalize_object_file(preq->tmpfile.buf, sha1_pack_name(p->sha1)) > - || finalize_object_file(tmp_idx, sha1_pack_index_name(p->sha1))) { > - free(tmp_idx); > - return -1; > + if (finish_command(&ip)) { > + ret = -1; > + goto cleanup; > } > > install_packed_git(the_repository, p); > - free(tmp_idx); > - return 0; > +cleanup: > + close(tmpfile_fd); > + unlink(preq->tmpfile.buf); > + return ret; > } Nicely done. > > struct http_pack_request *new_http_pack_request( > diff --git a/http.h b/http.h > index 4eb4e808e5..20d1c85d0b 100644 > --- a/http.h > +++ b/http.h > @@ -212,7 +212,7 @@ struct http_pack_request { > > extern struct http_pack_request *new_http_pack_request( > struct packed_git *target, const char *base_url); > -extern int finish_http_pack_request(struct http_pack_request *preq); > +int finish_http_pack_request(struct http_pack_request *preq, char **lockfile); > extern void release_http_pack_request(struct http_pack_request *preq); > > /* Helpers for fetching object */