On Sat, Oct 19, 2024 at 09:24:55PM -0400, Jeff King wrote: > So I feel like the root of the problem is that we moved the other side's > "pack-1234abcd.idx" into place at all! We do not plan to use it, but > only need to access it via our custom packed_git structs to see whether > we want to download the matching pack. And in fact I'd suspect there are > other possible bugs/trickery lurking, if the other side gives us a > broken .idx that does not match its pack (our odb would now have the > broken file with no matching pack). > > So IMHO a cleaner fix is that we should keep the stuff we download from > the remote marked as temporary files, and then throw them away when we > complete the fetch (rather than just expecting index-pack to overwrite > them). And here's what that might look like. It stores the temporary index outside of the pack/ directory, and then we don't "finalize" it into place. We have to teach parse_pack to retain the original name, but that's OK. It's used only by the http walker anyway. diff --git a/http.c b/http.c index d59e59f66b..6df032e40f 100644 --- a/http.c +++ b/http.c @@ -19,6 +19,7 @@ #include "string-list.h" #include "object-file.h" #include "object-store-ll.h" +#include "tempfile.h" static struct trace_key trace_curl = TRACE_KEY_INIT(CURL); static int trace_curl_data = 1; @@ -2388,8 +2389,24 @@ static char *fetch_pack_index(unsigned char *hash, const char *base_url) strbuf_addf(&buf, "objects/pack/pack-%s.idx", hash_to_hex(hash)); url = strbuf_detach(&buf, NULL); - strbuf_addf(&buf, "%s.temp", sha1_pack_index_name(hash)); - tmp = strbuf_detach(&buf, NULL); + /* + * Don't put this into packs/, since it's just temporary and we don't + * want to confuse it with our local .idx files. We'll generate our + * own index if we choose to download the matching packfile. + * + * It's tempting to use xmks_tempfile() here, but it's important that + * the file not exist, otherwise http_get_file() complains. So we + * create a filename that should be unique, and then just register it + * as a tempfile so that it will get cleaned up on exit. + * + * Arguably it would be better to hold on to the tempfile handle so + * that we can remove it as soon as we download the pack and generate + * the real index, but that might need more surgery. + */ + tmp = xstrfmt("%s/tmp_pack_%s.idx", + repo_get_object_directory(the_repository), + hash_to_hex(hash)); + register_tempfile(tmp); if (http_get_file(url, tmp, NULL) != HTTP_OK) { error("Unable to get pack index %s", url); @@ -2427,10 +2444,8 @@ static int fetch_and_setup_pack_index(struct packed_git **packs_head, } ret = verify_pack_index(new_pack); - if (!ret) { + if (!ret) close_pack_index(new_pack); - ret = finalize_object_file(tmp_idx, sha1_pack_index_name(sha1)); - } free(tmp_idx); if (ret) return -1; diff --git a/packfile.c b/packfile.c index df4ba67719..16d3bcf7f7 100644 --- a/packfile.c +++ b/packfile.c @@ -237,13 +237,22 @@ static struct packed_git *alloc_packed_git(int extra) return p; } +static char *pack_path_from_idx(const char *idx_path) +{ + size_t len; + if (!strip_suffix(idx_path, ".idx", &len)) + BUG("idx path does not end in .idx: %s", idx_path); + return xstrfmt("%.*s.pack", (int)len, idx_path); +} + struct packed_git *parse_pack_index(unsigned char *sha1, const char *idx_path) { - const char *path = sha1_pack_name(sha1); + char *path = pack_path_from_idx(idx_path); size_t alloc = st_add(strlen(path), 1); struct packed_git *p = alloc_packed_git(alloc); memcpy(p->pack_name, path, alloc); /* includes NUL */ + free(path); hashcpy(p->hash, sha1, the_repository->hash_algo); if (check_packed_git_idx(idx_path, p)) { free(p);