Eric Sunshine <sunshine@xxxxxxxxxxxxxx> writes: > On Wed, Sep 27, 2023 at 3:56 PM Eric W. Biederman <ebiederm@xxxxxxxxx> wrote: >> As part of the transition plan, we'd like to add a file in the .git >> directory that maps loose objects between SHA-1 and SHA-256. Let's >> implement the specification in the transition plan and store this data >> on a per-repository basis in struct repository. >> >> Signed-off-by: brian m. carlson <sandals@xxxxxxxxxxxxxxxxxxxx> >> Signed-off-by: Eric W. Biederman <ebiederm@xxxxxxxxxxxx> >> --- >> diff --git a/loose.c b/loose.c >> @@ -0,0 +1,245 @@ >> +static int load_one_loose_object_map(struct repository *repo, struct object_directory *dir) >> +{ >> + struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; >> + FILE *fp; >> + >> + if (!dir->loose_map) >> + loose_object_map_init(&dir->loose_map); >> + >> + insert_oid_pair(dir->loose_map->to_compat, repo->hash_algo->empty_tree, repo->compat_hash_algo->empty_tree); >> + insert_oid_pair(dir->loose_map->to_storage, repo->compat_hash_algo->empty_tree, repo->hash_algo->empty_tree); >> + >> + insert_oid_pair(dir->loose_map->to_compat, repo->hash_algo->empty_blob, repo->compat_hash_algo->empty_blob); >> + insert_oid_pair(dir->loose_map->to_storage, repo->compat_hash_algo->empty_blob, repo->hash_algo->empty_blob); >> + >> + insert_oid_pair(dir->loose_map->to_compat, repo->hash_algo->null_oid, repo->compat_hash_algo->null_oid); >> + insert_oid_pair(dir->loose_map->to_storage, repo->compat_hash_algo->null_oid, repo->hash_algo->null_oid); >> + >> + strbuf_git_common_path(&path, repo, "objects/loose-object-idx"); >> + fp = fopen(path.buf, "rb"); >> + if (!fp) >> + return 0; > > This early return leaks `path`. At minimum, call > `strbuf_release(&path)` before returning. yep. Fixed. >> + errno = 0; >> + if (strbuf_getwholeline(&buf, fp, '\n') || strcmp(buf.buf, loose_object_header)) >> + goto err; >> + while (!strbuf_getline_lf(&buf, fp)) { >> + const char *p; >> + struct object_id oid, compat_oid; >> + if (parse_oid_hex_algop(buf.buf, &oid, &p, repo->hash_algo) || >> + *p++ != ' ' || >> + parse_oid_hex_algop(p, &compat_oid, &p, repo->compat_hash_algo) || >> + p != buf.buf + buf.len) >> + goto err; >> + insert_oid_pair(dir->loose_map->to_compat, &oid, &compat_oid); >> + insert_oid_pair(dir->loose_map->to_storage, &compat_oid, &oid); >> + } >> + >> + strbuf_release(&buf); >> + strbuf_release(&path); >> + return errno ? -1 : 0; >> +err: >> + strbuf_release(&buf); >> + strbuf_release(&path); >> + return -1; >> +} >> + >> +int repo_write_loose_object_map(struct repository *repo) >> +{ >> + kh_oid_map_t *map = repo->objects->odb->loose_map->to_compat; >> + struct lock_file lock; >> + int fd; >> + khiter_t iter; >> + struct strbuf buf = STRBUF_INIT, path = STRBUF_INIT; >> + >> + if (!should_use_loose_object_map(repo)) >> + return 0; >> + >> + strbuf_git_common_path(&path, repo, "objects/loose-object-idx"); >> + fd = hold_lock_file_for_update_timeout(&lock, path.buf, LOCK_DIE_ON_ERROR, -1); >> + iter = kh_begin(map); >> + if (write_in_full(fd, loose_object_header, strlen(loose_object_header)) < 0) >> + goto errout; >> + >> + for (; iter != kh_end(map); iter++) { >> + if (kh_exist(map, iter)) { >> + if (oideq(&kh_key(map, iter), the_hash_algo->empty_tree) || >> + oideq(&kh_key(map, iter), the_hash_algo->empty_blob)) >> + continue; >> + strbuf_addf(&buf, "%s %s\n", oid_to_hex(&kh_key(map, iter)), oid_to_hex(kh_value(map, iter))); >> + if (write_in_full(fd, buf.buf, buf.len) < 0) >> + goto errout; >> + strbuf_reset(&buf); >> + } >> + } > > Nit: If you call strbuf_reset() immediately before strbuf_addf(), then > you save the reader of the code the effort of having to scan backward > through the function to verify that `buf` is empty the first time > through the loop. I am actually perplexed why the code works this way. My gut says we should create the entire buffer in memory and then write it to disk all in a single system call, and not perform this line buffering. Doing that would remove the need for the strbuf_reset entirely, and would just require the buffer to be primed with the loose_object_header. But what is there works and I will leave it for now. It isn't a bug, and it can be improved with a follow up patch. >> + strbuf_release(&buf); >> + if (commit_lock_file(&lock) < 0) { >> + error_errno(_("could not write loose object index %s"), path.buf); >> + strbuf_release(&path); >> + return -1; >> + } >> + strbuf_release(&path); >> + return 0; >> +errout: >> + rollback_lock_file(&lock); >> + strbuf_release(&buf); >> + error_errno(_("failed to write loose object index %s\n"), path.buf); >> + strbuf_release(&path); >> + return -1; >> +} >> + >> +int repo_loose_object_map_oid(struct repository *repo, >> + const struct object_id *src, >> + const struct git_hash_algo *to, >> + struct object_id *dest) >> + >> +{ > > Style: unnecessary blank line before opening `{` Yep. Fixed. > >> + struct object_directory *dir; >> + kh_oid_map_t *map; >> + khiter_t pos; >> + >> + for (dir = repo->objects->odb; dir; dir = dir->next) { >> + struct loose_object_map *loose_map = dir->loose_map; >> + if (!loose_map) >> + continue; >> + map = (to == repo->compat_hash_algo) ? >> + loose_map->to_compat : >> + loose_map->to_storage; >> + pos = kh_get_oid_map(map, *src); >> + if (pos < kh_end(map)) { >> + oidcpy(dest, kh_value(map, pos)); >> + return 0; >> + } >> + } >> + return -1; >> +} Eric