On Fri, Dec 20, 2024 at 04:29:50PM +0000, Derrick Stolee via GitGitGadget wrote: > diff --git a/builtin/backfill.c b/builtin/backfill.c > index 38e6aaeaa03..177fd4286c7 100644 > --- a/builtin/backfill.c > +++ b/builtin/backfill.c [snip] > +static int fill_missing_blobs(const char *path UNUSED, > + struct oid_array *list, > + enum object_type type, > + void *data) > +{ > + struct backfill_context *ctx = data; > + > + if (type != OBJ_BLOB) > + return 0; > + > + for (size_t i = 0; i < list->nr; i++) { > + off_t size = 0; > + struct object_info info = OBJECT_INFO_INIT; > + info.disk_sizep = &size; > + if (oid_object_info_extended(ctx->repo, > + &list->oid[i], > + &info, > + OBJECT_INFO_FOR_PREFETCH) || > + !size) So this is the object existence test? Is there a reason why we don't use `repo_has_object_file()`, or its `_with_flags()` variant if we need to pass `OBJECT_INFO_FOR_PREFETCH`? > + oid_array_append(&ctx->current_batch, &list->oid[i]); > + } > + > + if (ctx->current_batch.nr >= ctx->batch_size) > + download_batch(ctx); > + > + return 0; > +} > + > +static int do_backfill(struct backfill_context *ctx) > +{ > + struct rev_info revs; > + struct path_walk_info info = PATH_WALK_INFO_INIT; > + int ret; > + > + repo_init_revisions(ctx->repo, &revs, ""); > + handle_revision_arg("HEAD", &revs, 0, 0); > + > + info.blobs = 1; > + info.tags = info.commits = info.trees = 0; Nit: this should be unnecessary as PATH_WALK_INFO_INIT already initialized those fields for us, right? > + info.revs = &revs; > + info.path_fn = fill_missing_blobs; > + info.path_fn_data = ctx; > + > + ret = walk_objects_by_path(&info); > + > + /* Download the objects that did not fill a batch. */ > + if (!ret) > + download_batch(ctx); > + > + backfill_context_clear(ctx); Nit: I think it's a bit funny that we're cleaning up the context over here rather than in the caller. Patrick