> Write multi-pack bitmaps in the format described by > Documentation/technical/bitmap-format.txt, inferring their presence with > the absence of '--bitmap'. > > To write a multi-pack bitmap, this patch attempts to reuse as much of > the existing machinery from pack-objects as possible. Specifically, the > MIDX code prepares a packing_data struct that pretends as if a single > packfile has been generated containing all of the objects contained > within the MIDX. Sounds good, and makes sense. Conceptually, the MIDX bitmap is the same as a regular packfile bitmap, just that the order of objects in the bitmap is defined differently. > +static void prepare_midx_packing_data(struct packing_data *pdata, > + struct write_midx_context *ctx) > +{ > + uint32_t i; > + > + memset(pdata, 0, sizeof(struct packing_data)); > + prepare_packing_data(the_repository, pdata); > + > + for (i = 0; i < ctx->entries_nr; i++) { > + struct pack_midx_entry *from = &ctx->entries[ctx->pack_order[i]]; > + struct object_entry *to = packlist_alloc(pdata, &from->oid); > + > + oe_set_in_pack(pdata, to, > + ctx->info[ctx->pack_perm[from->pack_int_id]].p); > + } > +} It is surprising to see this right at the top. Scrolling down, I guess that there is more information needed than just the packing_data struct. > +static int add_ref_to_pending(const char *refname, > + const struct object_id *oid, > + int flag, void *cb_data) > +{ > + struct rev_info *revs = (struct rev_info*)cb_data; > + struct object *object; > + > + if ((flag & REF_ISSYMREF) && (flag & REF_ISBROKEN)) { > + warning("symbolic ref is dangling: %s", refname); > + return 0; > + } > + > + object = parse_object_or_die(oid, refname); > + if (object->type != OBJ_COMMIT) > + return 0; > + > + add_pending_object(revs, object, ""); > + if (bitmap_is_preferred_refname(revs->repo, refname)) > + object->flags |= NEEDS_BITMAP; > + return 0; > +} Makes sense. We need to flag certain commits as NEEDS_BITMAP because bitmaps are not made for all commits but only certain ones. > +struct bitmap_commit_cb { > + struct commit **commits; > + size_t commits_nr, commits_alloc; > + > + struct write_midx_context *ctx; > +}; > + > +static const struct object_id *bitmap_oid_access(size_t index, > + const void *_entries) > +{ > + const struct pack_midx_entry *entries = _entries; > + return &entries[index].oid; > +} > + > +static void bitmap_show_commit(struct commit *commit, void *_data) > +{ > + struct bitmap_commit_cb *data = _data; > + if (oid_pos(&commit->object.oid, data->ctx->entries, > + data->ctx->entries_nr, > + bitmap_oid_access) > -1) { > + ALLOC_GROW(data->commits, data->commits_nr + 1, > + data->commits_alloc); > + data->commits[data->commits_nr++] = commit; > + } > +} > + > +static struct commit **find_commits_for_midx_bitmap(uint32_t *indexed_commits_nr_p, > + struct write_midx_context *ctx) > +{ > + struct rev_info revs; > + struct bitmap_commit_cb cb; > + > + memset(&cb, 0, sizeof(struct bitmap_commit_cb)); > + cb.ctx = ctx; > + > + repo_init_revisions(the_repository, &revs, NULL); > + for_each_ref(add_ref_to_pending, &revs); > + > + fetch_if_missing = 0; > + revs.exclude_promisor_objects = 1; I think that the MIDX bitmap requires all objects be present? If yes, we should omit these 2 lines. > + > + if (prepare_revision_walk(&revs)) > + die(_("revision walk setup failed")); > + > + traverse_commit_list(&revs, bitmap_show_commit, NULL, &cb); > + if (indexed_commits_nr_p) > + *indexed_commits_nr_p = cb.commits_nr; > + > + return cb.commits; > +} Hmm...I might be missing something obvious, but this function and its callbacks seem to be written like this in order to put the returned commits in a certain order. But later on in write_midx_bitmap(), the return value of this function is passed to bitmap_writer_select_commits(), which resorts the list anyway? > +static int write_midx_bitmap(char *midx_name, unsigned char *midx_hash, > + struct write_midx_context *ctx, > + unsigned flags) > +{ > + struct packing_data pdata; > + struct pack_idx_entry **index; > + struct commit **commits = NULL; > + uint32_t i, commits_nr; > + char *bitmap_name = xstrfmt("%s-%s.bitmap", midx_name, hash_to_hex(midx_hash)); > + int ret; > + > + prepare_midx_packing_data(&pdata, ctx); > + > + commits = find_commits_for_midx_bitmap(&commits_nr, ctx); > + > + /* > + * Build the MIDX-order index based on pdata.objects (which is already > + * in MIDX order; c.f., 'midx_pack_order_cmp()' for the definition of > + * this order). > + */ > + ALLOC_ARRAY(index, pdata.nr_objects); > + for (i = 0; i < pdata.nr_objects; i++) > + index[i] = (struct pack_idx_entry *)&pdata.objects[i]; > + > + bitmap_writer_show_progress(flags & MIDX_PROGRESS); > + bitmap_writer_build_type_index(&pdata, index, pdata.nr_objects); > + > + /* > + * bitmap_writer_select_commits expects objects in lex order, but > + * pack_order gives us exactly that. use it directly instead of > + * re-sorting the array > + */ > + for (i = 0; i < pdata.nr_objects; i++) > + index[ctx->pack_order[i]] = (struct pack_idx_entry *)&pdata.objects[i]; > + > + bitmap_writer_select_commits(commits, commits_nr, -1); The comment above says bitmap_writer_select_commits() expects objects in lex order, but (1) you're putting "index" in lex order, not "commits", and (2) the first thing in bitmap_writer_select_commits() is a QSORT. Did you mean another function? > + ret = bitmap_writer_build(&pdata); > + if (!ret) > + goto cleanup; > + > + bitmap_writer_set_checksum(midx_hash); > + bitmap_writer_finish(index, pdata.nr_objects, bitmap_name, 0); So bitmap_writer_build_type_index() and bitmap_writer_finish() are called with 2 different orders of commits. Is this expected? If yes, maybe this is worth a comment. > + > +cleanup: > + free(index); > + free(bitmap_name); > + return ret; > +} [snip] > @@ -930,9 +1073,16 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * > for (i = 0; i < ctx.m->num_packs; i++) { > ALLOC_GROW(ctx.info, ctx.nr + 1, ctx.alloc); > > + if (prepare_midx_pack(the_repository, ctx.m, i)) { > + error(_("could not load pack %s"), > + ctx.m->pack_names[i]); > + result = 1; > + goto cleanup; > + } > + > ctx.info[ctx.nr].orig_pack_int_id = i; > ctx.info[ctx.nr].pack_name = xstrdup(ctx.m->pack_names[i]); > - ctx.info[ctx.nr].p = NULL; > + ctx.info[ctx.nr].p = ctx.m->packs[i]; > ctx.info[ctx.nr].expired = 0; > ctx.nr++; > } Why is this needed now and not before? From what I see in this function, nothing seems to happen to this .p pack except that they are closed later. > @@ -1096,6 +1264,15 @@ static int write_midx_internal(const char *object_dir, struct multi_pack_index * > if (ctx.info[i].p) { > close_pack(ctx.info[i].p); > free(ctx.info[i].p); > + if (ctx.m) { > + /* > + * Destroy a stale reference to the pack in > + * 'ctx.m'. > + */ > + uint32_t orig = ctx.info[i].orig_pack_int_id; > + if (orig < ctx.m->num_packs) > + ctx.m->packs[orig] = NULL; > + } > } > free(ctx.info[i].pack_name); > } Is this hunk needed? "ctx" is a local variable and will not outlast this function. I'll review the rest tomorrow. It seems like I've gotten over the most difficult patches.