[snip commit message] Thanks for the very clear commit message explaining the new algorithm. > +struct bb_commit { > + struct commit_list *children; > + struct bitmap *bitmap; > + unsigned selected:1; > + unsigned idx; /* within selected array */ > +}; > + > +define_commit_slab(bb_data, struct bb_commit); > + > +struct bitmap_builder { > + struct bb_data data; > + struct commit **commits; > + size_t commits_nr, commits_alloc; > +}; > + > +static void bitmap_builder_init(struct bitmap_builder *bb, > + struct bitmap_writer *writer) > { > struct rev_info revs; > + struct commit *commit; > + unsigned int i; > + > + memset(bb, 0, sizeof(*bb)); > + init_bb_data(&bb->data); > + > + reset_revision_walk(); > + repo_init_revisions(writer->to_pack->repo, &revs, NULL); > + revs.topo_order = 1; > + > + for (i = 0; i < writer->selected_nr; i++) { > + struct commit *c = writer->selected[i].commit; > + struct bb_commit *ent = bb_data_at(&bb->data, c); > + ent->selected = 1; > + ent->idx = i; > + add_pending_object(&revs, &c->object, ""); > + } > + > + if (prepare_revision_walk(&revs)) > + die("revision walk setup failed"); > + > + while ((commit = get_revision(&revs))) { > + struct commit_list *p; > + > + parse_commit_or_die(commit); > + > + ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc); > + bb->commits[bb->commits_nr++] = commit; > + > + for (p = commit->parents; p; p = p->next) { > + struct bb_commit *ent = bb_data_at(&bb->data, p->item); > + commit_list_insert(commit, &ent->children); > + } > + } > +} Looks straightforward. > +static void bitmap_builder_clear(struct bitmap_builder *bb) > +{ > + clear_bb_data(&bb->data); > + free(bb->commits); > + bb->commits_nr = bb->commits_alloc = 0; > +} I was wondering why the commit list and the children in struct bb_commit weren't cleared, but that's because they are cleared during the algorithm. So this is fine. > +static void fill_bitmap_tree(struct bitmap *bitmap, > + struct tree *tree) > +{ > + uint32_t pos; > + struct tree_desc desc; > + struct name_entry entry; > + > + /* > + * If our bit is already set, then there is nothing to do. Both this > + * tree and all of its children will be set. > + */ > + pos = find_object_pos(&tree->object.oid); > + if (bitmap_get(bitmap, pos)) > + return; > + bitmap_set(bitmap, pos); > + > + if (parse_tree(tree) < 0) > + die("unable to load tree object %s", > + oid_to_hex(&tree->object.oid)); > + init_tree_desc(&desc, tree->buffer, tree->size); > + > + while (tree_entry(&desc, &entry)) { > + switch (object_type(entry.mode)) { > + case OBJ_TREE: > + fill_bitmap_tree(bitmap, > + lookup_tree(the_repository, &entry.oid)); > + break; > + case OBJ_BLOB: > + bitmap_set(bitmap, find_object_pos(&entry.oid)); > + break; > + default: > + /* Gitlink, etc; not reachable */ > + break; > + } > + } > + > + free_tree_buffer(tree); > +} Looks straightforward. > +static void fill_bitmap_commit(struct bb_commit *ent, > + struct commit *commit) > +{ > + if (!ent->bitmap) > + ent->bitmap = bitmap_new(); > + > + /* > + * mark ourselves, but do not bother with parents; their values > + * will already have been propagated to us > + */ > + bitmap_set(ent->bitmap, find_object_pos(&commit->object.oid)); > + fill_bitmap_tree(ent->bitmap, get_commit_tree(commit)); > +} OK - when filling the bitmap for a commit, we only set the specific bit for the commit itself, and all the bits for the commit's tree and the tree's descendants. This is consistent with the explanation of the algorithm in the commit message. > +static void store_selected(struct bb_commit *ent, struct commit *commit) > +{ > + struct bitmapped_commit *stored = &writer.selected[ent->idx]; > + khiter_t hash_pos; > + int hash_ret; > + > + /* > + * the "reuse bitmaps" phase may have stored something here, but > + * our new algorithm doesn't use it. Drop it. > + */ > + if (stored->bitmap) > + ewah_free(stored->bitmap); I tried to figure out how the "reuse bitmaps" phase stores things in this field, but that led me down a rabbit hole that I didn't pursue. But anyway, the new bitmap is correctly generated, so clearing the old bitmap is safe (except, possibly, wasting time, but I see that in a subsequent patch, existing bitmaps will be reused in a new wawy). > + > + stored->bitmap = bitmap_to_ewah(ent->bitmap); > + > + hash_pos = kh_put_oid_map(writer.bitmaps, commit->object.oid, &hash_ret); > + if (hash_ret == 0) > + die("Duplicate entry when writing index: %s", > + oid_to_hex(&commit->object.oid)); > + kh_value(writer.bitmaps, hash_pos) = stored; > +} > + > +void bitmap_writer_build(struct packing_data *to_pack) > +{ > + struct bitmap_builder bb; > + size_t i; > + int nr_stored = 0; /* for progress */ > > writer.bitmaps = kh_init_oid_map(); > writer.to_pack = to_pack; > > if (writer.show_progress) > writer.progress = start_progress("Building bitmaps", writer.selected_nr); > + trace2_region_enter("pack-bitmap-write", "building_bitmaps_total", > + the_repository); > + > + bitmap_builder_init(&bb, &writer); > + for (i = bb.commits_nr; i > 0; i--) { > + struct commit *commit = bb.commits[i-1]; > + struct bb_commit *ent = bb_data_at(&bb.data, commit); > + struct commit *child; > + > + fill_bitmap_commit(ent, commit); > + > + if (ent->selected) { > + store_selected(ent, commit); > + nr_stored++; > + display_progress(writer.progress, nr_stored); > + } > + > + while ((child = pop_commit(&ent->children))) { Here the children (specifically, the struct commit_list) are freed (one by one). > + struct bb_commit *child_ent = > + bb_data_at(&bb.data, child); > + > + if (child_ent->bitmap) > + bitmap_or(child_ent->bitmap, ent->bitmap); > + else > + child_ent->bitmap = bitmap_dup(ent->bitmap); > + } > + bitmap_free(ent->bitmap); > + ent->bitmap = NULL; Here the bitmap is freed. > } > + bitmap_builder_clear(&bb); > > stop_progress(&writer.progress); > > compute_xor_offsets(); Thanks - overall this looks straightforward.