I've reviewed the commit message already [1]; now let's look at the code. [1] https://lore.kernel.org/git/20201124060738.762751-1-jonathantanmy@xxxxxxxxxx/ > struct bb_commit { > struct commit_list *reverse_edges; > + struct bitmap *commit_mask; > struct bitmap *bitmap; > - unsigned selected:1; > + unsigned selected:1, > + maximal:1; The code itself probably should contain comments about the algorithm, but I'm not sure of the best way to do it. (E.g. I would say that "maximal" should be "When iteration in bitmap_builder_init() reaches this bb_commit, this is true iff none of its descendants has or will ever have the exact same commit_mask" - but then when do we explain why the commit_mask matters?) > unsigned idx; /* within selected array */ > }; > > @@ -198,7 +200,7 @@ static void bitmap_builder_init(struct bitmap_builder *bb, > { > struct rev_info revs; > struct commit *commit; > - unsigned int i; > + unsigned int i, num_maximal; > > memset(bb, 0, sizeof(*bb)); > init_bb_data(&bb->data); > @@ -210,27 +212,85 @@ static void bitmap_builder_init(struct bitmap_builder *bb, > for (i = 0; i < writer->selected_nr; i++) { > struct commit *c = writer->selected[i].commit; > struct bb_commit *ent = bb_data_at(&bb->data, c); > + > ent->selected = 1; > + ent->maximal = 1; > ent->idx = i; > + > + ent->commit_mask = bitmap_new(); > + bitmap_set(ent->commit_mask, i); > + > add_pending_object(&revs, &c->object, ""); > } > + num_maximal = writer->selected_nr; > > if (prepare_revision_walk(&revs)) > die("revision walk setup failed"); > > while ((commit = get_revision(&revs))) { > struct commit_list *p; > + struct bb_commit *c_ent; > > parse_commit_or_die(commit); > > - ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc); > - bb->commits[bb->commits_nr++] = commit; > + c_ent = bb_data_at(&bb->data, commit); > + > + if (c_ent->maximal) { > + if (!c_ent->selected) { > + bitmap_set(c_ent->commit_mask, num_maximal); > + num_maximal++; > + } > + > + ALLOC_GROW(bb->commits, bb->commits_nr + 1, bb->commits_alloc); > + bb->commits[bb->commits_nr++] = commit; So the order of bit assignments in the commit_mask and the order of commits in bb->commits are not the same. In the commit_mask, bits are first assigned for selected commits and then the rest for commits we discover to be maximal. But in bb->commits, the order follows the topologically-sorted iteration. This is fine, but might be worth a comment (to add to the already big comment burden...) > + } > > for (p = commit->parents; p; p = p->next) { > - struct bb_commit *ent = bb_data_at(&bb->data, p->item); > - commit_list_insert(commit, &ent->reverse_edges); > + struct bb_commit *p_ent = bb_data_at(&bb->data, p->item); > + int c_not_p, p_not_c; > + > + if (!p_ent->commit_mask) { > + p_ent->commit_mask = bitmap_new(); > + c_not_p = 1; > + p_not_c = 0; > + } else { > + c_not_p = bitmap_diff_nonzero(c_ent->commit_mask, p_ent->commit_mask); > + p_not_c = bitmap_diff_nonzero(p_ent->commit_mask, c_ent->commit_mask); > + } > + > + if (!c_not_p) > + continue; > + > + bitmap_or(p_ent->commit_mask, c_ent->commit_mask); > + > + if (p_not_c) > + p_ent->maximal = 1; > + else { > + p_ent->maximal = 0; > + free_commit_list(p_ent->reverse_edges); > + p_ent->reverse_edges = NULL; > + } > + > + if (c_ent->maximal) { > + commit_list_insert(commit, &p_ent->reverse_edges); > + } else { > + struct commit_list *cc = c_ent->reverse_edges; > + > + for (; cc; cc = cc->next) { > + if (!commit_list_contains(cc->item, p_ent->reverse_edges)) > + commit_list_insert(cc->item, &p_ent->reverse_edges); > + } > + } > } > + > + bitmap_free(c_ent->commit_mask); > + c_ent->commit_mask = NULL; > } > + > + trace2_data_intmax("pack-bitmap-write", the_repository, > + "num_selected_commits", writer->selected_nr); > + trace2_data_intmax("pack-bitmap-write", the_repository, > + "num_maximal_commits", num_maximal); > } The rest looks like a faithful implementation of the algorithm. Now let's look at the tests. > +# To ensure the logic for "maximal commits" is exercised, make > +# the repository a bit more complicated. > +# > +# other master > +# * * > +# (99 commits) (99 commits) > +# * * > +# |\ /| > +# | * octo-other octo-master * | > +# |/|\_________ ____________/|\| > +# | \ \/ __________/ | > +# | | ________/\ / | > +# * |/ * merge-right * > +# | _|__________/ \____________ | > +# |/ | \| > +# (l1) * * merge-left * (r1) > +# | / \________________________ | > +# |/ \| > +# (l2) * * (r2) > +# \____________...____________ | What does the ... represent? If a certain number of commits, it would be clearer to write that there. > +# \| > +# * (base) OK - some of the crosses are unclear, but from the bitmask given below, I know where the lines should go. > +# > +# The important part for the maximal commit algorithm is how > +# the bitmasks are extended. Assuming starting bit positions > +# for master (bit 0) and other (bit 1), and some flexibility > +# in the order that merge bases are visited, the bitmasks at > +# the end should be: > +# > +# master: 1 (maximal, selected) > +# other: 01 (maximal, selected) > +# octo-master: 1 > +# octo-other: 01 > +# merge-right: 111 (maximal) > +# (l1): 111 > +# (r1): 111 > +# merge-left: 1101 (maximal) > +# (l2): 11111 (maximal) > +# (r2): 111101 (maximal) > +# (base): 1111111 (maximal) This makes sense. (l1) and (r1) are not maximal because everything that can reach merge-right can also reach them. [snip] > test_expect_success 'full repack creates bitmaps' ' > - git repack -ad && > + GIT_TRACE2_EVENT_NESTING=4 GIT_TRACE2_EVENT="$(pwd)/trace" \ > + git repack -ad && > ls .git/objects/pack/ | grep bitmap >output && > - test_line_count = 1 output > + test_line_count = 1 output && > + grep "\"key\":\"num_selected_commits\",\"value\":\"106\"" trace && > + grep "\"key\":\"num_maximal_commits\",\"value\":\"111\"" trace >From the diagram and bit masks, I see that the important number for "maximal" is 7. Could this test be run twice - one without the crosses and one with, and we can verify that the difference between the maximal commits is 7? As it is, this 111 number is hard to verify.