Abhishek Kumar <abhishekkumar8222@xxxxxxxxx> writes: > The member graph_pos refers to the integer position used to identify a > commit in commit-graph files. However, graph_pos is not useful in other > contexts and bloats the struct. > > Let's move it to a commit-slab and shrink the struct by four bytes. > > Existing references to graph_pos are replaced using > 'contrib/coccinelle/graph_pos.cocci'. > > Signed-off-by: Abhishek Kumar <abhishekkumar8222@xxxxxxxxx> > --- > alloc.c | 1 - > bloom.c | 6 ++-- > commit-graph.c | 50 +++++++++++++++++++++++------- > commit-graph.h | 3 ++ > commit.c | 2 +- > commit.h | 2 -- > contrib/coccinelle/graph_pos.cocci | 12 +++++++ > 7 files changed, 58 insertions(+), 18 deletions(-) > create mode 100644 contrib/coccinelle/graph_pos.cocci I have reordered the chunks to make it easier to review. > diff --git a/commit.h b/commit.h > index 01e1c4c3eb..0b10464a10 100644 > --- a/commit.h > +++ b/commit.h > @@ -10,8 +10,6 @@ > #include "pretty.h" > #include "commit-slab.h" > > -#define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF > - > struct commit_list { > struct commit *item; > struct commit_list *next; > diff --git a/commit-graph.h b/commit-graph.h > index 653bd041ad..3cb59ba336 100644 > --- a/commit-graph.h > +++ b/commit-graph.h > @@ -8,6 +8,7 @@ > #include "object-store.h" > #include "oidset.h" > > +#define COMMIT_NOT_FROM_GRAPH 0xFFFFFFFF > #define GENERATION_NUMBER_INFINITY 0xFFFFFFFF > #define GENERATION_NUMBER_MAX 0x3FFFFFFF > #define GENERATION_NUMBER_ZERO 0 Those two chunks move COMMIT_NOT_FROM_GRAPH from commit.h to commit-graph.h, because it is no longer needed in init_commit_node() from alloc.c. On the other hand, the remaining commit-graph #define-s were moved in the first commit of the series. 
What I DO NOT SEE in this commit is the actual *removal* of the `graph_pos` field from `struct commit`, i.e.: diff --git a/commit.h b/commit.h index cc610400d5..01e1c4c3eb 100644 --- a/commit.h +++ b/commit.h @@ -34,6 +34,5 @@ struct commit { */ struct tree *maybe_tree; - uint32_t graph_pos; unsigned int index; }; > diff --git a/alloc.c b/alloc.c > index cbed187094..f37fb3b8b6 100644 > --- a/alloc.c > +++ b/alloc.c > @@ -108,7 +108,6 @@ void init_commit_node(struct repository *r, struct commit *c) > { > c->object.type = OBJ_COMMIT; > c->index = alloc_commit_index(r); > - c->graph_pos = COMMIT_NOT_FROM_GRAPH; > } > > void *alloc_commit_node(struct repository *r) This removes the commit->graph_pos initialization from init_commit_node(), and thus from alloc_commit_node(); the handling of COMMIT_NOT_FROM_GRAPH is moved to the setter and getter "methods", see below. > diff --git a/commit-graph.c b/commit-graph.c > index 9ce7d4acb1..7ff460b442 100644 > --- a/commit-graph.c > +++ b/commit-graph.c > @@ -87,6 +87,34 @@ static int commit_pos_cmp(const void *va, const void *vb) > commit_pos_at(&commit_pos, b); > } > > +define_commit_slab(graph_pos_slab, uint32_t); > +static struct graph_pos_slab graph_pos_slab = COMMIT_SLAB_INIT(1, graph_pos_slab); > + > +uint32_t graph_pos(const struct commit *c) > +{ > + uint32_t *pos = graph_pos_slab_peek(&graph_pos_slab, c); > + > + return pos ? 
*pos : COMMIT_NOT_FROM_GRAPH; > +} > + > +static void set_graph_pos(const struct commit *c, const uint32_t position) > +{ > + unsigned int i = graph_pos_slab.slab_count; > + uint32_t *pos = graph_pos_slab_at(&graph_pos_slab, c); > + > + /* > + * commit-slab initializes with zero, overwrite this with > + * COMMIT_NOT_FROM_GRAPH > + */ > + for (; i < graph_pos_slab.slab_count; ++i) > + { > + memset(graph_pos_slab.slab[i], COMMIT_NOT_FROM_GRAPH, > + graph_pos_slab.slab_size * sizeof(uint32_t)); > + } > + > + *pos = position; > +} > + > define_commit_slab(generation_slab, uint32_t); > static struct generation_slab generation_slab = COMMIT_SLAB_INIT(1, generation_slab); > [...] > @@ -142,4 +143,6 @@ void free_commit_graph(struct commit_graph *); > void disable_commit_graph(struct repository *r); > > uint32_t generation(const struct commit *c); > + > +uint32_t graph_pos(const struct commit *c); > #endif I wonder why those helper functions, graph_pos() and set_graph_pos(), were not introduced in the 1st patch of this series, together with generation() and set_generation(). The same comments as for the previous patch apply: if graph_pos was not explicitly set, we want it to be COMMIT_NOT_FROM_GRAPH (like init_commit_node() did before this change). If it is not on the commit-slab, it is not set -- this is handled by the graph_pos() function. However, we allocate memory on the slab in chunks, and set_graph_pos() ensures that those extra allocated `graph_pos` values on the commit-slab are properly initialized to COMMIT_NOT_FROM_GRAPH, like init_commit_node() did. Note that we always have COMMIT_NOT_FROM_GRAPH for `graph_pos` if and only if there is GENERATION_NUMBER_INFINITY for `generation`, so perhaps putting those two together in `struct commit_graph_info` would make sense. But whether doing more work in the "setter" set_*() functions or doing an extra conditional in the "getter" *() functions would give better performance needs (micro-)benchmarking. 
> diff --git a/contrib/coccinelle/graph_pos.cocci b/contrib/coccinelle/graph_pos.cocci > new file mode 100644 > index 0000000000..0929164bdf > --- /dev/null > +++ b/contrib/coccinelle/graph_pos.cocci > @@ -0,0 +1,12 @@ > +@@ > +struct commit *c; > +expression E; > +@@ > +- c->graph_pos = E > ++ set_graph_pos(c, E) > + > +@@ > +struct commit *c; > +@@ > +- c->graph_pos > ++ graph_pos(c) This is the semantic patch that generates the rest of the changes. (If any of those needs manual improvement, it is better left for a separate "manual fixup" patch, in my opinion.) > diff --git a/bloom.c b/bloom.c > index 9b86aa3f59..5bee5bb0c1 100644 > --- a/bloom.c > +++ b/bloom.c > @@ -34,14 +34,14 @@ static int load_bloom_filter_from_graph(struct commit_graph *g, > { > uint32_t lex_pos, start_index, end_index; > > - while (c->graph_pos < g->num_commits_in_base) > + while (graph_pos(c) < g->num_commits_in_base) > g = g->base_graph; > > /* The commit graph commit 'c' lives in doesn't carry bloom filters. */ > if (!g->chunk_bloom_indexes) > return 0; > > - lex_pos = c->graph_pos - g->num_commits_in_base; > + lex_pos = graph_pos(c) - g->num_commits_in_base; > > end_index = get_be32(g->chunk_bloom_indexes + 4 * lex_pos); > Here graph_pos(c) is used twice. Whether that matters probably needs to be checked (with a benchmark). 
> @@ -188,7 +188,7 @@ struct bloom_filter *get_bloom_filter(struct repository *r, > > if (!filter->data) { > load_commit_graph_info(r, c); > - if (c->graph_pos != COMMIT_NOT_FROM_GRAPH && > + if (graph_pos(c) != COMMIT_NOT_FROM_GRAPH && > r->objects->commit_graph->chunk_bloom_indexes) { > if (load_bloom_filter_from_graph(r->objects->commit_graph, filter, c)) > return filter; > diff --git a/commit-graph.c b/commit-graph.c > index 9ce7d4acb1..7ff460b442 100644 > --- a/commit-graph.c > +++ b/commit-graph.c > @@ -697,7 +725,7 @@ static struct commit_list **insert_parent_or_die(struct repository *r, > c = lookup_commit(r, &oid); > if (!c) > die(_("could not find commit %s"), oid_to_hex(&oid)); > - c->graph_pos = pos; > + set_graph_pos(c, pos); > return &commit_list_insert(c, pptr)->next; > } > > @@ -711,7 +739,7 @@ static void fill_commit_graph_info(struct commit *item, struct commit_graph *g, > > lex_index = pos - g->num_commits_in_base; > commit_data = g->chunk_commit_data + GRAPH_DATA_WIDTH * lex_index; > - item->graph_pos = pos; > + set_graph_pos(item, pos); > set_generation(item, get_be32(commit_data + g->hash_len + 8) >> 2); > } > > @@ -741,7 +769,7 @@ static int fill_commit_in_graph(struct repository *r, > * Store the "full" position, but then use the > * "local" position for the rest of the calculation. > */ > - item->graph_pos = pos; > + set_graph_pos(item, pos); > lex_index = pos - g->num_commits_in_base; > > commit_data = g->chunk_commit_data + (g->hash_len + 16) * lex_index; > @@ -786,8 +814,8 @@ static int fill_commit_in_graph(struct repository *r, > > static int find_commit_in_graph(struct commit *item, struct commit_graph *g, uint32_t *pos) > { > - if (item->graph_pos != COMMIT_NOT_FROM_GRAPH) { > - *pos = item->graph_pos; > + if (graph_pos(item) != COMMIT_NOT_FROM_GRAPH) { > + *pos = graph_pos(item); > return 1; Here graph_pos(item) is used twice. 
> } else { > struct commit_graph *cur_g = g; > @@ -843,11 +871,11 @@ static struct tree *load_tree_for_commit(struct repository *r, > struct object_id oid; > const unsigned char *commit_data; > > - while (c->graph_pos < g->num_commits_in_base) > + while (graph_pos(c) < g->num_commits_in_base) > g = g->base_graph; > > commit_data = g->chunk_commit_data + > - GRAPH_DATA_WIDTH * (c->graph_pos - g->num_commits_in_base); > + GRAPH_DATA_WIDTH * (graph_pos(c) - g->num_commits_in_base); > > hashcpy(oid.hash, commit_data); > set_commit_tree(c, lookup_tree(r, &oid)); Here graph_pos(c) is used twice. > @@ -861,7 +889,7 @@ static struct tree *get_commit_tree_in_graph_one(struct repository *r, > { > if (c->maybe_tree) > return c->maybe_tree; > - if (c->graph_pos == COMMIT_NOT_FROM_GRAPH) > + if (graph_pos(c) == COMMIT_NOT_FROM_GRAPH) > BUG("get_commit_tree_in_graph_one called from non-commit-graph commit"); > > return load_tree_for_commit(r, g, (struct commit *)c); > @@ -1247,7 +1275,7 @@ static void close_reachable(struct write_commit_graph_context *ctx) > continue; > if (ctx->split) { > if ((!parse_commit(commit) && > - commit->graph_pos == COMMIT_NOT_FROM_GRAPH) || > + graph_pos(commit) == COMMIT_NOT_FROM_GRAPH) || > flags == COMMIT_GRAPH_SPLIT_REPLACE) > add_missing_parents(ctx, commit); > } else if (!parse_commit_no_graph(commit)) > @@ -1493,7 +1521,7 @@ static uint32_t count_distinct_commits(struct write_commit_graph_context *ctx) > if (ctx->split) { > struct commit *c = lookup_commit(ctx->r, &ctx->oids.list[i]); > > - if (!c || c->graph_pos != COMMIT_NOT_FROM_GRAPH) > + if (!c || graph_pos(c) != COMMIT_NOT_FROM_GRAPH) > continue; > } > > @@ -1527,7 +1555,7 @@ static void copy_oids_to_commits(struct write_commit_graph_context *ctx) > ctx->commits.list[ctx->commits.nr] = lookup_commit(ctx->r, &ctx->oids.list[i]); > > if (ctx->split && flags != COMMIT_GRAPH_SPLIT_REPLACE && > - ctx->commits.list[ctx->commits.nr]->graph_pos != COMMIT_NOT_FROM_GRAPH) > + 
graph_pos(ctx->commits.list[ctx->commits.nr]) != COMMIT_NOT_FROM_GRAPH) > continue; > > if (ctx->split && flags == COMMIT_GRAPH_SPLIT_REPLACE) > diff --git a/commit.c b/commit.c > index 8dad0f8446..da6de08b2b 100644 > --- a/commit.c > +++ b/commit.c > @@ -339,7 +339,7 @@ struct tree *repo_get_commit_tree(struct repository *r, > if (commit->maybe_tree || !commit->object.parsed) > return commit->maybe_tree; > > - if (commit->graph_pos != COMMIT_NOT_FROM_GRAPH) > + if (graph_pos(commit) != COMMIT_NOT_FROM_GRAPH) > return get_commit_tree_in_graph(r, commit); > > return NULL; Best, -- Jakub Narębski