Re: [PATCH v2 5/7] reset: make sparse-aware (except --mixed)

Victoria Dye <vdye@xxxxxxxxxx> · Wed, 6 Oct 2021 16:56:13 -0400

Elijah Newren wrote:
>> +static void prime_cache_tree_sparse_dir(struct repository *r,
>> +                                       struct cache_tree *it,
>> +                                       struct tree *tree,
>> +                                       struct strbuf *tree_path)
>> +{
>> +
>> +       oidcpy(&it->oid, &tree->object.oid);
>> +       it->entry_count = 1;
>> +       return;
> 
> Why are 'r' and 'tree_path' passed to this function?
> 

I mindlessly copied the function signature of `prime_cache_tree_rec` and
didn't notice those variables weren't needed (I'll remove them in V3).

>> +}
>> +
>>  static void prime_cache_tree_rec(struct repository *r,
>>                                  struct cache_tree *it,
>> -                                struct tree *tree)
>> +                                struct tree *tree,
>> +                                struct strbuf *tree_path)
>>  {
>> +       struct strbuf subtree_path = STRBUF_INIT;
>>         struct tree_desc desc;
>>         struct name_entry entry;
>>         int cnt;
>>
>>         oidcpy(&it->oid, &tree->object.oid);
>> +
> 
> Why the blank line addition here?
> 

My goal was to visually separate the parts of `prime_cache_tree_rec` that
update the properties of the `tree` itself and the parts that deal with its
entries. For me, it was helpful when reading and understanding what this
function does and seemed like an good (minor) readability change.

>>         init_tree_desc(&desc, tree->buffer, tree->size);
>>         cnt = 0;
>>         while (tree_entry(&desc, &entry)) {
>> @@ -757,27 +771,49 @@ static void prime_cache_tree_rec(struct repository *r,
>>                 else {
>>                         struct cache_tree_sub *sub;
>>                         struct tree *subtree = lookup_tree(r, &entry.oid);
>> +
>>                         if (!subtree->object.parsed)
>>                                 parse_tree(subtree);
>>                         sub = cache_tree_sub(it, entry.path);
>>                         sub->cache_tree = cache_tree();
>> -                       prime_cache_tree_rec(r, sub->cache_tree, subtree);
> 
>> +                       strbuf_reset(&subtree_path);
>> +                       strbuf_grow(&subtree_path, tree_path->len + entry.pathlen + 1);
>> +                       strbuf_addbuf(&subtree_path, tree_path);
>> +                       strbuf_add(&subtree_path, entry.path, entry.pathlen);
>> +                       strbuf_addch(&subtree_path, '/');
> 
> Reconstructing the full path each time?  And despite only being useful
> for the sparse-index case?
> 
> Would it be better to drop subtree_path from this function, then
> append entry.path + '/' here to tree_path, and then after the if-block
> below, call strbuf_setlen to remove the part that this function call
> added?  That way, we don't need subtree_path, and don't have to copy
> the leading path every time.
> 
> Also, maybe it'd be better to only do this strbuf manipulation if
> r->index->sparse_index, since it's not ever used otherwise?
> 

[...]

>> -static int index_name_stage_pos(struct index_state *istate, const char *name, int namelen, int stage)
>> +static int index_name_stage_pos(struct index_state *istate,
>> +                               const char *name, int namelen,
>> +                               int stage,
>> +                               int search_sparse)
> 
> It'd be nicer to make search_sparse an enum defined within this file, so that...
> 
>>  {
>>         int first, last;
>>
>> @@ -570,7 +573,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in
>>                 first = next+1;
>>         }
>>
>> -       if (istate->sparse_index &&
>> +       if (search_sparse && istate->sparse_index &&
>>             first > 0) {
>>                 /* Note: first <= istate->cache_nr */
>>                 struct cache_entry *ce = istate->cache[first - 1];
>> @@ -586,7 +589,7 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in
>>                     ce_namelen(ce) < namelen &&
>>                     !strncmp(name, ce->name, ce_namelen(ce))) {
>>                         ensure_full_index(istate);
>> -                       return index_name_stage_pos(istate, name, namelen, stage);
>> +                       return index_name_stage_pos(istate, name, namelen, stage, search_sparse);
>>                 }
>>         }
>>
>> @@ -595,7 +598,12 @@ static int index_name_stage_pos(struct index_state *istate, const char *name, in
>>
>>  int index_name_pos(struct index_state *istate, const char *name, int namelen)
>>  {
>> -       return index_name_stage_pos(istate, name, namelen, 0);
>> +       return index_name_stage_pos(istate, name, namelen, 0, 1);
> 
> ...this could use SEARCH_SPARSE or some name like that which is more
> meaningful than "1" here.
> 
>> +}
>> +
>> +int index_entry_exists(struct index_state *istate, const char *name, int namelen)
>> +{
>> +       return index_name_stage_pos(istate, name, namelen, 0, 0) >= 0;
> 
> ...and likewise this spot could use SEARCH_FULL or some name like
> that, which is more meaningful than the second "0".
> 
> Similarly for multiple call sites below...
> 
> 

I like all of these suggestions and will include them in the next version. Thanks!