Re: [PATCH 06/30] cache.h: have base_name_compare() take "is tree?", not "mode"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Mar 8, 2021 at 7:07 AM Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> wrote:
>
> Change the base_name_compare() API and the related df_name_compare()
> function to take a boolean argument indicating whether the entry is a
> tree or not, instead of having them call S_ISDIR(mode) on their own.
>
> This makes use of the new "object_type" field in the "name_entry".
>
> The API being modified here was originally added way back in
> 958ba6c96eb (Introduce "base_name_compare()" helper function,
> 2005-05-20).
>
> Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx>
> ---
>  builtin/fast-import.c |  8 ++++----
>  builtin/mktree.c      |  4 ++--
>  cache.h               |  4 ++--
>  combine-diff.c        |  8 +++++---
>  match-trees.c         |  6 ++++--
>  merge-ort.c           |  4 ++--
>  merge-recursive.c     |  6 +++---
>  read-cache.c          | 16 ++++++++--------
>  tree-diff.c           |  7 +++++--
>  unpack-trees.c        | 15 ++++++++-------
>  10 files changed, 43 insertions(+), 35 deletions(-)
>
> diff --git a/builtin/fast-import.c b/builtin/fast-import.c
> index dd4d09ceceb..ce4613c1595 100644
> --- a/builtin/fast-import.c
> +++ b/builtin/fast-import.c
> @@ -1288,8 +1288,8 @@ static int tecmp0 (const void *_a, const void *_b)
>         struct tree_entry *a = *((struct tree_entry**)_a);
>         struct tree_entry *b = *((struct tree_entry**)_b);
>         return base_name_compare(
> -               a->name->str_dat, a->name->str_len, a->versions[0].mode,
> -               b->name->str_dat, b->name->str_len, b->versions[0].mode);
> +               a->name->str_dat, a->name->str_len, 1,
> +               b->name->str_dat, b->name->str_len, 1);
>  }
>
>  static int tecmp1 (const void *_a, const void *_b)
> @@ -1297,8 +1297,8 @@ static int tecmp1 (const void *_a, const void *_b)
>         struct tree_entry *a = *((struct tree_entry**)_a);
>         struct tree_entry *b = *((struct tree_entry**)_b);
>         return base_name_compare(
> -               a->name->str_dat, a->name->str_len, a->versions[1].mode,
> -               b->name->str_dat, b->name->str_len, b->versions[1].mode);
> +               a->name->str_dat, a->name->str_len, 1,
> +               b->name->str_dat, b->name->str_len, 1);
>  }

You've treated all files as directories, and in doing so broken the
sorting order of paths like the following
    foo
    foo.txt
foo needs to sort after foo.txt if it's a directory, and before
foo.txt if it's a file; that's the whole reason base_name_compare()
wants to know is_tree-ness.

It looks like we have a whole in our regression test coverage, since
they didn't catch this breakage.  (git will accept and work with trees
in the wrong sort-order, but it can create confusing diffs and
whatnot.)

>  static void mktree(struct tree_content *t, int v, struct strbuf *b)
> diff --git a/builtin/mktree.c b/builtin/mktree.c
> index 891991b00d6..2c1973229ac 100644
> --- a/builtin/mktree.c
> +++ b/builtin/mktree.c
> @@ -37,8 +37,8 @@ static int ent_compare(const void *a_, const void *b_)
>  {
>         struct treeent *a = *(struct treeent **)a_;
>         struct treeent *b = *(struct treeent **)b_;
> -       return base_name_compare(a->name, a->len, a->mode,
> -                                b->name, b->len, b->mode);
> +       return base_name_compare(a->name, a->len, S_ISDIR(a->mode),
> +                                b->name, b->len, S_ISDIR(b->mode));
>  }
>
>  static void write_tree(struct object_id *oid)
> diff --git a/cache.h b/cache.h
> index e513f0ee5b4..49994dae916 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -1506,8 +1506,8 @@ int repo_interpret_branch_name(struct repository *r,
>
>  int validate_headref(const char *ref);
>
> -int base_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
> -int df_name_compare(const char *name1, int len1, int mode1, const char *name2, int len2, int mode2);
> +int base_name_compare(const char *name1, int len1, int isdir1, const char *name2, int len2, int isdir2);
> +int df_name_compare(const char *name1, int len1, int isdir1, const char *name2, int len2, int isdir2);

A very minor nit/question:

Do we want to call this isdir or istree?  I know we use S_ISDIR() but
that's because it's a pre-existing macro pre-dating git.  As you
mentioned in your commit message, we want to know whether the object
in question is a git "tree", so I'm inclined to go with istree.

But, I'm probably bikeshedding here.

>  int name_compare(const char *name1, size_t len1, const char *name2, size_t len2);
>  int cache_name_stage_compare(const char *name1, int len1, int stage1, const char *name2, int len2, int stage2);
>
> diff --git a/combine-diff.c b/combine-diff.c
> index 9228aebc16b..64d7aaf1710 100644
> --- a/combine-diff.c
> +++ b/combine-diff.c
> @@ -16,11 +16,13 @@
>  static int compare_paths(const struct combine_diff_path *one,
>                           const struct diff_filespec *two)
>  {
> -       if (!S_ISDIR(one->mode) && !S_ISDIR(two->mode))
> +       int isdir_one = S_ISDIR(one->mode);
> +       int isdir_two = S_ISDIR(two->mode);
> +       if (!isdir_one && !isdir_two)
>                 return strcmp(one->path, two->path);
>
> -       return base_name_compare(one->path, strlen(one->path), one->mode,
> -                                two->path, strlen(two->path), two->mode);
> +       return base_name_compare(one->path, strlen(one->path), isdir_one,
> +                                two->path, strlen(two->path), isdir_two);
>  }
>
>  static int filename_changed(char status)
> diff --git a/match-trees.c b/match-trees.c
> index 1011357ad0c..4d124a0fd1b 100644
> --- a/match-trees.c
> +++ b/match-trees.c
> @@ -67,8 +67,10 @@ static void *fill_tree_desc_strict(struct tree_desc *desc,
>  static int base_name_entries_compare(const struct name_entry *a,
>                                      const struct name_entry *b)
>  {
> -       return base_name_compare(a->path, tree_entry_len(a), a->mode,
> -                                b->path, tree_entry_len(b), b->mode);
> +       int isdira = a->object_type == OBJ_TREE;
> +       int isdirb = b->object_type == OBJ_TREE;
> +       return base_name_compare(a->path, tree_entry_len(a), isdira,
> +                                b->path, tree_entry_len(b), isdirb);
>  }
>
>  /*
> diff --git a/merge-ort.c b/merge-ort.c
> index 603d30c5217..4075d13aaab 100644
> --- a/merge-ort.c
> +++ b/merge-ort.c
> @@ -2390,8 +2390,8 @@ static int tree_entry_order(const void *a_, const void *b_)
>
>         const struct merged_info *ami = a->util;
>         const struct merged_info *bmi = b->util;
> -       return base_name_compare(a->string, strlen(a->string), ami->result.mode,
> -                                b->string, strlen(b->string), bmi->result.mode);
> +       return base_name_compare(a->string, strlen(a->string), S_ISDIR(ami->result.mode),
> +                                b->string, strlen(b->string), S_ISDIR(bmi->result.mode));
>  }
>
>  static void write_tree(struct object_id *result_oid,
> diff --git a/merge-recursive.c b/merge-recursive.c
> index 1593f374495..0baa6b5ca56 100644
> --- a/merge-recursive.c
> +++ b/merge-recursive.c
> @@ -554,12 +554,12 @@ static int string_list_df_name_compare(const char *one, const char *two)
>          *
>          * To achieve this, we sort with df_name_compare and provide
>          * the mode S_IFDIR so that D/F conflicts will sort correctly.
> -        * We use the mode S_IFDIR for everything else for simplicity,
> +        * We say we have a directory for everything else for simplicity,
>          * since in other cases any changes in their order due to
>          * sorting cause no problems for us.
>          */
> -       int cmp = df_name_compare(one, onelen, S_IFDIR,
> -                                 two, twolen, S_IFDIR);
> +       int cmp = df_name_compare(one, onelen, 1, two, twolen, 1);
> +
>         /*
>          * Now that 'foo' and 'foo/bar' compare equal, we have to make sure
>          * that 'foo' comes before 'foo/bar'.
> diff --git a/read-cache.c b/read-cache.c
> index 1e9a50c6c73..edfce1f0cb8 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -462,8 +462,8 @@ int ie_modified(struct index_state *istate,
>         return 0;
>  }
>
> -int base_name_compare(const char *name1, int len1, int mode1,
> -                     const char *name2, int len2, int mode2)
> +int base_name_compare(const char *name1, int len1, int isdir1,
> +                     const char *name2, int len2, int isdir2)
>  {
>         unsigned char c1, c2;
>         int len = len1 < len2 ? len1 : len2;
> @@ -474,9 +474,9 @@ int base_name_compare(const char *name1, int len1, int mode1,
>                 return cmp;
>         c1 = name1[len];
>         c2 = name2[len];
> -       if (!c1 && S_ISDIR(mode1))
> +       if (!c1 && isdir1)
>                 c1 = '/';
> -       if (!c2 && S_ISDIR(mode2))
> +       if (!c2 && isdir2)
>                 c2 = '/';
>         return (c1 < c2) ? -1 : (c1 > c2) ? 1 : 0;
>  }
> @@ -491,8 +491,8 @@ int base_name_compare(const char *name1, int len1, int mode1,
>   * This is used by routines that want to traverse the git namespace
>   * but then handle conflicting entries together when possible.
>   */
> -int df_name_compare(const char *name1, int len1, int mode1,
> -                   const char *name2, int len2, int mode2)
> +int df_name_compare(const char *name1, int len1, int isdir1,
> +                   const char *name2, int len2, int isdir2)
>  {
>         int len = len1 < len2 ? len1 : len2, cmp;
>         unsigned char c1, c2;
> @@ -504,10 +504,10 @@ int df_name_compare(const char *name1, int len1, int mode1,
>         if (len1 == len2)
>                 return 0;
>         c1 = name1[len];
> -       if (!c1 && S_ISDIR(mode1))
> +       if (!c1 && isdir1)
>                 c1 = '/';
>         c2 = name2[len];
> -       if (!c2 && S_ISDIR(mode2))
> +       if (!c2 && isdir2)
>                 c2 = '/';
>         if (c1 == '/' && !c2)
>                 return 0;
> diff --git a/tree-diff.c b/tree-diff.c
> index 7cebbb327e2..f133834597c 100644
> --- a/tree-diff.c
> +++ b/tree-diff.c
> @@ -50,6 +50,7 @@ static int tree_entry_pathcmp(struct tree_desc *t1, struct tree_desc *t2)
>  {
>         struct name_entry *e1, *e2;
>         int cmp;
> +       int e1_is_tree, e2_is_tree;
>
>         /* empty descriptors sort after valid tree entries */
>         if (!t1->size)
> @@ -58,9 +59,11 @@ static int tree_entry_pathcmp(struct tree_desc *t1, struct tree_desc *t2)
>                 return -1;
>
>         e1 = &t1->entry;
> +       e1_is_tree = e1->object_type == OBJ_TREE;

This would read slightly clearer with parenthesis around the right hand side.

>         e2 = &t2->entry;
> -       cmp = base_name_compare(e1->path, tree_entry_len(e1), e1->mode,
> -                               e2->path, tree_entry_len(e2), e2->mode);
> +       e2_is_tree = e2->object_type == OBJ_TREE;

Same.

> +       cmp = base_name_compare(e1->path, tree_entry_len(e1), e1_is_tree,
> +                               e2->path, tree_entry_len(e2), e2_is_tree);
>         return cmp;
>  }
>
> diff --git a/unpack-trees.c b/unpack-trees.c
> index f5f668f532d..802f7771d75 100644
> --- a/unpack-trees.c
> +++ b/unpack-trees.c
> @@ -922,7 +922,7 @@ static int traverse_trees_recursive(int n, unsigned long dirmask,
>  static int do_compare_entry_piecewise(const struct cache_entry *ce,
>                                       const struct traverse_info *info,
>                                       const char *name, size_t namelen,
> -                                     unsigned mode)
> +                                     unsigned is_tree)

isdir elsewhere and is_tree here?  ;-)

>  {
>         int pathlen, ce_len;
>         const char *ce_name;
> @@ -930,7 +930,7 @@ static int do_compare_entry_piecewise(const struct cache_entry *ce,
>         if (info->prev) {
>                 int cmp = do_compare_entry_piecewise(ce, info->prev,
>                                                      info->name, info->namelen,
> -                                                    info->mode);
> +                                                    S_ISDIR(info->mode));
>                 if (cmp)
>                         return cmp;
>         }
> @@ -944,13 +944,13 @@ static int do_compare_entry_piecewise(const struct cache_entry *ce,
>         ce_len -= pathlen;
>         ce_name = ce->name + pathlen;
>
> -       return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode);
> +       return df_name_compare(ce_name, ce_len, 0, name, namelen, is_tree);
>  }
>
>  static int do_compare_entry(const struct cache_entry *ce,
>                             const struct traverse_info *info,
>                             const char *name, size_t namelen,
> -                           unsigned mode)
> +                           unsigned is_tree)
>  {
>         int pathlen, ce_len;
>         const char *ce_name;
> @@ -962,7 +962,7 @@ static int do_compare_entry(const struct cache_entry *ce,
>          * it is quicker to use the precomputed version.
>          */
>         if (!info->traverse_path)
> -               return do_compare_entry_piecewise(ce, info, name, namelen, mode);
> +               return do_compare_entry_piecewise(ce, info, name, namelen, is_tree);
>
>         cmp = strncmp(ce->name, info->traverse_path, info->pathlen);
>         if (cmp)
> @@ -977,12 +977,13 @@ static int do_compare_entry(const struct cache_entry *ce,
>         ce_len -= pathlen;
>         ce_name = ce->name + pathlen;
>
> -       return df_name_compare(ce_name, ce_len, S_IFREG, name, namelen, mode);
> +       return df_name_compare(ce_name, ce_len, 0, name, namelen, is_tree);
>  }
>
>  static int compare_entry(const struct cache_entry *ce, const struct traverse_info *info, const struct name_entry *n)
>  {
> -       int cmp = do_compare_entry(ce, info, n->path, n->pathlen, n->mode);
> +       int is_tree = n->object_type == OBJ_TREE;

Another place where parentheses would speed my reading comprehension
just slightly.


> +       int cmp = do_compare_entry(ce, info, n->path, n->pathlen, is_tree);
>         if (cmp)
>                 return cmp;
>
> --
> 2.31.0.rc0.126.g04f22c5b82




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux