Re: [GSoC][PATCH v3] t: port helper/test-hashmap.c to unit-tests/t-hashmap.c

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Jul 12, 2024 at 1:52 AM Ghanshyam Thakkar
<shyamthakkar001@xxxxxxxxx> wrote:
>
> helper/test-hashmap.c along with t0011-hashmap.sh test the hashmap.h
> library. Migrate them to the unit testing framework for better
> debugging, runtime performance and concise code.
>
> Along with the migration, make 'add' tests from the shellscript order

Nit: s/shellscript/shell script/

> agnostic in unit tests, since they iterate over entries with the same
> keys and we do not guarantee the order.

The above is not very clear. Maybe rephrasing a bit and adding an
example could help.

> The helper/test-hashmap.c is still not removed because it contains a
> performance test meant to be run by the user directly (not used in
> t/perf). And it makes sense for such a utility to be a helper.


> diff --git a/t/unit-tests/t-hashmap.c b/t/unit-tests/t-hashmap.c
> new file mode 100644
> index 0000000000..3112b10b33
> --- /dev/null
> +++ b/t/unit-tests/t-hashmap.c
> @@ -0,0 +1,358 @@
> +#include "test-lib.h"
> +#include "hashmap.h"
> +#include "strbuf.h"
> +
> +struct test_entry {
> +       int padding; /* hashmap entry no longer needs to be the first member */
> +       struct hashmap_entry ent;
> +       /* key and value as two \0-terminated strings */
> +       char key[FLEX_ARRAY];
> +};
> +
> +static int test_entry_cmp(const void *cmp_data,
> +                         const struct hashmap_entry *eptr,
> +                         const struct hashmap_entry *entry_or_key,
> +                         const void *keydata)
> +{
> +       const int ignore_case = cmp_data ? *((int *)cmp_data) : 0;
> +       const struct test_entry *e1, *e2;
> +       const char *key = keydata;
> +
> +       e1 = container_of(eptr, const struct test_entry, ent);
> +       e2 = container_of(entry_or_key, const struct test_entry, ent);
> +
> +       if (ignore_case)
> +               return strcasecmp(e1->key, key ? key : e2->key);
> +       else
> +               return strcmp(e1->key, key ? key : e2->key);
> +}
> +
> +static const char *get_value(const struct test_entry *e)
> +{
> +       return e->key + strlen(e->key) + 1;
> +}
> +
> +static struct test_entry *alloc_test_entry(unsigned int ignore_case,
> +                                          const char *key, const char *value)

Nit: `unsigned int ignore_case` is a bool flag argument which we often
put at the end of function arguments, so perhaps:

static struct test_entry *alloc_test_entry(const char *key, const char *value,
                                          unsigned int ignore_case)

> +{
> +       size_t klen = strlen(key);
> +       size_t vlen = strlen(value);
> +       unsigned int hash = ignore_case ? strihash(key) : strhash(key);
> +       struct test_entry *entry = xmalloc(st_add4(sizeof(*entry), klen, vlen, 2));
> +
> +       hashmap_entry_init(&entry->ent, hash);
> +       memcpy(entry->key, key, klen + 1);
> +       memcpy(entry->key + klen + 1, value, vlen + 1);
> +       return entry;
> +}
> +
> +static struct test_entry *get_test_entry(struct hashmap *map,
> +                                        unsigned int ignore_case, const char *key)

Here also `unsigned int ignore_case` might want to be the last argument.

> +{
> +       return hashmap_get_entry_from_hash(
> +               map, ignore_case ? strihash(key) : strhash(key), key,
> +               struct test_entry, ent);
> +}
> +
> +static int key_val_contains(const char *key_val[][2], char seen[], size_t n,
> +                           struct test_entry *entry)
> +{
> +       for (size_t i = 0; i < n; i++) {
> +               if (!strcmp(entry->key, key_val[i][0]) &&
> +                   !strcmp(get_value(entry), key_val[i][1])) {
> +                       if (seen[i])
> +                               return 2;
> +                       seen[i] = 1;
> +                       return 0;
> +               }
> +       }
> +       return 1;
> +}
> +
> +static void setup(void (*f)(struct hashmap *map, int ignore_case),
> +                 int ignore_case)

Why `int ignore_case` here, instead of `unsigned int ignore_case` above?

It's nice that the `ignore_case` argument is the last argument though.

> +{
> +       struct hashmap map = HASHMAP_INIT(test_entry_cmp, &ignore_case);
> +
> +       f(&map, ignore_case);
> +       hashmap_clear_and_free(&map, struct test_entry, ent);
> +}
> +
> +static void t_replace(struct hashmap *map, int ignore_case)

Here also, why `int ignore_case` here, instead of `unsigned int
ignore_case` towards the top of this file?

I won't mention it anymore below, but I think it might be better to
have `unsigned int ignore_case` everywhere.

> +{
> +       struct test_entry *entry;
> +
> +       entry = alloc_test_entry(ignore_case, "key1", "value1");
> +       check_pointer_eq(hashmap_put_entry(map, entry, ent), NULL);
> +
> +       entry = alloc_test_entry(ignore_case, ignore_case ? "Key1" : "key1",
> +                                "value2");
> +       entry = hashmap_put_entry(map, entry, ent);
> +       if (check(entry != NULL))
> +               check_str(get_value(entry), "value1");
> +       free(entry);
> +
> +       entry = alloc_test_entry(ignore_case, "fooBarFrotz", "value3");
> +       check_pointer_eq(hashmap_put_entry(map, entry, ent), NULL);
> +
> +       entry = alloc_test_entry(ignore_case,
> +                                ignore_case ? "foobarfrotz" : "fooBarFrotz",
> +                                "value4");

Maybe using something like "FOObarFrotZ" instead of "foobarfrotz"
would make it clear that we don't need to downcase in case we ignore
case.

> +       entry = hashmap_put_entry(map, entry, ent);
> +       if (check(entry != NULL))
> +               check_str(get_value(entry), "value3");
> +       free(entry);
> +}
> +
> +static void t_get(struct hashmap *map, int ignore_case)
> +{
> +       struct test_entry *entry;
> +       const char *key_val[][2] = { { "key1", "value1" },
> +                                    { "key2", "value2" },
> +                                    { "fooBarFrotz", "value3" },
> +                                    { ignore_case ? "key4" : "foobarfrotz", "value4" } };
> +       const char *query[][2] = {
> +               { ignore_case ? "Key1" : "key1", "value1" },
> +               { ignore_case ? "keY2" : "key2", "value2" },
> +               { ignore_case ? "foobarfrotz" : "fooBarFrotz", "value3" }
> +       };

I think adding a test case like:

               { ignore_case ? "FOOBarFrotZ" : "foobarfrotz",
                 ignore_case ? : "value3" : "value4" }

which is a bit similar to what Junio suggested, could help a bit check
that things work well especially when not ignoring the case.

> +       for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> +               entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> +               check_pointer_eq(hashmap_put_entry(map, entry, ent), NULL);
> +       }
> +
> +       for (size_t i = 0; i < ARRAY_SIZE(query); i++) {
> +               entry = get_test_entry(map, ignore_case, query[i][0]);
> +               if (check(entry != NULL))
> +                       check_str(get_value(entry), query[i][1]);
> +               else
> +                       test_msg("query key: %s", query[i][0]);
> +       }
> +
> +       check_pointer_eq(get_test_entry(map, ignore_case, "notInMap"), NULL);
> +       check_int(map->tablesize, ==, 64);
> +       check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
> +}
> +
> +static void t_add(struct hashmap *map, int ignore_case)
> +{
> +       struct test_entry *entry;
> +       const char *key_val[][2] = {
> +               { "key1", "value1" },
> +               { ignore_case ? "Key1" : "key1", "value2" },
> +               { "fooBarFrotz", "value3" },
> +               { ignore_case ? "Foobarfrotz" : "fooBarFrotz", "value4" }
> +       };
> +       const char *queries[] = { "key1",
> +                                 ignore_case ? "Foobarfrotz" : "fooBarFrotz" };

It is tricky that here `queries` is not defined and doesn't contain
the same things as in the other functions above:

const char *queries[][2] = ...

Maybe changing the name to something like `query_keys` would help make
people aware that here it contains only keys.

> +       char seen[ARRAY_SIZE(key_val)] = { 0 };
> +
> +       for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) {
> +               entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]);
> +               hashmap_add(map, &entry->ent);
> +       }
> +
> +       for (size_t i = 0; i < ARRAY_SIZE(queries); i++) {
> +               int count = 0;
> +               entry = hashmap_get_entry_from_hash(map,
> +                       ignore_case ? strihash(queries[i]) :
> +                                     strhash(queries[i]),
> +                       queries[i], struct test_entry, ent);
> +
> +               hashmap_for_each_entry_from(map, entry, ent)
> +               {
> +                       int ret;
> +                       if (!check_int((ret = key_val_contains(
> +                                               key_val, seen,
> +                                               ARRAY_SIZE(key_val), entry)),
> +                                      ==, 0)) {
> +                               switch (ret) {
> +                               case 1:
> +                                       test_msg("found entry was not given in the input\n"
> +                                                "    key: %s\n  value: %s",
> +                                                entry->key, get_value(entry));
> +                                       break;
> +                               case 2:
> +                                       test_msg("duplicate entry detected\n"
> +                                                "    key: %s\n  value: %s",
> +                                                entry->key, get_value(entry));
> +                                       break;
> +                               }
> +                       } else {
> +                               count++;
> +                       }
> +               }
> +               check_int(count, ==, 2);
> +       }
> +
> +       for (size_t i = 0; i < ARRAY_SIZE(seen); i++) {
> +               if (!check_int(seen[i], ==, 1))
> +                       test_msg("following key-val pair was not iterated over:\n"
> +                                "    key: %s\n  value: %s",
> +                                key_val[i][0], key_val[i][1]);
> +       }
> +
> +       check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val));
> +       check_pointer_eq(get_test_entry(map, ignore_case, "notInMap"), NULL);
> +}

Thanks!





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux