This looks like a good conversion to me. There are a few small improvements that could be made, but mostly LGTM. Comments are inline below. On 2024.07.08 21:45, Ghanshyam Thakkar wrote: > helper/test-hashmap.c along with t0011-hashmap.sh test the hashmap.h > library. Migrate them to the unit testing framework for better > debugging, runtime performance and consice code. Typo: s/consice/concise/ > Along with the migration, make 'add' tests from the shellscript order > agnostic in unit tests, since they iterate over entries with the same > keys and we do not guarantee the order. > > The helper/test-hashmap.c is still not removed because it contains a > performance test meant to be run by the user directly (not used in > t/perf). And it makes sense for such a utility to be a helper. > > Mentored-by: Christian Couder <chriscool@xxxxxxxxxxxxx> > Mentored-by: Kaartic Sivaraam <kaartic.sivaraam@xxxxxxxxx> > Signed-off-by: Ghanshyam Thakkar <shyamthakkar001@xxxxxxxxx> > --- > The changes in v2 are inspired from the review of another similar > test t-oidmap: https://lore.kernel.org/git/16e06a6d-5fd0-4132-9d82-5c6f13b7f9ed@xxxxxxxxx/ > > The v2 also includes some formatting corrections and one of the > testcases, t_add(), was changed to be more similar to the original. > > Makefile | 1 + > t/helper/test-hashmap.c | 100 +---------- > t/t0011-hashmap.sh | 260 ---------------------------- > t/unit-tests/t-hashmap.c | 359 +++++++++++++++++++++++++++++++++++++++ > 4 files changed, 362 insertions(+), 358 deletions(-) > delete mode 100755 t/t0011-hashmap.sh > create mode 100644 t/unit-tests/t-hashmap.c [snip] > diff --git a/t/unit-tests/t-hashmap.c b/t/unit-tests/t-hashmap.c > new file mode 100644 > index 0000000000..1c951fcfd8 > --- /dev/null > +++ b/t/unit-tests/t-hashmap.c > @@ -0,0 +1,359 @@ > +#include "test-lib.h" > +#include "hashmap.h" > +#include "strbuf.h" > + > +struct test_entry { > + int padding; /* hashmap entry no longer needs to be the first member */ > + struct hashmap_entry ent; > + /* key and value as two \0-terminated strings */ > + char key[FLEX_ARRAY]; > +}; > + > +static int test_entry_cmp(const void *cmp_data, > + const struct hashmap_entry *eptr, > + const struct hashmap_entry *entry_or_key, > + const void *keydata) > +{ > + const int ignore_case = cmp_data ? *((int *)cmp_data) : 0; > + const struct test_entry *e1, *e2; > + const char *key = keydata; > + > + e1 = container_of(eptr, const struct test_entry, ent); > + e2 = container_of(entry_or_key, const struct test_entry, ent); > + > + if (ignore_case) > + return strcasecmp(e1->key, key ? key : e2->key); > + else > + return strcmp(e1->key, key ? key : e2->key); > +} > + > +static const char *get_value(const struct test_entry *e) > +{ > + return e->key + strlen(e->key) + 1; > +} > + > +static struct test_entry *alloc_test_entry(unsigned int ignore_case, > + const char *key, const char *value) > +{ > + size_t klen = strlen(key); > + size_t vlen = strlen(value); > + unsigned int hash = ignore_case ? strihash(key) : strhash(key); > + struct test_entry *entry = xmalloc(st_add4(sizeof(*entry), klen, vlen, 2)); > + > + hashmap_entry_init(&entry->ent, hash); > + memcpy(entry->key, key, klen + 1); > + memcpy(entry->key + klen + 1, value, vlen + 1); > + return entry; > +} So we're duplicating `struct test_entry`, `test_entry_cmp()`, and `alloc_test_entry()`, which have (almost) identical definitions in t/helper/test-hashmap.c. I wonder if it's worth splitting these into a separate .c file. Maybe it's too much of a pain to add Makefile rules to share objects across the test helper and the unit tests. Something to keep in mind I guess, if we find that we want to share more code than this. > +static struct test_entry *get_test_entry(struct hashmap *map, > + unsigned int ignore_case, const char *key) > +{ > + return hashmap_get_entry_from_hash( > + map, ignore_case ? strihash(key) : strhash(key), key, > + struct test_entry, ent); > +} > + > +static int key_val_contains(const char *key_val[][3], size_t n, > + struct test_entry *entry) > +{ > + for (size_t i = 0; i < n; i++) { > + if (!strcmp(entry->key, key_val[i][0]) && > + !strcmp(get_value(entry), key_val[i][1])) { > + if (!strcmp(key_val[i][2], "USED")) > + return 2; > + key_val[i][2] = "USED"; > + return 0; > + } > + } > + return 1; > +} > + > +static void setup(void (*f)(struct hashmap *map, int ignore_case), > + int ignore_case) > +{ > + struct hashmap map = HASHMAP_INIT(test_entry_cmp, &ignore_case); > + > + f(&map, ignore_case); > + hashmap_clear_and_free(&map, struct test_entry, ent); > +} As I mentioned in my review [1] on René's TEST_RUN series, I don't think we get much value out of having a setup + callback approach when the setup is minimal. Would you consider rewriting a v2 using TEST_RUN once that is ready in `next`? [1] https://lore.kernel.org/git/tswyfparvchgi7qxrjxbx4eb7cohypzekjqzbnkbffsesaiazs@vtewtz7o6twi/ > +static void t_put(struct hashmap *map, int ignore_case) > +{ > + struct test_entry *entry; > + const char *key_val[][2] = { { "key1", "value1" }, > + { "key2", "value2" }, > + { "fooBarFrotz", "value3" } }; > + > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > + check(hashmap_put_entry(map, entry, ent) == NULL); > + } > + > + entry = alloc_test_entry(ignore_case, "foobarfrotz", "value4"); > + entry = hashmap_put_entry(map, entry, ent); > + check(ignore_case ? entry != NULL : entry == NULL); > + free(entry); > + > + check_int(map->tablesize, ==, 64); > + check_int(hashmap_get_size(map), ==, > + ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1); > +} Ahhh, so you're using the same function for both case-sensitive and -insensitive tests. So I guess TEST_RUN isn't useful here after all. Personally I'd still rather get rid of setup(), but I don't feel super strongly about it. > +static void t_replace(struct hashmap *map, int ignore_case) > +{ > + struct test_entry *entry; > + > + entry = alloc_test_entry(ignore_case, "key1", "value1"); > + check(hashmap_put_entry(map, entry, ent) == NULL); > + > + entry = alloc_test_entry(ignore_case, ignore_case ? "Key1" : "key1", > + "value2"); > + entry = hashmap_put_entry(map, entry, ent); > + if (check(entry != NULL)) > + check_str(get_value(entry), "value1"); > + free(entry); > + > + entry = alloc_test_entry(ignore_case, "fooBarFrotz", "value3"); > + check(hashmap_put_entry(map, entry, ent) == NULL); > + > + entry = alloc_test_entry(ignore_case, > + ignore_case ? "foobarfrotz" : "fooBarFrotz", > + "value4"); > + entry = hashmap_put_entry(map, entry, ent); > + if (check(entry != NULL)) > + check_str(get_value(entry), "value3"); > + free(entry); > +} > + > +static void t_get(struct hashmap *map, int ignore_case) > +{ > + struct test_entry *entry; > + const char *key_val[][2] = { { "key1", "value1" }, > + { "key2", "value2" }, > + { "fooBarFrotz", "value3" }, > + { ignore_case ? "key4" : "foobarfrotz", "value4" } }; > + const char *query[][2] = { > + { ignore_case ? "Key1" : "key1", "value1" }, > + { ignore_case ? "keY2" : "key2", "value2" }, > + { ignore_case ? "foobarfrotz" : "fooBarFrotz", "value3" } > + }; > + > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > + check(hashmap_put_entry(map, entry, ent) == NULL); > + } > + > + for (size_t i = 0; i < ARRAY_SIZE(query); i++) { > + entry = get_test_entry(map, ignore_case, query[i][0]); > + if (check(entry != NULL)) > + check_str(get_value(entry), query[i][1]); > + } > + > + check(get_test_entry(map, ignore_case, "notInMap") == NULL); > +} > + > +static void t_add(struct hashmap *map, int ignore_case) > +{ > + struct test_entry *entry; > + const char *key_val[][3] = { > + { "key1", "value1", "UNUSED" }, > + { ignore_case ? "Key1" : "key1", "value2", "UNUSED" }, > + { "fooBarFrotz", "value3", "UNUSED" }, > + { ignore_case ? "Foobarfrotz" : "fooBarFrotz", "value4", "UNUSED" } > + }; > + const char *queries[] = { "key1", > + ignore_case ? "Foobarfrotz" : "fooBarFrotz" }; > + > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > + hashmap_add(map, &entry->ent); > + } > + > + for (size_t i = 0; i < ARRAY_SIZE(queries); i++) { Since we only have one query, can we remove the loop and simplify the following block of code? Also (here and elsewhere), it might be less confusing to say "UNSEEN" / "SEEN" instead of "UNUSED" / "USED". The latter makes it sound to me like there's some API requirement to have a 3-item array that we don't actually need, but in this case those fields are actually used in key_val_contains() to track duplicates. > + int count = 0; > + entry = hashmap_get_entry_from_hash(map, > + ignore_case ? strihash(queries[i]) : > + strhash(queries[i]), > + queries[i], struct test_entry, ent); > + > + hashmap_for_each_entry_from(map, entry, ent) > + { > + int ret; > + if (!check_int((ret = key_val_contains( > + key_val, ARRAY_SIZE(key_val), > + entry)), ==, 0)) { > + switch (ret) { > + case 1: > + test_msg("found entry was not given in the input\n" > + " key: %s\n value: %s", > + entry->key, get_value(entry)); > + break; > + case 2: > + test_msg("duplicate entry detected\n" > + " key: %s\n value: %s", > + entry->key, get_value(entry)); > + break; > + } > + } else { > + count++; > + } > + } > + check_int(count, ==, 2); > + } > + check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val)); > + check(get_test_entry(map, ignore_case, "notInMap") == NULL); > +} > + > +static void t_remove(struct hashmap *map, int ignore_case) > +{ > + struct test_entry *entry, *removed; > + const char *key_val[][2] = { { "key1", "value1" }, > + { "key2", "value2" }, > + { "fooBarFrotz", "value3" } }; > + const char *remove[][2] = { { ignore_case ? "Key1" : "key1", "value1" }, > + { ignore_case ? "keY2" : "key2", "value2" } }; > + > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > + check(hashmap_put_entry(map, entry, ent) == NULL); > + } > + > + for (size_t i = 0; i < ARRAY_SIZE(remove); i++) { > + entry = alloc_test_entry(ignore_case, remove[i][0], ""); > + removed = hashmap_remove_entry(map, entry, ent, remove[i][0]); > + if (check(removed != NULL)) > + check_str(get_value(removed), remove[i][1]); > + free(entry); > + free(removed); > + } > + > + entry = alloc_test_entry(ignore_case, "notInMap", ""); > + check(hashmap_remove_entry(map, entry, ent, "notInMap") == NULL); > + free(entry); > +} Is there a reason why you don't check the table size as the shell test did? (similar to what you have in t_put()) > +static void t_iterate(struct hashmap *map, int ignore_case) > +{ > + struct test_entry *entry; > + struct hashmap_iter iter; > + const char *key_val[][3] = { { "key1", "value1", "UNUSED" }, > + { "key2", "value2", "UNUSED" }, > + { "fooBarFrotz", "value3", "UNUSED" } }; > + int count = 0; > + > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > + check(hashmap_put_entry(map, entry, ent) == NULL); > + } > + > + hashmap_for_each_entry(map, &iter, entry, ent /* member name */) > + { > + int ret; > + if (!check_int((ret = key_val_contains(key_val, ARRAY_SIZE(key_val), > + entry)), ==, 0)) { > + switch (ret) { > + case 1: > + test_msg("found entry was not given in the input\n" > + " key: %s\n value: %s", > + entry->key, get_value(entry)); > + break; > + case 2: > + test_msg("duplicate entry detected\n" > + " key: %s\n value: %s", > + entry->key, get_value(entry)); > + break; > + } > + } else { > + count++; > + } > + } > + check_int(count, ==, ARRAY_SIZE(key_val)); > + check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val)); > +} > + > +static void t_alloc(struct hashmap *map, int ignore_case) > +{ > + struct test_entry *entry, *removed; > + > + for (int i = 1; i <= 51; i++) { > + char *key = xstrfmt("key%d", i); > + char *value = xstrfmt("value%d", i); > + entry = alloc_test_entry(ignore_case, key, value); > + check(hashmap_put_entry(map, entry, ent) == NULL); > + free(key); > + free(value); > + } > + check_int(map->tablesize, ==, 64); > + check_int(hashmap_get_size(map), ==, 51); > + > + entry = alloc_test_entry(ignore_case, "key52", "value52"); > + check(hashmap_put_entry(map, entry, ent) == NULL); > + check_int(map->tablesize, ==, 256); > + check_int(hashmap_get_size(map), ==, 52); > + > + for (int i = 1; i <= 12; i++) { > + char *key = xstrfmt("key%d", i); > + char *value = xstrfmt("value%d", i); > + > + entry = alloc_test_entry(ignore_case, key, ""); > + removed = hashmap_remove_entry(map, entry, ent, key); > + if (check(removed != NULL)) > + check_str(value, get_value(removed)); > + free(key); > + free(value); > + free(entry); > + free(removed); > + } > + check_int(map->tablesize, ==, 256); > + check_int(hashmap_get_size(map), ==, 40); > + > + entry = alloc_test_entry(ignore_case, "key40", ""); > + removed = hashmap_remove_entry(map, entry, ent, "key40"); > + if (check(removed != NULL)) > + check_str("value40", get_value(removed)); > + check_int(map->tablesize, ==, 64); > + check_int(hashmap_get_size(map), ==, 39); > + free(entry); > + free(removed); > +} > + > +static void t_intern(struct hashmap *map, int ignore_case) > +{ > + const char *values[] = { "value1", "Value1", "value2", "value2" }; > + > + for (size_t i = 0; i < ARRAY_SIZE(values); i++) { > + const char *i1 = strintern(values[i]); > + const char *i2 = strintern(values[i]); > + > + if (!check(!strcmp(i1, values[i]))) Should we use check_str() here? Or does that add too much extraneous detail when the test_msg() below is what we really care about? > + test_msg("strintern(%s) returns %s\n", values[i], i1); > + else if (!check(i1 != values[i])) Similarly, check_pointer_eq() here? > + test_msg("strintern(%s) returns input pointer\n", > + values[i]); > + else if (!check(i1 == i2)) And check_pointer_eq() here as well. > + test_msg("address('%s') != address('%s'), so strintern('%s') != strintern('%s')", > + i1, i2, values[i], values[i]); > + else > + check_str(i1, values[i]); > + } > +} > + > +int cmd_main(int argc UNUSED, const char **argv UNUSED) > +{ > + TEST(setup(t_put, 0), "put works"); > + TEST(setup(t_put, 1), "put (case insensitive) works"); > + TEST(setup(t_replace, 0), "replace works"); > + TEST(setup(t_replace, 1), "replace (case insensitive) works"); > + TEST(setup(t_get, 0), "get works"); > + TEST(setup(t_get, 1), "get (case insensitive) works"); > + TEST(setup(t_add, 0), "add works"); > + TEST(setup(t_add, 1), "add (case insensitive) works"); > + TEST(setup(t_remove, 0), "remove works"); > + TEST(setup(t_remove, 1), "remove (case insensitive) works"); > + TEST(setup(t_iterate, 0), "iterate works"); > + TEST(setup(t_iterate, 1), "iterate (case insensitive) works"); > + TEST(setup(t_alloc, 0), "grow / shrink works"); > + TEST(setup(t_intern, 0), "string interning works"); > + return test_done(); > +} > -- > 2.45.2 > >