Josh Steadmon <steadmon@xxxxxxxxxx> wrote: > > This looks like a good conversion to me. There are a few small > improvements that could be made, but mostly LGTM. Comments are inline > below. > > > On 2024.07.08 21:45, Ghanshyam Thakkar wrote: > > helper/test-hashmap.c along with t0011-hashmap.sh test the hashmap.h > > library. Migrate them to the unit testing framework for better > > debugging, runtime performance and consice code. > > Typo: s/consice/concise/ Will update. > > > Along with the migration, make 'add' tests from the shellscript order > > agnostic in unit tests, since they iterate over entries with the same > > keys and we do not guarantee the order. > > > > The helper/test-hashmap.c is still not removed because it contains a > > performance test meant to be run by the user directly (not used in > > t/perf). And it makes sense for such a utility to be a helper. > > > > Mentored-by: Christian Couder <chriscool@xxxxxxxxxxxxx> > > Mentored-by: Kaartic Sivaraam <kaartic.sivaraam@xxxxxxxxx> > > Signed-off-by: Ghanshyam Thakkar <shyamthakkar001@xxxxxxxxx> > > --- > > The changes in v2 are inspired from the review of another similar > > test t-oidmap: https://lore.kernel.org/git/16e06a6d-5fd0-4132-9d82-5c6f13b7f9ed@xxxxxxxxx/ > > > > The v2 also includes some formatting corrections and one of the > > testcases, t_add(), was changed to be more similar to the original. > > > > Makefile | 1 + > > t/helper/test-hashmap.c | 100 +---------- > > t/t0011-hashmap.sh | 260 ---------------------------- > > t/unit-tests/t-hashmap.c | 359 +++++++++++++++++++++++++++++++++++++++ > > 4 files changed, 362 insertions(+), 358 deletions(-) > > delete mode 100755 t/t0011-hashmap.sh > > create mode 100644 t/unit-tests/t-hashmap.c > > [snip] > > > diff --git a/t/unit-tests/t-hashmap.c b/t/unit-tests/t-hashmap.c > > new file mode 100644 > > index 0000000000..1c951fcfd8 > > --- /dev/null > > +++ b/t/unit-tests/t-hashmap.c > > @@ -0,0 +1,359 @@ > > +#include "test-lib.h" > > +#include "hashmap.h" > > +#include "strbuf.h" > > + > > +struct test_entry { > > + int padding; /* hashmap entry no longer needs to be the first member */ > > + struct hashmap_entry ent; > > + /* key and value as two \0-terminated strings */ > > + char key[FLEX_ARRAY]; > > +}; > > + > > +static int test_entry_cmp(const void *cmp_data, > > + const struct hashmap_entry *eptr, > > + const struct hashmap_entry *entry_or_key, > > + const void *keydata) > > +{ > > + const int ignore_case = cmp_data ? *((int *)cmp_data) : 0; > > + const struct test_entry *e1, *e2; > > + const char *key = keydata; > > + > > + e1 = container_of(eptr, const struct test_entry, ent); > > + e2 = container_of(entry_or_key, const struct test_entry, ent); > > + > > + if (ignore_case) > > + return strcasecmp(e1->key, key ? key : e2->key); > > + else > > + return strcmp(e1->key, key ? key : e2->key); > > +} > > + > > +static const char *get_value(const struct test_entry *e) > > +{ > > + return e->key + strlen(e->key) + 1; > > +} > > + > > +static struct test_entry *alloc_test_entry(unsigned int ignore_case, > > + const char *key, const char *value) > > +{ > > + size_t klen = strlen(key); > > + size_t vlen = strlen(value); > > + unsigned int hash = ignore_case ? strihash(key) : strhash(key); > > + struct test_entry *entry = xmalloc(st_add4(sizeof(*entry), klen, vlen, 2)); > > + > > + hashmap_entry_init(&entry->ent, hash); > > + memcpy(entry->key, key, klen + 1); > > + memcpy(entry->key + klen + 1, value, vlen + 1); > > + return entry; > > +} > > So we're duplicating `struct test_entry`, `test_entry_cmp()`, and > `alloc_test_entry()`, which have (almost) identical definitions in > t/helper/test-hashmap.c. I wonder if it's worth splitting these into a > separate .c file. Maybe it's too much of a pain to add Makefile rules to > share objects across the test helper and the unit tests. Something to > keep in mind I guess, if we find that we want to share more code than > this. > > > > +static struct test_entry *get_test_entry(struct hashmap *map, > > + unsigned int ignore_case, const char *key) > > +{ > > + return hashmap_get_entry_from_hash( > > + map, ignore_case ? strihash(key) : strhash(key), key, > > + struct test_entry, ent); > > +} > > + > > +static int key_val_contains(const char *key_val[][3], size_t n, > > + struct test_entry *entry) > > +{ > > + for (size_t i = 0; i < n; i++) { > > + if (!strcmp(entry->key, key_val[i][0]) && > > + !strcmp(get_value(entry), key_val[i][1])) { > > + if (!strcmp(key_val[i][2], "USED")) > > + return 2; > > + key_val[i][2] = "USED"; > > + return 0; > > + } > > + } > > + return 1; > > +} > > + > > +static void setup(void (*f)(struct hashmap *map, int ignore_case), > > + int ignore_case) > > +{ > > + struct hashmap map = HASHMAP_INIT(test_entry_cmp, &ignore_case); > > + > > + f(&map, ignore_case); > > + hashmap_clear_and_free(&map, struct test_entry, ent); > > +} > > As I mentioned in my review [1] on René's TEST_RUN series, I don't > think > we get much value out of having a setup + callback approach when the > setup is minimal. Would you consider rewriting a v2 using TEST_RUN once > that is ready in `next`? > > [1] > https://lore.kernel.org/git/tswyfparvchgi7qxrjxbx4eb7cohypzekjqzbnkbffsesaiazs@vtewtz7o6twi/ > > > +static void t_put(struct hashmap *map, int ignore_case) > > +{ > > + struct test_entry *entry; > > + const char *key_val[][2] = { { "key1", "value1" }, > > + { "key2", "value2" }, > > + { "fooBarFrotz", "value3" } }; > > + > > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > > + check(hashmap_put_entry(map, entry, ent) == NULL); > > + } > > + > > + entry = alloc_test_entry(ignore_case, "foobarfrotz", "value4"); > > + entry = hashmap_put_entry(map, entry, ent); > > + check(ignore_case ? entry != NULL : entry == NULL); > > + free(entry); > > + > > + check_int(map->tablesize, ==, 64); > > + check_int(hashmap_get_size(map), ==, > > + ignore_case ? ARRAY_SIZE(key_val) : ARRAY_SIZE(key_val) + 1); > > +} > > Ahhh, so you're using the same function for both case-sensitive and > -insensitive tests. So I guess TEST_RUN isn't useful here after all. > Personally I'd still rather get rid of setup(), but I don't feel super > strongly about it. Sure, I'll get rid of setup(). > > > +static void t_replace(struct hashmap *map, int ignore_case) > > +{ > > + struct test_entry *entry; > > + > > + entry = alloc_test_entry(ignore_case, "key1", "value1"); > > + check(hashmap_put_entry(map, entry, ent) == NULL); > > + > > + entry = alloc_test_entry(ignore_case, ignore_case ? "Key1" : "key1", > > + "value2"); > > + entry = hashmap_put_entry(map, entry, ent); > > + if (check(entry != NULL)) > > + check_str(get_value(entry), "value1"); > > + free(entry); > > + > > + entry = alloc_test_entry(ignore_case, "fooBarFrotz", "value3"); > > + check(hashmap_put_entry(map, entry, ent) == NULL); > > + > > + entry = alloc_test_entry(ignore_case, > > + ignore_case ? "foobarfrotz" : "fooBarFrotz", > > + "value4"); > > + entry = hashmap_put_entry(map, entry, ent); > > + if (check(entry != NULL)) > > + check_str(get_value(entry), "value3"); > > + free(entry); > > +} > > + > > +static void t_get(struct hashmap *map, int ignore_case) > > +{ > > + struct test_entry *entry; > > + const char *key_val[][2] = { { "key1", "value1" }, > > + { "key2", "value2" }, > > + { "fooBarFrotz", "value3" }, > > + { ignore_case ? "key4" : "foobarfrotz", "value4" } }; > > + const char *query[][2] = { > > + { ignore_case ? "Key1" : "key1", "value1" }, > > + { ignore_case ? "keY2" : "key2", "value2" }, > > + { ignore_case ? "foobarfrotz" : "fooBarFrotz", "value3" } > > + }; > > + > > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > > + check(hashmap_put_entry(map, entry, ent) == NULL); > > + } > > + > > + for (size_t i = 0; i < ARRAY_SIZE(query); i++) { > > + entry = get_test_entry(map, ignore_case, query[i][0]); > > + if (check(entry != NULL)) > > + check_str(get_value(entry), query[i][1]); > > + } > > + > > + check(get_test_entry(map, ignore_case, "notInMap") == NULL); > > +} > > + > > +static void t_add(struct hashmap *map, int ignore_case) > > +{ > > + struct test_entry *entry; > > + const char *key_val[][3] = { > > + { "key1", "value1", "UNUSED" }, > > + { ignore_case ? "Key1" : "key1", "value2", "UNUSED" }, > > + { "fooBarFrotz", "value3", "UNUSED" }, > > + { ignore_case ? "Foobarfrotz" : "fooBarFrotz", "value4", "UNUSED" } > > + }; > > + const char *queries[] = { "key1", > > + ignore_case ? "Foobarfrotz" : "fooBarFrotz" }; > > + > > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > > + hashmap_add(map, &entry->ent); > > + } > > + > > + for (size_t i = 0; i < ARRAY_SIZE(queries); i++) { > > Since we only have one query, can we remove the loop and simplify the > following block of code? We have two queries as can be seen above. One is "key1" and the other is "Foobarfrotz"/"fooBarFrotz". > Also (here and elsewhere), it might be less confusing to say "UNSEEN" / > "SEEN" instead of "UNUSED" / "USED". The latter makes it sound to me > like there's some API requirement to have a 3-item array that we don't > actually need, but in this case those fields are actually used in > key_val_contains() to track duplicates. Yeah, "UNSEEN" is definitely less confusing. Will change. > > > + int count = 0; > > + entry = hashmap_get_entry_from_hash(map, > > + ignore_case ? strihash(queries[i]) : > > + strhash(queries[i]), > > + queries[i], struct test_entry, ent); > > + > > + hashmap_for_each_entry_from(map, entry, ent) > > + { > > + int ret; > > + if (!check_int((ret = key_val_contains( > > + key_val, ARRAY_SIZE(key_val), > > + entry)), ==, 0)) { > > + switch (ret) { > > + case 1: > > + test_msg("found entry was not given in the input\n" > > + " key: %s\n value: %s", > > + entry->key, get_value(entry)); > > + break; > > + case 2: > > + test_msg("duplicate entry detected\n" > > + " key: %s\n value: %s", > > + entry->key, get_value(entry)); > > + break; > > + } > > + } else { > > + count++; > > + } > > + } > > + check_int(count, ==, 2); > > + } > > + check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val)); > > + check(get_test_entry(map, ignore_case, "notInMap") == NULL); > > +} > > + > > +static void t_remove(struct hashmap *map, int ignore_case) > > +{ > > + struct test_entry *entry, *removed; > > + const char *key_val[][2] = { { "key1", "value1" }, > > + { "key2", "value2" }, > > + { "fooBarFrotz", "value3" } }; > > + const char *remove[][2] = { { ignore_case ? "Key1" : "key1", "value1" }, > > + { ignore_case ? "keY2" : "key2", "value2" } }; > > + > > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > > + check(hashmap_put_entry(map, entry, ent) == NULL); > > + } > > + > > + for (size_t i = 0; i < ARRAY_SIZE(remove); i++) { > > + entry = alloc_test_entry(ignore_case, remove[i][0], ""); > > + removed = hashmap_remove_entry(map, entry, ent, remove[i][0]); > > + if (check(removed != NULL)) > > + check_str(get_value(removed), remove[i][1]); > > + free(entry); > > + free(removed); > > + } > > + > > + entry = alloc_test_entry(ignore_case, "notInMap", ""); > > + check(hashmap_remove_entry(map, entry, ent, "notInMap") == NULL); > > + free(entry); > > +} > > Is there a reason why you don't check the table size as the shell test > did? (similar to what you have in t_put()) Huh, I seem to have forgotten to put this snippet here: check_int(map->tablesize, ==, 64); check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val) - ARRAY_SIZE(remove)); I'll add it in v3. > > > +static void t_iterate(struct hashmap *map, int ignore_case) > > +{ > > + struct test_entry *entry; > > + struct hashmap_iter iter; > > + const char *key_val[][3] = { { "key1", "value1", "UNUSED" }, > > + { "key2", "value2", "UNUSED" }, > > + { "fooBarFrotz", "value3", "UNUSED" } }; > > + int count = 0; > > + > > + for (size_t i = 0; i < ARRAY_SIZE(key_val); i++) { > > + entry = alloc_test_entry(ignore_case, key_val[i][0], key_val[i][1]); > > + check(hashmap_put_entry(map, entry, ent) == NULL); > > + } > > + > > + hashmap_for_each_entry(map, &iter, entry, ent /* member name */) > > + { > > + int ret; > > + if (!check_int((ret = key_val_contains(key_val, ARRAY_SIZE(key_val), > > + entry)), ==, 0)) { > > + switch (ret) { > > + case 1: > > + test_msg("found entry was not given in the input\n" > > + " key: %s\n value: %s", > > + entry->key, get_value(entry)); > > + break; > > + case 2: > > + test_msg("duplicate entry detected\n" > > + " key: %s\n value: %s", > > + entry->key, get_value(entry)); > > + break; > > + } > > + } else { > > + count++; > > + } > > + } > > + check_int(count, ==, ARRAY_SIZE(key_val)); > > + check_int(hashmap_get_size(map), ==, ARRAY_SIZE(key_val)); > > +} > > + > > +static void t_alloc(struct hashmap *map, int ignore_case) > > +{ > > + struct test_entry *entry, *removed; > > + > > + for (int i = 1; i <= 51; i++) { > > + char *key = xstrfmt("key%d", i); > > + char *value = xstrfmt("value%d", i); > > + entry = alloc_test_entry(ignore_case, key, value); > > + check(hashmap_put_entry(map, entry, ent) == NULL); > > + free(key); > > + free(value); > > + } > > + check_int(map->tablesize, ==, 64); > > + check_int(hashmap_get_size(map), ==, 51); > > + > > + entry = alloc_test_entry(ignore_case, "key52", "value52"); > > + check(hashmap_put_entry(map, entry, ent) == NULL); > > + check_int(map->tablesize, ==, 256); > > + check_int(hashmap_get_size(map), ==, 52); > > + > > + for (int i = 1; i <= 12; i++) { > > + char *key = xstrfmt("key%d", i); > > + char *value = xstrfmt("value%d", i); > > + > > + entry = alloc_test_entry(ignore_case, key, ""); > > + removed = hashmap_remove_entry(map, entry, ent, key); > > + if (check(removed != NULL)) > > + check_str(value, get_value(removed)); > > + free(key); > > + free(value); > > + free(entry); > > + free(removed); > > + } > > + check_int(map->tablesize, ==, 256); > > + check_int(hashmap_get_size(map), ==, 40); > > + > > + entry = alloc_test_entry(ignore_case, "key40", ""); > > + removed = hashmap_remove_entry(map, entry, ent, "key40"); > > + if (check(removed != NULL)) > > + check_str("value40", get_value(removed)); > > + check_int(map->tablesize, ==, 64); > > + check_int(hashmap_get_size(map), ==, 39); > > + free(entry); > > + free(removed); > > +} > > + > > +static void t_intern(struct hashmap *map, int ignore_case) > > +{ > > + const char *values[] = { "value1", "Value1", "value2", "value2" }; > > + > > + for (size_t i = 0; i < ARRAY_SIZE(values); i++) { > > + const char *i1 = strintern(values[i]); > > + const char *i2 = strintern(values[i]); > > + > > + if (!check(!strcmp(i1, values[i]))) > > Should we use check_str() here? Or does that add too much extraneous > detail when the test_msg() below is what we really care about? It would repetitive as both print the same thing but test_msg() has more context. > > + test_msg("strintern(%s) returns %s\n", values[i], i1); > > + else if (!check(i1 != values[i])) > > Similarly, check_pointer_eq() here? How would we use check_pointer_eq() here as we are checking to make sure they are not equal? > > > + test_msg("strintern(%s) returns input pointer\n", > > + values[i]); > > + else if (!check(i1 == i2)) > > And check_pointer_eq() here as well. Will update here. Thanks for the review.