On Fri, Aug 11, 2023 at 01:39:31AM -0400, Steven Rostedt wrote:
> From: "Steven Rostedt (Google)" <rostedt@xxxxxxxxxxx>
>
> Move the hash functions into their own file so that it does not clutter
> the histogram.c file. Functionality should be the same, but some code
> restructuring was done.
>
> To keep the hash abstract, helper functions were introduced:
>
> hash_iter_start() - to start iterating over all items in the hash
> hash_iter_next() - to get the next item in the hash
>
> hash_iter_bucket() - to start iterating over a single bucket in the hash
> hash_iter_bucket_next() - to get the next item in the bucket
>
> hash_nr_items() - to get the number of items in the hash
>
> Also implemented hash_remove() to remove an item from the hash.
>
> Also created a eval-local.h header file to store the prototypes of the
> local functions as well as moved the structures there too.
>
> Signed-off-by: Steven Rostedt (Google) <rostedt@xxxxxxxxxxx>
> ---
>  src/Makefile     |   1 +
>  src/eval-local.h | 103 ++++++++++++++++++++++++++++++++++++++
>  src/hash.c       | 119 ++++++++++++++++++++++++++++++++++++++++++++
>  src/histograms.c | 126 +++++++----------------------------------
>  4 files changed, 240 insertions(+), 109 deletions(-)
>  create mode 100644 src/eval-local.h
>  create mode 100644 src/hash.c

<>

> diff --git a/src/hash.c b/src/hash.c
> new file mode 100644
> index 000000000000..e4f2a983d39c
> --- /dev/null
> +++ b/src/hash.c
> @@ -0,0 +1,119 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * libtraceeval hashtable interface implementation.
> + *
> + * Copyright (C) 2023 Google Inc, Steven Rostedt <rostedt@xxxxxxxxxxx>
> + */
> +
> +#include <traceeval-hist.h>
> +
> +#include "eval-local.h"
> +
> +__hidden struct hash_table *hash_alloc(void)
> +{
> +	struct hash_table *hash;
> +
> +	hash = calloc(1, sizeof(*hash));
> +	if (!hash)
> +		return NULL;
> +
> +	hash->bits = HASH_BITS;
> +	hash->hash = calloc(HASH_SIZE(hash->bits), sizeof(*hash->hash));
> +	if (!hash->hash) {
> +		free(hash);
> +		hash = NULL;
> +	}
> +
> +	return hash;
> +}
> +
> +__hidden void hash_free(struct hash_table *hash)
> +{
> +	free(hash->hash);
> +	free(hash);
> +}
> +
> +__hidden void hash_add(struct hash_table *hash, struct hash_item *item, unsigned key)
> +{
> +	key &= HASH_MASK(hash->bits);

'key' should already be masked to HASH_MASK(hash->bits) via make_hash().
If those bits are set, we have a bug somewhere. I think it's better to
check to see if those bits are set and bail out loudly with an error.

> +
> +	item->next = hash->hash[key];
> +	hash->hash[key] = item;
> +	item->key = key;
> +
> +	hash->nr_items++;
> +}
> +
> +__hidden int hash_remove(struct hash_table *hash, struct hash_item *item)
> +{
> +	struct hash_item **parent;
> +
> +	for (parent = &hash->hash[item->key]; *parent; parent = &(*parent)->next) {
> +		if (*parent == item) {
> +			*parent = item->next;
> +			hash->nr_items--;
> +			return 1;
> +		}
> +	}
> +	return 0;
> +}
> +
> +__hidden struct hash_iter *hash_iter_start(struct hash_table *hash)
> +{
> +	struct hash_iter *iter = &hash->iter;
> +	size_t i;
> +
> +	for (i = 0; i < HASH_SIZE(hash->bits); i++) {
> +		if (!hash->hash[i])
> +			continue;
> +		iter->next_item = hash->hash[i];

I think we need to break here after we've found a populated bucket and
set iter->next_item. Right now this works only if we have a single
populated bucket, because we'll set iter->next_item once and then keep
iterating until i == HASH_SIZE(hash->bits).
This will mean that iter->current_bucket will always ==
HASH_SIZE(hash->bits), but we have a bug in hash_iter_next() that meant
we weren't returning early with NULL when we hit this condition.

> +	}
> +	iter->current_bucket = i;
> +	return iter;
> +}
> +
> +__hidden struct hash_item *hash_iter_next(struct hash_iter *iter)
> +{
> +	struct hash_table *hash = container_of(iter, struct hash_table, iter);
> +	struct hash_item *item;
> +
> +	if (iter->current_bucket > HASH_SIZE(hash->bits))

	if (iter->current_bucket >= HASH_SIZE(hash->bits))

Right now we're missing the exit case where iter->current_bucket ==
HASH_SIZE(hash->bits), which means we've run out of buckets with
entries.

> +		return NULL;
> +
> +	item = iter->next_item;
> +
> +	iter->next_item = item->next;
> +	if (!iter->next_item) {
> +		size_t i;
> +
> +		for (i = iter->current_bucket + 1; i < HASH_SIZE(hash->bits); i++) {
> +			if (!hash->hash[i])
> +				continue;
> +			iter->next_item = hash->hash[i];

As in hash_iter_start(), we need to break when we find the next
non-empty bucket and set iter->next_item. This will let us set
iter->current_bucket correctly as well.

> +		}
> +		iter->current_bucket = i;
> +	}
> +	return item;
> +}
> +
> +__hidden struct hash_iter *hash_iter_bucket(struct hash_table *hash, unsigned key)
> +{
> +	struct hash_iter *iter = &hash->iter;
> +
> +	key &= HASH_MASK(hash->bits);

As with hash_add(), 'key' should already be masked and I think it'd be
better to error out loudly if upper bits in 'key' are unexpectedly set.

> +
> +	iter->current_bucket = key;
> +	iter->next_item = hash->hash[key];
> +
> +	return iter;
> +}
> +
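Pulling the suggestions above together, the corrected control flow might look
something like the sketch below. The struct layouts, the HASH_* macros (with
HASH_BITS assumed to be 10 here), and container_of() are stubbed from the
patch context rather than taken from eval-local.h, and hash_add_checked() is a
hypothetical variant of hash_add() that errors out instead of silently
masking. Treat this as an illustration of the suggested fixes, not the actual
libtraceeval code.

```c
/* Sketch of the suggested fixes; types and macros are stubbed from the
 * patch context and may not match eval-local.h exactly. */
#include <assert.h>
#include <stddef.h>

#define HASH_BITS	10	/* assumed value for illustration */
#define HASH_SIZE(bits)	(1 << (bits))
#define HASH_MASK(bits)	(HASH_SIZE(bits) - 1)

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct hash_item {
	struct hash_item	*next;
	unsigned		key;
};

struct hash_iter {
	struct hash_item	*next_item;
	size_t			current_bucket;
};

struct hash_table {
	struct hash_item	**hash;
	unsigned		bits;
	size_t			nr_items;
	struct hash_iter	iter;
};

/* Hypothetical variant of hash_add(): bail out loudly (return -1) when
 * bits above the mask are set, since that indicates a make_hash() bug. */
int hash_add_checked(struct hash_table *hash, struct hash_item *item, unsigned key)
{
	if (key & ~HASH_MASK(hash->bits))
		return -1;

	item->next = hash->hash[key];
	hash->hash[key] = item;
	item->key = key;

	hash->nr_items++;
	return 0;
}

struct hash_iter *hash_iter_start(struct hash_table *hash)
{
	struct hash_iter *iter = &hash->iter;
	size_t i;

	iter->next_item = NULL;
	for (i = 0; i < HASH_SIZE(hash->bits); i++) {
		if (!hash->hash[i])
			continue;
		iter->next_item = hash->hash[i];
		break;	/* stop at the first populated bucket */
	}
	/* i == HASH_SIZE(hash->bits) only when the table is empty */
	iter->current_bucket = i;
	return iter;
}

struct hash_item *hash_iter_next(struct hash_iter *iter)
{
	struct hash_table *hash = container_of(iter, struct hash_table, iter);
	struct hash_item *item;

	/* '>=' catches the case where we've run out of buckets */
	if (iter->current_bucket >= HASH_SIZE(hash->bits))
		return NULL;

	item = iter->next_item;

	iter->next_item = item->next;
	if (!iter->next_item) {
		size_t i;

		for (i = iter->current_bucket + 1; i < HASH_SIZE(hash->bits); i++) {
			if (!hash->hash[i])
				continue;
			iter->next_item = hash->hash[i];
			break;	/* found the next non-empty bucket */
		}
		iter->current_bucket = i;
	}
	return item;
}
```

With the breaks in place, current_bucket always tracks the bucket that
next_item lives in, so the `>=` check in hash_iter_next() cleanly terminates
the walk once the scan runs past the last populated bucket.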