On 23/03/17 13:47, git@xxxxxxxxxxxxxxxxx wrote: > From: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx> > [snip] > > diff --git a/name-hash.c b/name-hash.c > index 3f7722a..71ef07e 100644 > --- a/name-hash.c > +++ b/name-hash.c > @@ -23,15 +23,21 @@ static int dir_entry_cmp(const struct dir_entry *e1, > name ? name : e2->name, e1->namelen); > } > > -static struct dir_entry *find_dir_entry(struct index_state *istate, > - const char *name, unsigned int namelen) > +static struct dir_entry *find_dir_entry__hash(struct index_state *istate, > + const char *name, unsigned int namelen, unsigned int hash) > { > struct dir_entry key; > - hashmap_entry_init(&key, memihash(name, namelen)); > + hashmap_entry_init(&key, hash); > key.namelen = namelen; > return hashmap_get(&istate->dir_hash, &key, name); > } > > +static struct dir_entry *find_dir_entry(struct index_state *istate, > + const char *name, unsigned int namelen) > +{ > + return find_dir_entry__hash(istate, name, namelen, memihash(name, namelen)); > +} > + > static struct dir_entry *hash_dir_entry(struct index_state *istate, > struct cache_entry *ce, int namelen) > { > @@ -112,21 +118,493 @@ static int cache_entry_cmp(const struct cache_entry *ce1, > return remove ? !(ce1 == ce2) : 0; > } > > -static void lazy_init_name_hash(struct index_state *istate) > +static int lazy_try_threaded = 1; > +static int lazy_nr_dir_threads; > + > +#ifdef NO_PTHREADS > + > +static inline int lookup_lazy_params(struct index_state *istate) > { > - int nr; > + return 0; > +} > + > +static inline void threaded_lazy_init_name_hash( > + struct index_state *istate) > +{ > +} > + > +#else > + > +#include "thread-utils.h" > + > +/* > + * Set a minimum number of cache_entries that we will handle per > + * thread and use that to decide how many threads to run (upto > + * the number on the system). > + * > + * For guidance setting the lower per-thread bound, see: > + * t/helper/test-lazy-init-name-hash --analyze > + */ > +#define LAZY_THREAD_COST (2000) > + > +/* > + * We use n mutexes to guard n partitions of the "istate->dir_hash" > + * hashtable. Since "find" and "insert" operations will hash to a > + * particular bucket and modify/search a single chain, we can say > + * that "all chains mod n" are guarded by the same mutex -- rather > + * than having a single mutex to guard the entire table. (This does > + * require that we disable "rehashing" on the hashtable.) > + * > + * So, a larger value here decreases the probability of a collision > + * and the time that each thread must wait for the mutex. > + */ > +#define LAZY_MAX_MUTEX (32) > + > +static pthread_mutex_t *lazy_dir_mutex_array; > + > +/* > + * An array of lazy_entry items is used by the n threads in > + * the directory parse (first) phase to (lock-free) store the > + * intermediate results. These values are then referenced by > + * the 2 threads in the second phase. > + */ > +struct lazy_entry { > + struct dir_entry *dir; > + unsigned int hash_dir; > + unsigned int hash_name; > +}; > + > +/* > + * Decide if we want to use threads (if available) to load > + * the hash tables. We set "lazy_nr_dir_threads" to zero when > + * it is not worth it. > + */ > +static int lookup_lazy_params(struct index_state *istate) > +{ > + int nr_cpus; > + > + lazy_nr_dir_threads = 0; > + > + if (!lazy_try_threaded) > + return 0; > + > + /* > + * If we are respecting case, just use the original > + * code to build the "istate->name_hash". We don't > + * need the complexity here. > + */ > + if (!ignore_case) > + return 0; > + > + nr_cpus = online_cpus(); > + if (nr_cpus < 2) > + return 0; > + > + if (istate->cache_nr < 2 * LAZY_THREAD_COST) > + return 0; > + > + if (istate->cache_nr < nr_cpus * LAZY_THREAD_COST) > + nr_cpus = istate->cache_nr / LAZY_THREAD_COST; > + lazy_nr_dir_threads = nr_cpus; > + return lazy_nr_dir_threads; > +} > + > +/* > + * Initialize n mutexes for use when searching and inserting > + * into "istate->dir_hash". All "dir" threads are trying > + * to insert partial pathnames into the hash as they iterate > + * over their portions of the index, so lock contention is > + * high. > + * > + * However, the hashmap is going to put items into bucket > + * chains based on their hash values. Use that to create n > + * mutexes and lock on mutex[bucket(hash) % n]. This will > + * decrease the collision rate by (hopefully) by a factor of n. > + */ > +static void init_dir_mutex(void) > +{ > + int j; > + > + lazy_dir_mutex_array = xcalloc(LAZY_MAX_MUTEX, sizeof(pthread_mutex_t)); > + > + for (j = 0; j < LAZY_MAX_MUTEX; j++) > + init_recursive_mutex(&lazy_dir_mutex_array[j]); > +} > + > +static void cleanup_dir_mutex(void) > +{ > + int j; > + > + for (j = 0; j < LAZY_MAX_MUTEX; j++) > + pthread_mutex_destroy(&lazy_dir_mutex_array[j]); > + > + free(lazy_dir_mutex_array); > +} > + > +static void lock_dir_mutex(int j) > +{ > + pthread_mutex_lock(&lazy_dir_mutex_array[j]); > +} > + > +static void unlock_dir_mutex(int j) > +{ > + pthread_mutex_unlock(&lazy_dir_mutex_array[j]); > +} > + > +static inline int compute_dir_lock_nr( > + const struct hashmap *map, > + unsigned int hash) > +{ > + return hashmap_bucket(map, hash) % LAZY_MAX_MUTEX; > +} > + > +static struct dir_entry *hash_dir_entry_with_parent_and_prefix( > + struct index_state *istate, > + struct dir_entry *parent, > + struct strbuf *prefix) > +{ > + struct dir_entry *dir; > + unsigned int hash; > + int lock_nr; > + > + /* > + * Either we have a parent directory and path with slash(es) > + * or the directory is an immediate child of the root directory. > + */ > + assert((parent != NULL) ^ (strchr(prefix->buf, '/') == 0)); sparse complains about 'Using plain integer as a NULL pointer', (the return from strchr() is NULL, if '/' is not found) so maybe: + assert((parent != NULL) ^ (strchr(prefix->buf, '/') == NULL)); ATB, Ramsay Jones