The new hashmap implementation supports remove, so really remove unused cache entries from the name hashmap instead of just marking them. The CE_UNHASHED flag and CE_STATE_MASK are no longer needed. Keep the CE_HASHED flag to prevent adding entries twice. Signed-off-by: Karsten Blees <blees@xxxxxxx> --- cache.h | 6 ++---- name-hash.c | 46 ++++++++++++++++++++++------------------------ read-cache.c | 2 +- unpack-trees.c | 2 +- 4 files changed, 26 insertions(+), 30 deletions(-) diff --git a/cache.h b/cache.h index 46309c6..8833944 100644 --- a/cache.h +++ b/cache.h @@ -160,7 +160,6 @@ struct cache_entry { #define CE_ADDED (1 << 19) #define CE_HASHED (1 << 20) -#define CE_UNHASHED (1 << 21) #define CE_WT_REMOVE (1 << 22) /* remove in work directory */ #define CE_CONFLICTED (1 << 23) @@ -194,11 +193,10 @@ struct cache_entry { * Copy the sha1 and stat state of a cache entry from one to * another. But we never change the name, or the hash state! */ -#define CE_STATE_MASK (CE_HASHED | CE_UNHASHED) static inline void copy_cache_entry(struct cache_entry *dst, const struct cache_entry *src) { - unsigned int state = dst->ce_flags & CE_STATE_MASK; + unsigned int state = dst->ce_flags & CE_HASHED; /* Don't copy hash chain and name */ memcpy(&dst->ce_stat_data, &src->ce_stat_data, @@ -206,7 +204,7 @@ static inline void copy_cache_entry(struct cache_entry *dst, offsetof(struct cache_entry, ce_stat_data)); /* Restore the hash state */ - dst->ce_flags = (dst->ce_flags & ~CE_STATE_MASK) | state; + dst->ce_flags = (dst->ce_flags & ~CE_HASHED) | state; } static inline unsigned create_ce_flags(unsigned stage) diff --git a/name-hash.c b/name-hash.c index 4879ce4..4aac33f 100644 --- a/name-hash.c +++ b/name-hash.c @@ -105,17 +105,29 @@ static void hash_index_entry(struct index_state *istate, struct cache_entry *ce) hashmap_entry_init(ce, memihash(ce->name, ce_namelen(ce))); hashmap_add(&istate->name_hash, ce); - if (ignore_case && !(ce->ce_flags & CE_UNHASHED)) + if (ignore_case) add_dir_entry(istate, ce); } +static int cache_entry_cmp(const struct cache_entry *ce1, + const struct cache_entry *ce2, const void *remove) +{ + /* + * For remove_name_hash, find the exact entry (pointer equality); for + * index_name_exists, find all entries with matching hash code and + * decide whether the entry matches in same_name. + */ + return remove ? !(ce1 == ce2) : 0; +} + static void lazy_init_name_hash(struct index_state *istate) { int nr; if (istate->name_hash_initialized) return; - hashmap_init(&istate->name_hash, NULL, istate->cache_nr); + hashmap_init(&istate->name_hash, (hashmap_cmp_fn) cache_entry_cmp, + istate->cache_nr); hashmap_init(&istate->dir_hash, (hashmap_cmp_fn) dir_entry_cmp, 0); for (nr = 0; nr < istate->cache_nr; nr++) hash_index_entry(istate, istate->cache[nr]); @@ -124,31 +136,19 @@ static void lazy_init_name_hash(struct index_state *istate) void add_name_hash(struct index_state *istate, struct cache_entry *ce) { - /* if already hashed, add reference to directory entries */ - if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_STATE_MASK) - add_dir_entry(istate, ce); - - ce->ce_flags &= ~CE_UNHASHED; if (istate->name_hash_initialized) hash_index_entry(istate, ce); } -/* - * We don't actually *remove* it, we can just mark it invalid so that - * we won't find it in lookups. - * - * Not only would we have to search the lists (simple enough), but - * we'd also have to rehash other hash buckets in case this makes the - * hash bucket empty (common). So it's much better to just mark - * it. - */ void remove_name_hash(struct index_state *istate, struct cache_entry *ce) { - /* if already hashed, release reference to directory entries */ - if (ignore_case && (ce->ce_flags & CE_STATE_MASK) == CE_HASHED) - remove_dir_entry(istate, ce); + if (!istate->name_hash_initialized || !(ce->ce_flags & CE_HASHED)) + return; + ce->ce_flags &= ~CE_HASHED; + hashmap_remove(&istate->name_hash, ce, ce); - ce->ce_flags |= CE_UNHASHED; + if (ignore_case) + remove_dir_entry(istate, ce); } static int slow_same_name(const char *name1, int len1, const char *name2, int len2) @@ -197,10 +197,8 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na hashmap_entry_init(&key, memihash(name, namelen)); ce = hashmap_get(&istate->name_hash, &key, NULL); while (ce) { - if (!(ce->ce_flags & CE_UNHASHED)) { - if (same_name(ce, name, namelen, icase)) - return ce; - } + if (same_name(ce, name, namelen, icase)) + return ce; ce = hashmap_get_next(&istate->name_hash, ce); } diff --git a/read-cache.c b/read-cache.c index c3d5e35..0b5c44b 100644 --- a/read-cache.c +++ b/read-cache.c @@ -58,7 +58,7 @@ void rename_index_entry_at(struct index_state *istate, int nr, const char *new_n new = xmalloc(cache_entry_size(namelen)); copy_cache_entry(new, old); - new->ce_flags &= ~CE_STATE_MASK; + new->ce_flags &= ~CE_HASHED; new->ce_namelen = namelen; memcpy(new->name, new_name, namelen + 1); diff --git a/unpack-trees.c b/unpack-trees.c index 9c108a2..92c7bc4 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -105,7 +105,7 @@ void setup_unpack_trees_porcelain(struct unpack_trees_options *opts, static void do_add_entry(struct unpack_trees_options *o, struct cache_entry *ce, unsigned int set, unsigned int clear) { - clear |= CE_HASHED | CE_UNHASHED; + clear |= CE_HASHED; if (set & CE_REMOVE) set |= CE_WT_REMOVE; -- 1.8.4.11.g4f52745.dirty -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html