Re: [PATCH 1/3] name-hash: refactor polymorphic index_name_exists()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Eric Sunshine <sunshine@xxxxxxxxxxxxxx> writes:

> Depending upon the absence or presence of a trailing '/' on the incoming
> pathname, index_name_exists() checks either if a file is present in the
> index or if a directory is represented within the index.  Each caller
> explicitly chooses the mode of operation by adding or removing a
> trailing '/' before invoking index_name_exists().
>
> Since these two modes of operations are disjoint and have no code in
> common (one searches index_state.name_hash; the other dir_hash), they
> can be represented more naturally as distinct functions: one to search
> for a file, and one for a directory.
>
> Splitting index searching into two functions relieves callers of the
> artificial burden of having to add or remove a slash to select the mode
> of operation; instead they just call the desired function. A subsequent
> patch will take advantage of this benefit in order to eliminate the
> requirement that the incoming pathname for a directory search must have
> a trailing slash.

Good thinking.  Thanks for working on this; I agree with the general
direction this series is going.

I however wonder if it would be a good idea to rename the other one
to {cache|index}_file_exists(), and even keep the *_name_exists()
variant that switches between the two based on the trailing slash,
to avoid breaking other topics in flight that may have added new
callsites to *_name_exists().  Otherwise the new callsites will feed
a path with a trailing slash to *_name_exists() and get a false
result, no?


> Signed-off-by: Eric Sunshine <sunshine@xxxxxxxxxxxxxx>
> ---
>  cache.h      |  2 ++
>  dir.c        |  7 ++++---
>  name-hash.c  | 47 ++++++++++++++++++++++++-----------------------
>  read-cache.c |  2 +-
>  4 files changed, 31 insertions(+), 27 deletions(-)
>
> diff --git a/cache.h b/cache.h
> index 9ef778a..03ec24c 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -314,6 +314,7 @@ extern void free_name_hash(struct index_state *istate);
>  #define refresh_cache(flags) refresh_index(&the_index, (flags), NULL, NULL, NULL)
>  #define ce_match_stat(ce, st, options) ie_match_stat(&the_index, (ce), (st), (options))
>  #define ce_modified(ce, st, options) ie_modified(&the_index, (ce), (st), (options))
> +#define cache_dir_exists(name, namelen) index_dir_exists(&the_index, (name), (namelen))
>  #define cache_name_exists(name, namelen, igncase) index_name_exists(&the_index, (name), (namelen), (igncase))
>  #define cache_name_is_other(name, namelen) index_name_is_other(&the_index, (name), (namelen))
>  #define resolve_undo_clear() resolve_undo_clear_index(&the_index)
> @@ -463,6 +464,7 @@ extern int write_index(struct index_state *, int newfd);
>  extern int discard_index(struct index_state *);
>  extern int unmerged_index(const struct index_state *);
>  extern int verify_path(const char *path);
> +extern struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen);
>  extern struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int igncase);
>  extern int index_name_pos(const struct index_state *, const char *name, int namelen);
>  #define ADD_CACHE_OK_TO_ADD 1		/* Ok to add */
> diff --git a/dir.c b/dir.c
> index b439ff0..0080673 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -860,7 +860,8 @@ static struct dir_entry *dir_entry_new(const char *pathname, int len)
>  
>  static struct dir_entry *dir_add_name(struct dir_struct *dir, const char *pathname, int len)
>  {
> -	if (cache_name_exists(pathname, len, ignore_case))
> +	if ((len == 0 || pathname[len - 1] != '/') &&
> +	    cache_name_exists(pathname, len, ignore_case))
>  		return NULL;
>  
>  	ALLOC_GROW(dir->entries, dir->nr+1, dir->alloc);
> @@ -885,11 +886,11 @@ enum exist_status {
>  /*
>   * Do not use the alphabetically sorted index to look up
>   * the directory name; instead, use the case insensitive
> - * name hash.
> + * directory hash.
>   */
>  static enum exist_status directory_exists_in_index_icase(const char *dirname, int len)
>  {
> -	const struct cache_entry *ce = cache_name_exists(dirname, len + 1, ignore_case);
> +	const struct cache_entry *ce = cache_dir_exists(dirname, len + 1);
>  	unsigned char endchar;
>  
>  	if (!ce)
> diff --git a/name-hash.c b/name-hash.c
> index 617c86c..5b01554 100644
> --- a/name-hash.c
> +++ b/name-hash.c
> @@ -222,11 +222,35 @@ static int same_name(const struct cache_entry *ce, const char *name, int namelen
>  	return slow_same_name(name, namelen, ce->name, len);
>  }
>  
> +struct cache_entry *index_dir_exists(struct index_state *istate, const char *name, int namelen)
> +{
> +	struct cache_entry *ce;
> +	struct dir_entry *dir;
> +
> +	lazy_init_name_hash(istate);
> +	dir = find_dir_entry(istate, name, namelen);
> +	if (dir && dir->nr)
> +		return dir->ce;
> +
> +	/*
> +	 * It might be a submodule. Unlike plain directories, which are stored
> +	 * in the dir-hash, submodules are stored in the name-hash, so check
> +	 * there, as well.
> +	 */
> +	ce = index_name_exists(istate, name, namelen - 1, 1);
> +	if (ce && S_ISGITLINK(ce->ce_mode))
> +		return ce;
> +
> +	return NULL;
> +}
> +
>  struct cache_entry *index_name_exists(struct index_state *istate, const char *name, int namelen, int icase)
>  {
>  	unsigned int hash = hash_name(name, namelen);
>  	struct cache_entry *ce;
>  
> +	assert(namelen == 0 || name[namelen - 1] != '/');
> +
>  	lazy_init_name_hash(istate);
>  	ce = lookup_hash(hash, &istate->name_hash);
>  
> @@ -237,29 +261,6 @@ struct cache_entry *index_name_exists(struct index_state *istate, const char *na
>  		}
>  		ce = ce->next;
>  	}
> -
> -	/*
> -	 * When looking for a directory (trailing '/'), it might be a
> -	 * submodule or a directory. Despite submodules being directories,
> -	 * they are stored in the name hash without a closing slash.
> -	 * When ignore_case is 1, directories are stored in a separate hash
> -	 * table *with* their closing slash.
> -	 *
> -	 * The side effect of this storage technique is we have need to
> -	 * lookup the directory in a separate hash table, and if not found
> -	 * remove the slash from name and perform the lookup again without
> -	 * the slash.  If a match is made, S_ISGITLINK(ce->mode) will be
> -	 * true.
> -	 */
> -	if (icase && name[namelen - 1] == '/') {
> -		struct dir_entry *dir = find_dir_entry(istate, name, namelen);
> -		if (dir && dir->nr)
> -			return dir->ce;
> -
> -		ce = index_name_exists(istate, name, namelen - 1, icase);
> -		if (ce && S_ISGITLINK(ce->ce_mode))
> -			return ce;
> -	}
>  	return NULL;
>  }
>  
> diff --git a/read-cache.c b/read-cache.c
> index 885943a..a59644a 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -643,7 +643,7 @@ int add_to_index(struct index_state *istate, const char *path, struct stat *st,
>  			if (*ptr == '/') {
>  				struct cache_entry *foundce;
>  				++ptr;
> -				foundce = index_name_exists(istate, ce->name, ptr - ce->name, ignore_case);
> +				foundce = index_dir_exists(istate, ce->name, ptr - ce->name);
>  				if (foundce) {
>  					memcpy((void *)startPtr, foundce->name + (startPtr - ce->name), ptr - startPtr);
>  					startPtr = ptr;
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]