Re: [PATCH 3/4] dir.c: support marking some patterns already matched

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Nguyễn Thái Ngọc Duy  <pclouds@xxxxxxxxx> writes:

> Given path "a" and the pattern "a", it's matched. But if we throw path
> "a/b" to pattern "a", the code fails to realize that if "a" matches
> "a" then "a/b" should also be matched.
>
> When the pattern is matched the first time, we can mark it "sticky", so
> that all files and dirs inside the matched path also match. This is a
> simpler solution than modifying all match scenarios to fix that.

I am not quite sure what this one tries to achieve.  Is this a
performance thing, or is it a correctness thing?

"This is a simpler solution than" is skimpy on the description of
what the solution is.

When you see pattern 'a' and path 'a/', you would throw the path in
the sticky list.  When you descend into 'a/' and see things under it,
e.g. 'a/b', you would say "we have a match" because 'a' is sticky.
Do you throw 'a/b' also into the sticky list so that you would catch
'a/b/c' later?  Do you rely on the order of tree walking to cull
entries from the sticky list that are no longer relevant?  For
example, after you enumerate everything in 'a/b', you do not need
'a/b' anymore.

Or do you notice that 'a/' matched at the top-level and stop
bothering the sticky list when you descend into 'a/b' and others?

How does this interact with negative patterns?

> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
> ---
>  dir.c | 77 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++---
>  dir.h |  3 +++
>  2 files changed, 77 insertions(+), 3 deletions(-)
>
> diff --git a/dir.c b/dir.c
> index 0be7cf1..8a9d8c0 100644
> --- a/dir.c
> +++ b/dir.c
> @@ -521,6 +521,7 @@ void add_exclude(const char *string, const char *base,
>  	x->baselen = baselen;
>  	x->flags = flags;
>  	x->srcpos = srcpos;
> +	string_list_init(&x->sticky_paths, 1);
>  	ALLOC_GROW(el->excludes, el->nr + 1, el->alloc);
>  	el->excludes[el->nr++] = x;
>  	x->el = el;
> @@ -561,8 +562,10 @@ void clear_exclude_list(struct exclude_list *el)
>  {
>  	int i;
>  
> -	for (i = 0; i < el->nr; i++)
> +	for (i = 0; i < el->nr; i++) {
> +		string_list_clear(&el->excludes[i]->sticky_paths, 0);
>  		free(el->excludes[i]);
> +	}
>  	free(el->excludes);
>  	free(el->filebuf);
>  
> @@ -889,6 +892,44 @@ int match_pathname(const char *pathname, int pathlen,
>  				 WM_PATHNAME) == 0;
>  }
>  
> +static void add_sticky(struct exclude *exc, const char *pathname, int pathlen)
> +{
> +	struct strbuf sb = STRBUF_INIT;
> +	int i;
> +
> +	for (i = exc->sticky_paths.nr - 1; i >= 0; i--) {
> +		const char *sticky = exc->sticky_paths.items[i].string;
> +		int len = strlen(sticky);
> +
> +		if (pathlen < len && sticky[pathlen] == '/' &&
> +		    !strncmp(pathname, sticky, pathlen))
> +			return;
> +	}
> +
> +	strbuf_add(&sb, pathname, pathlen);
> +	string_list_append_nodup(&exc->sticky_paths, strbuf_detach(&sb, NULL));
> +}
> +
> +static int match_sticky(struct exclude *exc, const char *pathname, int pathlen, int dtype)
> +{
> +	int i;
> +
> +	for (i = exc->sticky_paths.nr - 1; i >= 0; i--) {
> +		const char *sticky = exc->sticky_paths.items[i].string;
> +		int len = strlen(sticky);
> +
> +		if (pathlen == len && dtype == DT_DIR &&
> +		    !strncmp(pathname, sticky, len))
> +			return 1;
> +
> +		if (pathlen > len && pathname[len] == '/' &&
> +		    !strncmp(pathname, sticky, len))
> +			return 1;
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * Scan the given exclude list in reverse to see whether pathname
>   * should be ignored.  The first match (i.e. the last on the list), if
> @@ -914,6 +955,16 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,
>  		const char *exclude = x->pattern;
>  		int prefix = x->nowildcardlen;
>  
> +		if (x->sticky_paths.nr) {
> +			if (*dtype == DT_UNKNOWN)
> +				*dtype = get_dtype(NULL, pathname, pathlen);
> +			if (match_sticky(x, pathname, pathlen, *dtype)) {
> +				exc = x;
> +				break;
> +			}
> +			continue;
> +		}
> +
>  		if (x->flags & EXC_FLAG_MUSTBEDIR) {
>  			if (*dtype == DT_UNKNOWN)
>  				*dtype = get_dtype(NULL, pathname, pathlen);
> @@ -947,9 +998,10 @@ static struct exclude *last_exclude_matching_from_list(const char *pathname,
>  		return NULL;
>  	}
>  
> -	trace_printf_key(&trace_exclude, "exclude: %.*s vs %s at line %d => %s\n",
> +	trace_printf_key(&trace_exclude, "exclude: %.*s vs %s at line %d => %s%s\n",
>  			 pathlen, pathname, exc->pattern, exc->srcpos,
> -			 exc->flags & EXC_FLAG_NEGATIVE ? "no" : "yes");
> +			 exc->flags & EXC_FLAG_NEGATIVE ? "no" : "yes",
> +			 exc->sticky_paths.nr ? " (stuck)" : "");
>  	return exc;
>  }
>  
> @@ -2005,6 +2057,25 @@ static struct untracked_cache_dir *validate_untracked_cache(struct dir_struct *d
>  	return root;
>  }
>  
> +static void clear_sticky(struct dir_struct *dir)
> +{
> +	struct exclude_list_group *g;
> +	struct exclude_list *el;
> +	struct exclude *x;
> +	int i, j, k;
> +
> +	for (i = EXC_CMDL; i <= EXC_FILE; i++) {
> +		g = &dir->exclude_list_group[i];
> +		for (j = g->nr - 1; j >= 0; j--) {
> +			el = &g->el[j];
> +			for (k = el->nr - 1; 0 <= k; k--) {
> +				x = el->excludes[k];
> +				string_list_clear(&x->sticky_paths, 0);
> +			}
> +		}
> +	}
> +}
> +
>  int read_directory(struct dir_struct *dir, const char *path, int len, const struct pathspec *pathspec)
>  {
>  	struct path_simplify *simplify;
> diff --git a/dir.h b/dir.h
> index cd46f30..3ec3fb0 100644
> --- a/dir.h
> +++ b/dir.h
> @@ -4,6 +4,7 @@
>  /* See Documentation/technical/api-directory-listing.txt */
>  
>  #include "strbuf.h"
> +#include "string-list.h"
>  
>  struct dir_entry {
>  	unsigned int len;
> @@ -34,6 +35,8 @@ struct exclude {
>  	 * and from -1 decrementing for patterns from CLI args.
>  	 */
>  	int srcpos;
> +
> +	struct string_list sticky_paths;
>  };
>  
>  /*
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]