Re: [PATCH/WIP v2 05/14] read-cache: put some limits on file watching

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> writes:

> watch_entries() is a lot of computation and could trigger a lot more
> lookups in file-watcher. Normally after the first set of watches are
> in place, we do not need to update often. Moreover if the number of
> entries is small, the overhead of file watcher may actually slow git
> down.
>
> This patch only allows to update watches if the number of watchable
> files is over a limit (and there are new files added if this is not
> the first time). Measurements on Core i5-2520M and Linux 3.7.6, about
> 920 lstat() take 1ms. Somewhere between 2^16 and 2^17 lstat calls that
> it starts to take longer than 100ms. 2^16 is chosen at the minimum
> limit to start using file watcher.
>
> Recently updated files are not considered watchable because they are
> likely to be updated again soon, not worth the ping-pong game with
> file watcher. The default limit 30min is just a random value.

But then a fresh clone of a big repository would not get any benefit
from the watcher?

Not yet sure how to best handle this.

> Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
> ---
>  Documentation/config.txt |  9 +++++++++
>  cache.h                  |  1 +
>  read-cache.c             | 44 ++++++++++++++++++++++++++++++++++++--------
>  3 files changed, 46 insertions(+), 8 deletions(-)
>
> diff --git a/Documentation/config.txt b/Documentation/config.txt
> index a405806..e394399 100644
> --- a/Documentation/config.txt
> +++ b/Documentation/config.txt
> @@ -1038,6 +1038,15 @@ difftool.<tool>.cmd::
>  difftool.prompt::
>  	Prompt before each invocation of the diff tool.
>  
> +filewatcher.minfiles::
> +	Start watching files if the number of watchable files are
> +	above this limit. Default value is 65536.
> +
> +filewatcher.recentlimit::
> +	Files that are last updated within filewatcher.recentlimit
> +	seconds from now are not considered watchable. Default value
> +	is 1800 (30 minutes).
> +
>  fetch.recurseSubmodules::
>  	This option can be either set to a boolean value or to 'on-demand'.
>  	Setting it to a boolean changes the behavior of fetch and pull to
> diff --git a/cache.h b/cache.h
> index 0d55551..bcec29b 100644
> --- a/cache.h
> +++ b/cache.h
> @@ -278,6 +278,7 @@ struct index_state {
>  	struct cache_tree *cache_tree;
>  	struct cache_time timestamp;
>  	unsigned name_hash_initialized : 1,
> +		 update_watches : 1,
>  		 initialized : 1;
>  	struct hashmap name_hash;
>  	struct hashmap dir_hash;
> diff --git a/read-cache.c b/read-cache.c
> index 21c3207..406834a 100644
> --- a/read-cache.c
> +++ b/read-cache.c
> @@ -38,6 +38,8 @@ static struct cache_entry *refresh_cache_entry(struct cache_entry *ce, int reall
>  #define CACHE_EXT_WATCH 0x57415443	  /* "WATC" */
>  
>  struct index_state the_index;
> +static int watch_lowerlimit = 65536;
> +static int recent_limit = 1800;
>  
>  static void set_index_entry(struct index_state *istate, int nr, struct cache_entry *ce)
>  {
> @@ -1014,6 +1016,7 @@ int add_index_entry(struct index_state *istate, struct cache_entry *ce, int opti
>  			(istate->cache_nr - pos - 1) * sizeof(ce));
>  	set_index_entry(istate, pos, ce);
>  	istate->cache_changed = 1;
> +	istate->update_watches = 1;
>  	return 0;
>  }
>  
> @@ -1300,13 +1303,14 @@ static void read_watch_extension(struct index_state *istate, uint8_t *data,
>  				 unsigned long sz)
>  {
>  	int i;
> -	if ((istate->cache_nr + 7) / 8 != sz) {
> +	if ((istate->cache_nr + 7) / 8 + 1 != sz) {
>  		error("invalid 'WATC' extension");
>  		return;
>  	}
>  	for (i = 0; i < istate->cache_nr; i++)
>  		if (data[i / 8] & (1 << (i % 8)))
>  			istate->cache[i]->ce_flags |= CE_WATCHED;
> +	istate->update_watches = data[sz - 1];
>  }
>  
>  static int read_index_extension(struct index_state *istate,
> @@ -1449,6 +1453,19 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
>  	return ce;
>  }
>  
> +static int watcher_config(const char *var, const char *value, void *data)
> +{
> +	if (!strcmp(var, "filewatcher.minfiles")) {
> +		watch_lowerlimit = git_config_int(var, value);
> +		return 0;
> +	}
> +	if (!strcmp(var, "filewatcher.recentlimit")) {
> +		recent_limit = git_config_int(var, value);
> +		return 0;
> +	}
> +	return 0;
> +}
> +
>  static void validate_watcher(struct index_state *istate, const char *path)
>  {
>  	int i;
> @@ -1458,6 +1475,7 @@ static void validate_watcher(struct index_state *istate, const char *path)
>  		return;
>  	}
>  
> +	git_config(watcher_config, NULL);
>  	istate->watcher = connect_watcher(path);
>  	if (istate->watcher != -1) {
>  		struct strbuf sb = STRBUF_INIT;
> @@ -1478,6 +1496,7 @@ static void validate_watcher(struct index_state *istate, const char *path)
>  			istate->cache[i]->ce_flags &= ~CE_WATCHED;
>  			istate->cache_changed = 1;
>  		}
> +	istate->update_watches = 1;
>  }
>  
>  static int sort_by_date(const void *a_, const void *b_)
> @@ -1524,7 +1543,7 @@ static int do_watch_entries(struct index_state *istate,
>  	return 0;
>  }
>  
> -static inline int ce_watchable(struct cache_entry *ce)
> +static inline int ce_watchable(struct cache_entry *ce, time_t now)
>  {
>  	return ce_uptodate(ce) && /* write_index will catch late ce_uptodate bits */
>  		!(ce->ce_flags & CE_WATCHED) &&
> @@ -1534,7 +1553,8 @@ static inline int ce_watchable(struct cache_entry *ce)
>  		 * obviously. S_IFLNK could be problematic because
>  		 * inotify may follow symlinks without IN_DONT_FOLLOW
>  		 */
> -		S_ISREG(ce->ce_mode);
> +		S_ISREG(ce->ce_mode) &&
> +		(ce->ce_stat_data.sd_mtime.sec + recent_limit < now);
>  }
>  
>  static void watch_entries(struct index_state *istate)
> @@ -1544,15 +1564,20 @@ static void watch_entries(struct index_state *istate)
>  	struct strbuf sb = STRBUF_INIT;
>  	int val;
>  	socklen_t vallen = sizeof(val);
> +	time_t now = time(NULL);
>  
> -	if (istate->watcher <= 0)
> +	if (istate->watcher <= 0 || !istate->update_watches)
>  		return;
> +	istate->update_watches = 0;
> +	istate->cache_changed = 1;
>  	for (i = nr = 0; i < istate->cache_nr; i++)
> -		if (ce_watchable(istate->cache[i]))
> +		if (ce_watchable(istate->cache[i], now))
>  			nr++;
> +	if (nr < watch_lowerlimit)
> +		return;
>  	sorted = xmalloc(sizeof(*sorted) * nr);
>  	for (i = nr = 0; i < istate->cache_nr; i++)
> -		if (ce_watchable(istate->cache[i]))
> +		if (ce_watchable(istate->cache[i], now))
>  			sorted[nr++] = istate->cache[i];
>  
>  	getsockopt(istate->watcher, SOL_SOCKET, SO_SNDBUF, &val, &vallen);
> @@ -1616,6 +1641,7 @@ int read_index_from(struct index_state *istate, const char *path)
>  	istate->cache_alloc = alloc_nr(istate->cache_nr);
>  	istate->cache = xcalloc(istate->cache_alloc, sizeof(*istate->cache));
>  	istate->initialized = 1;
> +	istate->update_watches = 1;
>  
>  	if (istate->version == 4)
>  		previous_name = &previous_name_buf;
> @@ -2024,8 +2050,9 @@ int write_index(struct index_state *istate, int newfd)
>  		if (err)
>  			return -1;
>  	}
> -	if (has_watches) {
> -		int id, sz = (entries - removed + 7) / 8;
> +	if (has_watches ||
> +	    (istate->watcher != -1 && !istate->update_watches)) {
> +		int id, sz = (entries - removed + 7) / 8 + 1;
>  		uint8_t *data = xmalloc(sz);
>  		memset(data, 0, sz);
>  		for (i = 0, id = 0; i < entries && has_watches; i++) {
> @@ -2038,6 +2065,7 @@ int write_index(struct index_state *istate, int newfd)
>  			}
>  			id++;
>  		}
> +		data[sz - 1] = istate->update_watches;
>  		err = write_index_ext_header(&c, newfd, CACHE_EXT_WATCH, sz) < 0
>  			|| ce_write(&c, newfd, data, sz) < 0;
>  		free(data);

-- 
Thomas Rast
tr@xxxxxxxxxxxxx
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]