[PATCH/WIP v2 06/14] read-cache: get modified file list from file watcher

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



A new command is added to the file watcher to send the list of updated
files back to git. These entries will have CE_WATCHED removed. The
remaining CE_WATCHED entries will have CE_VALID set (i.e. no changes
and no lstat either).

The file watcher does not cache stat info and send it back to git. Its
main purpose is to reduce lstat on most untouched files, not to
completely eliminate lstat.

The file watcher keeps reporting the same "updated" list until it
receives "forget" commands, which should only be issued after the
updated index is written down. This ensures that if git crashes
halfway, before it could update the index (or multiple processes are
reading the same index), "updated" info is not lost.

After the index is updated (e.g. in this case because of toggling
CE_WATCHED bits), git sends the new index signature to the file
watcher.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 cache.h        |   1 +
 file-watcher.c |  63 +++++++++++++++++++++++++++++++++---
 read-cache.c   | 100 +++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 157 insertions(+), 7 deletions(-)

diff --git a/cache.h b/cache.h
index bcec29b..8f065ed 100644
--- a/cache.h
+++ b/cache.h
@@ -284,6 +284,7 @@ struct index_state {
 	struct hashmap dir_hash;
 	unsigned char sha1[20];
 	int watcher;
+	struct string_list *updated_entries;
 };
 
 extern struct index_state the_index;
diff --git a/file-watcher.c b/file-watcher.c
index 3a54168..369af37 100644
--- a/file-watcher.c
+++ b/file-watcher.c
@@ -3,6 +3,7 @@
 #include "parse-options.h"
 #include "exec_cmd.h"
 #include "file-watcher-lib.h"
+#include "string-list.h"
 #include "pkt-line.h"
 
 static const char *const file_watcher_usage[] = {
@@ -11,6 +12,8 @@ static const char *const file_watcher_usage[] = {
 };
 
 static char index_signature[41];
+static struct string_list updated = STRING_LIST_INIT_DUP;
+static int updated_sorted;
 
 static int watch_path(char *path)
 {
@@ -23,6 +26,37 @@ static int watch_path(char *path)
 	return -1;
 }
 
+static void reset(void)
+{
+	string_list_clear(&updated, 0); /* drop all pending "updated" paths */
+	index_signature[0] = '\0'; /* forget the recorded index signature */
+}
+
+/* Reply to "status": send each updated path as a pkt-line in "new " messages. */
+static void send_status(int fd, struct sockaddr_un *sun)
+{
+	struct strbuf msg = STRBUF_INIT;
+	int n, sndbuf;
+	socklen_t optlen = sizeof(sndbuf);
+	if (getsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf, &optlen))
+		die_errno("could not get SO_SNDBUF from socket %d", fd);
+	strbuf_grow(&msg, sndbuf);
+	strbuf_addstr(&msg, "new ");
+	for (n = 0; n < updated.nr; n++) {
+		const char *path = updated.items[n].string;
+		int pkt_len = strlen(path) + 4; /* 4-byte pkt-line length header */
+		if (msg.len + pkt_len >= sndbuf) { /* full: send, start afresh */
+			send_watcher(fd, sun, "%s", msg.buf);
+			strbuf_reset(&msg);
+			strbuf_addstr(&msg, "new ");
+		}
+		packet_buf_write(&msg, "%s", path);
+	}
+	strbuf_addstr(&msg, "0000"); /* flush packet marks the end of the list */
+	send_watcher(fd, sun, "%s", msg.buf);
+	strbuf_release(&msg);
+}
+
 static void watch_paths(char *buf, int maxlen,
 			int fd, struct sockaddr_un *sock)
 {
@@ -40,6 +74,19 @@ static void watch_paths(char *buf, int maxlen,
 	send_watcher(fd, sock, "fine %d", n);
 }
 
+/* Drop "path" from the updated list (no-op if absent); sorts lazily. */
+static void remove_updated(const char *path)
+{
+	struct string_list_item *item;
+	if (!updated_sorted)
+		sort_string_list(&updated);
+	item = string_list_lookup(&updated, path);
+	/* deletion moves the last item into the hole, so order is lost */
+	updated_sorted = !item;
+	if (item)
+		unsorted_string_list_delete_item(&updated, item - updated.items, 0);
+}
+
 static int handle_command(int fd)
 {
 	struct sockaddr_un sun;
@@ -53,11 +100,17 @@ static int handle_command(int fd)
 	if ((arg = skip_prefix(msg, "hello "))) {
 		send_watcher(fd, &sun, "hello %s", index_signature);
 		if (strcmp(arg, index_signature))
-			/*
-			 * Index SHA-1 mismatch, something has gone
-			 * wrong. Clean up and start over.
-			 */
-			index_signature[0] = '\0';
+			reset();
+	} else if ((arg = skip_prefix(msg, "clear"))) {
+		reset();
+	} else if (!strcmp(msg, "status")) {
+		send_status(fd, &sun);
+	} else if ((arg = skip_prefix(msg, "bye "))) {
+		strlcpy(index_signature, arg, sizeof(index_signature));
+	} else if ((arg = skip_prefix(msg, "forget "))) {
+		int len = strlen(index_signature);
+		if (!strncmp(arg, index_signature, len) && arg[len] == ' ')
+			remove_updated(arg + len + 1);
 	} else if (starts_with(msg, "watch ")) {
 		watch_paths(msg + 6, len - 6, fd, &sun);
 	} else if (!strcmp(msg, "die")) {
diff --git a/read-cache.c b/read-cache.c
index 406834a..3aa541d 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -1453,6 +1453,69 @@ static struct cache_entry *create_from_disk(struct ondisk_cache_entry *ondisk,
 	return ce;
 }
 
+static void update_watched_files(struct index_state *istate)
+{
+	int i;
+	if (istate->watcher <= 0)
+		return;
+	if (send_watcher(istate->watcher, NULL, "status") < 0)
+		goto failed;
+	for (;;) {
+		char *line, *end;
+		ssize_t len;
+		int ch;
+		line = read_watcher(istate->watcher, &len, NULL);
+		if (!line || !starts_with(line, "new ")) {
+			if (!len) {
+				close(istate->watcher);
+				istate->watcher = -1;
+			}
+			goto failed;
+		}
+		end = line + len;
+		line += 4;
+		for (; line < end; line[len] = ch, line += len) {
+			len = packet_length(line);
+			if (!len)
+				break;
+			ch = line[len];
+			line[len] = '\0';
+			i = index_name_pos(istate, line + 4, len - 4);
+			if (i < 0)
+				continue;
+			if (istate->cache[i]->ce_flags & CE_WATCHED) {
+				istate->cache[i]->ce_flags &= ~CE_WATCHED;
+				istate->cache_changed = 1;
+			}
+			if (!istate->updated_entries) {
+				struct string_list *sl;
+				sl = xmalloc(sizeof(*sl));
+				memset(sl, 0, sizeof(*sl));
+				sl->strdup_strings = 1;
+				istate->updated_entries = sl;
+			}
+			string_list_append(istate->updated_entries, line + 4);
+		}
+		if (!len)
+			break;
+	}
+
+	for (i = 0; i < istate->cache_nr; i++)
+		if (istate->cache[i]->ce_flags & CE_WATCHED)
+			istate->cache[i]->ce_flags |= CE_VALID;
+	return;
+failed:
+	if (istate->updated_entries) {
+		string_list_clear(istate->updated_entries, 0);
+		free(istate->updated_entries);
+		istate->updated_entries = NULL;
+	}
+	send_watcher(istate->watcher, NULL, "clear");
+	for (i = 0; i < istate->cache_nr; i++)
+		istate->cache[i]->ce_flags &= ~CE_WATCHED;
+	istate->cache_changed = 1;
+}
+
 static int watcher_config(const char *var, const char *value, void *data)
 {
 	if (!strcmp(var, "filewatcher.minfiles")) {
@@ -1484,6 +1547,7 @@ static void validate_watcher(struct index_state *istate, const char *path)
 		if (send_watcher(istate->watcher, NULL, "%s", sb.buf) > 0 &&
 		    (msg = read_watcher(istate->watcher, NULL, NULL)) != NULL &&
 		    !strcmp(msg, sb.buf)) { /* good */
+			update_watched_files(istate);
 			strbuf_release(&sb);
 			return;
 		}
@@ -1597,6 +1661,21 @@ static void watch_entries(struct index_state *istate)
 	free(sorted);
 }
 
+/* Tell the watcher the new index signature, then retire reported paths. */
+static void farewell_watcher(struct index_state *istate,
+			     const unsigned char *sha1)
+{
+	struct string_list *seen = istate->updated_entries;
+	int n;
+	if (istate->watcher <= 0)
+		return;
+	send_watcher(istate->watcher, NULL, "bye %s", sha1_to_hex(sha1));
+	/* "forget" carries the signature so the watcher can reject stale ones */
+	for (n = 0; seen && n < seen->nr; n++)
+		send_watcher(istate->watcher, NULL, "forget %s %s",
+			     sha1_to_hex(sha1), seen->items[n].string);
+}
+
 /* remember to discard_cache() before reading a different cache! */
 int read_index_from(struct index_state *istate, const char *path)
 {
@@ -1718,6 +1797,11 @@ int discard_index(struct index_state *istate)
 	free(istate->cache);
 	istate->cache = NULL;
 	istate->cache_alloc = 0;
+	if (istate->updated_entries) {
+		string_list_clear(istate->updated_entries, 0);
+		free(istate->updated_entries);
+		istate->updated_entries = NULL;
+	}
 	return 0;
 }
 
@@ -1778,7 +1862,7 @@ static int write_index_ext_header(git_SHA_CTX *context, int fd,
 		(ce_write(context, fd, &sz, 4) < 0)) ? -1 : 0;
 }
 
-static int ce_flush(git_SHA_CTX *context, int fd)
+static int ce_flush(git_SHA_CTX *context, int fd, unsigned char *sha1)
 {
 	unsigned int left = write_buffer_len;
 
@@ -1796,6 +1880,8 @@ static int ce_flush(git_SHA_CTX *context, int fd)
 
 	/* Append the SHA1 signature at the end */
 	git_SHA1_Final(write_buffer + left, context);
+	if (sha1)
+		hashcpy(sha1, write_buffer + left);
 	left += 20;
 	return (write_in_full(fd, write_buffer, left) != left) ? -1 : 0;
 }
@@ -1960,12 +2046,21 @@ int write_index(struct index_state *istate, int newfd)
 	int entries = istate->cache_nr;
 	struct stat st;
 	struct strbuf previous_name_buf = STRBUF_INIT, *previous_name;
+	unsigned char sha1[20];
 
 	for (i = removed = extended = 0; i < entries; i++) {
 		if (cache[i]->ce_flags & CE_REMOVE)
 			removed++;
 		else if (cache[i]->ce_flags & CE_WATCHED) {
 			/*
+			 * CE_VALID when used with CE_WATCHED is not
+			 * supposed to be persistent. Next time git
+			 * runs, if this entry is still watched and
+			 * nothing has changed, CE_VALID will be
+			 * reinstated.
+			 */
+			cache[i]->ce_flags &= ~CE_VALID;
+			/*
 			 * We may set CE_WATCHED (but not CE_VALID)
 			 * early when refresh has not been done
 			 * yet. At that time we had no idea if the
@@ -2073,8 +2168,9 @@ int write_index(struct index_state *istate, int newfd)
 			return -1;
 	}
 
-	if (ce_flush(&c, newfd) || fstat(newfd, &st))
+	if (ce_flush(&c, newfd, sha1) || fstat(newfd, &st))
 		return -1;
+	farewell_watcher(istate, sha1);
 	istate->timestamp.sec = (unsigned int)st.st_mtime;
 	istate->timestamp.nsec = ST_MTIME_NSEC(st);
 	return 0;
-- 
1.8.5.1.208.g05b12ea

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]