Re: [PATCH v4 3/6] fsmonitor: implement filesystem change listener for Linux

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Nov 23 2022, Eric DeCosta via GitGitGadget wrote:

> From: Eric DeCosta <edecosta@xxxxxxxxxxxxx>
> [...]
> +}
> +
> +/*
> + * Remove the inotify watch, the watch descriptor to path mapping
> + * and the reverse mapping.
> + */
> +static void remove_watch(struct watch_entry *w,
> +	struct fsm_listen_data *data)
> +{
> +	struct watch_entry k1, k2, *w1, *w2;
> +
> +	/* remove watch, ignore error if kernel already did it */
> +	if (inotify_rm_watch(data->fd_inotify, w->wd) && errno != EINVAL)
> +		error_errno("inotify_rm_watch() failed");

If we error_errno(), shouldn't this function have a return value?

> +
> +	hashmap_entry_init(&k1.ent, memhash(&w->wd, sizeof(int)));
> +	w1 = hashmap_remove_entry(&data->watches, &k1, ent, NULL);
> +	if (!w1)
> +		BUG("Double remove of watch for '%s'", w->dir);
> +
> +	if (w1->cookie)
> +		BUG("Removing watch for '%s' which has a pending rename", w1->dir);
> +
> +	hashmap_entry_init(&k2.ent, memhash(w->dir, strlen(w->dir)));
> +	w2 = hashmap_remove_entry(&data->revwatches, &k2, ent, NULL);
> +	if (!w2)
> +		BUG("Double remove of reverse watch for '%s'", w->dir);

For the BUG() additions: Start with lower-case in messages, see
CodingGuidelines. I.e. "double remove of ..." etc. Ditto below..

> [...]
> +/*
> + * Handle directory renames
> + *
> + * Once a IN_MOVED_TO event is received, lookup the rename tracking information
> + * via the event cookie and use this information to update the watch.
> + */
> +static void rename_dir(uint32_t cookie, const char *path,
> +	struct fsm_listen_data *data)
> +{
> +	struct rename_entry rek, *re;
> +	struct watch_entry k, *w;
> +
> +	/* lookup a pending rename to match */
> +	rek.cookie = cookie;
> +	hashmap_entry_init(&rek.ent, memhash(&rek.cookie, sizeof(uint32_t)));
> +	re = hashmap_get_entry(&data->renames, &rek, ent, NULL);
> +	if (re) {
> +		k.dir = re->dir;
> +		hashmap_entry_init(&k.ent, memhash(k.dir, strlen(k.dir)));
> +		w = hashmap_get_entry(&data->revwatches, &k, ent, NULL);
> +		if (w) {
> +			w->cookie = 0; /* rename handled */
> +			remove_watch(w, data);
> +			add_watch(path, data);

Elsewhere you check the add_watch() return value, but not here?

> +		} else {
> +			BUG("No matching watch");
> +		}
> +	} else {
> +		BUG("No matching cookie");
> +	}
> +}
> +
> +/*
> + * Recursively add watches to every directory under path
> + */
> +static int register_inotify(const char *path,
> +	struct fsmonitor_daemon_state *state,
> +	struct fsmonitor_batch *batch)
> +{
> +	DIR *dir;
> +	const char *rel;
> +	struct strbuf current = STRBUF_INIT;
> +	struct dirent *de;
> +	struct stat fs;
> +	int ret = -1;
> +
> +	dir = opendir(path);
> +	if (!dir)
> +		return error_errno("opendir('%s') failed", path);
> +
> +	while ((de = readdir_skip_dot_and_dotdot(dir)) != NULL) {

ditto drop the "!= NULL"

> +	struct strbuf msg = STRBUF_INIT;
> +
> +	if (mask & IN_ACCESS)
> +		strbuf_addstr(&msg, "IN_ACCESS|");
> +	if (mask & IN_MODIFY)
> +		strbuf_addstr(&msg, "IN_MODIFY|");
> +	if (mask & IN_ATTRIB)
> +		strbuf_addstr(&msg, "IN_ATTRIB|");
> +	if (mask & IN_CLOSE_WRITE)
> +		strbuf_addstr(&msg, "IN_CLOSE_WRITE|");
> +	if (mask & IN_CLOSE_NOWRITE)
> +		strbuf_addstr(&msg, "IN_CLOSE_NOWRITE|");
> +	if (mask & IN_OPEN)
> +		strbuf_addstr(&msg, "IN_OPEN|");
> +	if (mask & IN_MOVED_FROM)
> +		strbuf_addstr(&msg, "IN_MOVED_FROM|");
> +	if (mask & IN_MOVED_TO)
> +		strbuf_addstr(&msg, "IN_MOVED_TO|");
> +	if (mask & IN_CREATE)
> +		strbuf_addstr(&msg, "IN_CREATE|");
> +	if (mask & IN_DELETE)
> +		strbuf_addstr(&msg, "IN_DELETE|");
> +	if (mask & IN_DELETE_SELF)
> +		strbuf_addstr(&msg, "IN_DELETE_SELF|");
> +	if (mask & IN_MOVE_SELF)
> +		strbuf_addstr(&msg, "IN_MOVE_SELF|");
> +	if (mask & IN_UNMOUNT)
> +		strbuf_addstr(&msg, "IN_UNMOUNT|");
> +	if (mask & IN_Q_OVERFLOW)
> +		strbuf_addstr(&msg, "IN_Q_OVERFLOW|");
> +	if (mask & IN_IGNORED)
> +		strbuf_addstr(&msg, "IN_IGNORED|");
> +	if (mask & IN_ISDIR)
> +		strbuf_addstr(&msg, "IN_ISDIR|");

Shouldn't the very last addition to mask omit the "|"?

I think it would be worth just making this a NULL-delimited "int, const char *" array like:

	{
		{ IN_ACCESS, "IN_ACCESS" },
		...
		NULL,
	};

Then looping over it, or maybe not...

> +	trace_printf_key(&trace_fsmonitor, "inotify_event: '%s', mask=%#8.8x %s",
> +				path, mask, msg.buf);
> +
> +	strbuf_release(&msg);
> +}
> +
> +int fsm_listen__ctor(struct fsmonitor_daemon_state *state)
> +{
> +	int fd;
> +	int ret = 0;
> +	struct fsm_listen_data *data;
> +
> +	CALLOC_ARRAY(data, 1);
> +	state->listen_data = data;
> +	state->listen_error_code = -1;
> +	data->shutdown = SHUTDOWN_ERROR;
> +
> +	fd = inotify_init1(O_NONBLOCK);
> +	if (fd < 0)
> +		return error_errno("inotify_init1() failed");

Here you leak the "data" you just allocated on error.

> +
> +	data->fd_inotify = fd;
> +
> +	hashmap_init(&data->watches, watch_entry_cmp, NULL, 0);
> +	hashmap_init(&data->renames, rename_entry_cmp, NULL, 0);
> +	hashmap_init(&data->revwatches, revwatches_entry_cmp, NULL, 0);
> +
> +	if (add_watch(state->path_worktree_watch.buf, data))

I.e. we should only avoid free()-ing it if we can successfully hand it
over to add_watch(), shouldn't we?n

> +		ret = -1;
> +	else if (register_inotify(state->path_worktree_watch.buf, state, NULL))
> +		ret = -1;

We add {}'s to all if/else if branches if one needs it, see CodingGuidelines.
> +	else if (state->nr_paths_watching > 1) {
> +		if (add_watch(state->path_gitdir_watch.buf, data))
> +			ret = -1;
> +		else if (register_inotify(state->path_gitdir_watch.buf, state, NULL))
> +			ret = -1;

Can't this be:

	else if (state->nr_paths_watching > 1 &&
		 (add_watch(...) || register_inotify(...)))
		ret = -1;

> +	switch (fsmonitor_classify_path_absolute(state, path)) {
> +		case IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX:

Don't indent the "case" label here more than the "switch", see
CodingGuidelines.


> +static void handle_events(struct fsmonitor_daemon_state *state)
> +{
> +	 /* See https://man7.org/linux/man-pages/man7/inotify.7.html */

Extra " " here after the "\t".

> +	char buf[4096]
> +		__attribute__ ((aligned(__alignof__(struct inotify_event))));
> +
> +	struct hashmap watches = state->listen_data->watches;
> +	struct fsmonitor_batch *batch = NULL;
> +	struct string_list cookie_list = STRING_LIST_INIT_DUP;
> +	struct watch_entry k, *w;
> +	struct strbuf path;
> +	const struct inotify_event *event;
> +	int fd = state->listen_data->fd_inotify;
> +	ssize_t len;
> +	char *ptr, *p;
> +
> +	strbuf_init(&path, PATH_MAX);

Just use "struct strbuf path = STRBUF_INIT" instead? I.e...
> +
> +	for(;;) {
> +		len = read(fd, buf, sizeof(buf));
> +		if (len == -1 && errno != EAGAIN) {
> +			error_errno(_("reading inotify message stream failed"));
> +			state->listen_data->shutdown = SHUTDOWN_ERROR;
> +			goto done;
> +		}
> +
> +		/* nothing to read */
> +		if (len <= 0)
> +			goto done;
> +
> +		/* Loop over all events in the buffer. */
> +		for (ptr = buf; ptr < buf + len;
> +			 ptr += sizeof(struct inotify_event) + event->len) {
> +
> +			event = (const struct inotify_event *) ptr;
> +
> +			if (em_ignore(event->mask))
> +				continue;
> +
> +			/* File system was unmounted or event queue overflowed */
> +			if (em_force_shutdown(event->mask)) {
> +				if (trace_pass_fl(&trace_fsmonitor))
> +					log_mask_set("Forcing shutdown", event->mask);
> +				state->listen_data->shutdown = SHUTDOWN_FORCE;
> +				goto done;
> +			}
> +
> +			hashmap_entry_init(&k.ent, memhash(&event->wd, sizeof(int)));
> +			k.wd = event->wd;
> +
> +			w = hashmap_get_entry(&watches, &k, ent, NULL);
> +			if (!w) /* should never happen */
> +				BUG("No watch for '%s'", event->name);
> +
> +			/* directory watch was removed */
> +			if (em_remove_watch(event->mask)) {
> +				remove_watch(w, state->listen_data);
> +				continue;
> +			}
> +
> +			strbuf_reset(&path);
> +			strbuf_add(&path, w->dir, strlen(w->dir));
> +			strbuf_addch(&path, '/');
> +			strbuf_addstr(&path, event->name);

... we may not even get to this, so we may pointlessly pre-grow it, if
we're considering the micro-optimization.

But if we do need it it'll quickly get up to size in the loop, which is
probably smaller than PATH_MAX, and paths can exceed PATH_MAX....

> +	for(;;) {
> +		switch (state->listen_data->shutdown) {
> +			case SHUTDOWN_CONTINUE:
> +				poll_num = poll(fds, 1, 1);
> +				if (poll_num == -1) {
> +					if (errno == EINTR)
> +						continue;
> +					error_errno(_("polling inotify message stream failed"));
> +					state->listen_data->shutdown = SHUTDOWN_ERROR;
> +					continue;
> +				}
> +
> +				if ((time(NULL) - checked) >= interval) {
> +					checked = time(NULL);

As this is linux-specific code, shouldn't we use the linux-specific API
to get the guaranteed atomically growing time here, arther than time()?

Maybe not, but I wonder if this has funny side-effects with ntpd
adjusting the time concurrently...



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux