Hi Jeff, On Fri, 22 Apr 2022, Jeff Hostetler via GitGitGadget wrote: > From: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx> > > Teach FSMonitor daemon on Windows to recognize shortname paths as > aliases of normal longname paths. FSMonitor clients, such as `git > status`, should receive the longname spelling of changed files (when > possible). > > Sometimes we receive FS events using the shortname, such as when a CMD > shell runs "RENAME GIT~1 FOO" or "RMDIR GIT~1". The FS notification > arrives using whatever combination of long and shortnames were used by > the other process. (Shortnames do seem to be case normalized, > however.) > > Use Windows GetLongPathNameW() to try to map the pathname spelling in > the notification event into the normalized longname spelling. (This > can fail if the file/directory is deleted, moved, or renamed, because > we are asking the FS for the mapping in response to the event and > after it has already happened, but we try.) > > Special case the shortname spelling of ".git" to avoid under-reporting > these events. > > Signed-off-by: Jeff Hostetler <jeffhost@xxxxxxxxxxxxx> > --- > compat/fsmonitor/fsm-listen-win32.c | 363 +++++++++++++++++++++++----- > t/t7527-builtin-fsmonitor.sh | 65 +++++ > 2 files changed, 374 insertions(+), 54 deletions(-) > > diff --git a/compat/fsmonitor/fsm-listen-win32.c b/compat/fsmonitor/fsm-listen-win32.c > index 5b928ab66e5..3f1b68267bd 100644 > --- a/compat/fsmonitor/fsm-listen-win32.c > +++ b/compat/fsmonitor/fsm-listen-win32.c > @@ -25,6 +25,9 @@ struct one_watch > DWORD count; > > struct strbuf path; > + wchar_t wpath_longname[MAX_PATH + 1]; > + DWORD wpath_longname_len; > + > HANDLE hDir; > HANDLE hEvent; > OVERLAPPED overlapped; > @@ -34,6 +37,21 @@ struct one_watch > * need to later call GetOverlappedResult() and possibly CancelIoEx(). > */ > BOOL is_active; > + > + /* > + * Are shortnames enabled on the containing drive? This is > + * always true for "C:/" drives and usually never true for > + * other drives. > + * > + * We only set this for the worktree because we only need to > + * convert shortname paths to longname paths for items we send > + * to clients. (We don't care about shortname expansion for > + * paths inside a GITDIR because we never send them to > + * clients.) > + */ > + BOOL has_shortnames; > + BOOL has_tilda; As much as I am a fan of Tilda Swinson, the thing to which we're referring here is a tilde (with an "e" in the end, not an "a"). > + wchar_t dotgit_shortname[16]; /* for 8.3 name */ > }; > > struct fsmonitor_daemon_backend_data > @@ -51,17 +69,18 @@ struct fsmonitor_daemon_backend_data > }; > > /* > - * Convert the WCHAR path from the notification into UTF8 and > - * then normalize it. > + * Convert the WCHAR path from the event into UTF8 and normalize it. > + * > + * `wpath_len` is in WCHARS not bytes. > */ > -static int normalize_path_in_utf8(FILE_NOTIFY_INFORMATION *info, > +static int normalize_path_in_utf8(wchar_t *wpath, DWORD wpath_len, > struct strbuf *normalized_path) > { > int reserve; > int len = 0; > > strbuf_reset(normalized_path); > - if (!info->FileNameLength) > + if (!wpath_len) > goto normalize; > > /* > @@ -70,12 +89,12 @@ static int normalize_path_in_utf8(FILE_NOTIFY_INFORMATION *info, > * sequence of 2 UTF8 characters. That should let us > * avoid ERROR_INSUFFICIENT_BUFFER 99.9+% of the time. > */ > - reserve = info->FileNameLength + 1; > + reserve = 2 * wpath_len + 1; > strbuf_grow(normalized_path, reserve); > > for (;;) { > - len = WideCharToMultiByte(CP_UTF8, 0, info->FileName, > - info->FileNameLength / sizeof(WCHAR), > + len = WideCharToMultiByte(CP_UTF8, 0, > + wpath, wpath_len, > normalized_path->buf, > strbuf_avail(normalized_path) - 1, > NULL, NULL); > @@ -83,9 +102,7 @@ static int normalize_path_in_utf8(FILE_NOTIFY_INFORMATION *info, > goto normalize; > if (GetLastError() != ERROR_INSUFFICIENT_BUFFER) { > error(_("[GLE %ld] could not convert path to UTF-8: '%.*ls'"), > - GetLastError(), > - (int)(info->FileNameLength / sizeof(WCHAR)), > - info->FileName); > + GetLastError(), (int)wpath_len, wpath); > return -1; > } > > @@ -98,6 +115,152 @@ normalize: > return strbuf_normalize_path(normalized_path); > } > > +/* > + * See if the worktree root directory has shortnames enabled. > + * This will help us decide if we need to do an expensive shortname > + * to longname conversion on every notification event. > + * > + * We do not want to create a file to test this, so we assume that the > + * root directory contains a ".git" file or directory. (Our caller > + * only calls us for the worktree root, so this should be fine.) > + * > + * Remember the spelling of the shortname for ".git" if it exists. > + */ > +static void check_for_shortnames(struct one_watch *watch) > +{ > + wchar_t buf_in[MAX_PATH + 1]; > + wchar_t buf_out[MAX_PATH + 1]; > + wchar_t *last_slash = NULL; > + wchar_t *last_bslash = NULL; > + wchar_t *last; > + > + /* build L"<wt-root-path>/.git" */ > + wcscpy(buf_in, watch->wpath_longname); > + wcscpy(buf_in + watch->wpath_longname_len, L".git"); Could you use `wcsncpy()` here (with the appropriate length designed not to overrun the `buf_in` buffer? Or even better: use `swprintf()` (which has a `count` parameter)? The performance impact should be negligible because we only do this once, right? > + > + if (!GetShortPathNameW(buf_in, buf_out, MAX_PATH)) > + return; > + > + last_slash = wcsrchr(buf_out, L'/'); > + last_bslash = wcsrchr(buf_out, L'\\'); > + if (last_slash > last_bslash) > + last = last_slash + 1; > + else if (last_bslash) > + last = last_bslash + 1; > + else > + last = buf_out; While this is all correct, I would find it clearer to write this as following: for (filename = p = buf_out; *p; p++) /* We can be sure that `buf_out` does not end in a slash */ if (*p == L'/' || *p == '\\') filename = p + 1; > + > + if (!wcscmp(last, L".git")) > + return; > + > + watch->has_shortnames = 1; > + wcsncpy(watch->dotgit_shortname, last, > + ARRAY_SIZE(watch->dotgit_shortname)); > + > + /* > + * The shortname for ".git" is usually of the form "GIT~1", so > + * we should be able to avoid shortname to longname mapping on > + * every notification event if the source string does not > + * contain a "~". > + * > + * However, the documentation for GetLongPathNameW() says > + * that there are filesystems that don't follow that pattern > + * and warns against this optimization. > + * > + * Lets test this. > + */ > + if (wcschr(watch->dotgit_shortname, L'~')) > + watch->has_tilda = 1; > +} > + > +enum get_relative_result { > + GRR_NO_CONVERSION_NEEDED, > + GRR_HAVE_CONVERSION, > + GRR_SHUTDOWN, > +}; > + > +/* > + * Info notification paths are relative to the root of the watch. > + * If our CWD is still at the root, then we can use relative paths > + * to convert from shortnames to longnames. If our process has a > + * different CWD, then we need to construct an absolute path, do > + * the conversion, and then return the root-relative portion. > + * > + * We use the longname form of the root as our basis and assume that > + * it already has a trailing slash. > + * > + * `wpath_len` is in WCHARS not bytes. > + */ > +static enum get_relative_result get_relative_longname( > + struct one_watch *watch, > + const wchar_t *wpath, DWORD wpath_len, > + wchar_t *wpath_longname) > +{ > + wchar_t buf_in[2 * MAX_PATH + 1]; > + wchar_t buf_out[MAX_PATH + 1]; > + DWORD root_len; > + > + /* Build L"<wt-root-path>/<event-rel-path>" */ > + root_len = watch->wpath_longname_len; > + wcsncpy(buf_in, watch->wpath_longname, root_len); > + wcsncpy(buf_in + root_len, wpath, wpath_len); Here, too, I would like to have a check to prevent an overrun. Maybe `swprintf()` again? I guess we could invent `xswprintf()` which would return an error if the return value is -1 or if it used up the entire buffer (i.e. if it overran). > + buf_in[root_len + wpath_len] = 0; > + > + /* > + * We don't actually know if the source pathname is a > + * shortname or a longname. This routine allows either to be > + * given as input. > + */ > + if (!GetLongPathNameW(buf_in, buf_out, MAX_PATH)) { > + /* > + * The shortname to longname conversion can fail for > + * various reasons, for example if the file has been > + * deleted. (That is, if we just received a > + * delete-file notification event and the file is > + * already gone, we can't ask the file system to > + * lookup the longname for it. Likewise, for moves > + * and renames where we are given the old name.) > + * > + * Since deleting or moving a file or directory by its > + * shortname is rather obscure, I'm going ignore the > + * failure and ask the caller to report the original > + * relative path. This seems kinder than failing here > + * and forcing a resync. Besides, forcing a resync on > + * every file/directory delete would effectively > + * cripple monitoring. > + * > + * We might revisit this in the future. > + */ > + return GRR_NO_CONVERSION_NEEDED; > + } > + > + if (!wcscmp(buf_in, buf_out)) { > + /* > + * The path does not have a shortname alias. > + */ > + return GRR_NO_CONVERSION_NEEDED; > + } > + > + if (wcsncmp(buf_in, buf_out, root_len)) { > + /* > + * The spelling of the root directory portion of the computed > + * longname has changed. This should not happen. Basically, > + * it means that we don't know where (without recomputing the > + * longname of just the root directory) to split out the > + * relative path. Since this should not happen, I'm just > + * going to let this fail and force a shutdown (because all > + * subsequent events are probably going to see the same > + * mismatch). > + */ > + return GRR_SHUTDOWN; > + } > + > + /* Return the worktree root-relative portion of the longname. */ > + > + wcscpy(wpath_longname, buf_out + root_len); > + return GRR_HAVE_CONVERSION; > +} > + > void fsm_listen__stop_async(struct fsmonitor_daemon_state *state) > { > SetEvent(state->backend_data->hListener[LISTENER_SHUTDOWN]); > @@ -111,7 +274,9 @@ static struct one_watch *create_watch(struct fsmonitor_daemon_state *state, > DWORD share_mode = > FILE_SHARE_WRITE | FILE_SHARE_READ | FILE_SHARE_DELETE; > HANDLE hDir; > - wchar_t wpath[MAX_PATH]; > + DWORD len_longname; > + wchar_t wpath[MAX_PATH + 1]; > + wchar_t wpath_longname[MAX_PATH + 1]; > > if (xutftowcs_path(wpath, path) < 0) { > error(_("could not convert to wide characters: '%s'"), path); > @@ -128,6 +293,20 @@ static struct one_watch *create_watch(struct fsmonitor_daemon_state *state, > return NULL; > } > > + if (!GetLongPathNameW(wpath, wpath_longname, MAX_PATH)) { > + error(_("[GLE %ld] could not get longname of '%s'"), > + GetLastError(), path); > + CloseHandle(hDir); > + return NULL; > + } > + > + len_longname = wcslen(wpath_longname); Let's assign the return value of `GetLongPathNameW()` to `len_longname`, in case of success it contains the number of characters, too. The rest of the patch looks good to me! Thank you, Dscho > + if (wpath_longname[len_longname - 1] != L'/' && > + wpath_longname[len_longname - 1] != L'\\') { > + wpath_longname[len_longname++] = L'/'; > + wpath_longname[len_longname] = 0; > + } > + > CALLOC_ARRAY(watch, 1); > > watch->buf_len = sizeof(watch->buffer); /* assume full MAX_RDCW_BUF */ > @@ -135,6 +314,9 @@ static struct one_watch *create_watch(struct fsmonitor_daemon_state *state, > strbuf_init(&watch->path, 0); > strbuf_addstr(&watch->path, path); > > + wcscpy(watch->wpath_longname, wpath_longname); > + watch->wpath_longname_len = len_longname; > + > watch->hDir = hDir; > watch->hEvent = CreateEvent(NULL, TRUE, FALSE, NULL); > > @@ -258,6 +440,62 @@ static void cancel_rdcw_watch(struct one_watch *watch) > watch->is_active = FALSE; > } > > +/* > + * Process a single relative pathname event. > + * Return 1 if we should shutdown. > + */ > +static int process_1_worktree_event( > + struct string_list *cookie_list, > + struct fsmonitor_batch **batch, > + const struct strbuf *path, > + enum fsmonitor_path_type t, > + DWORD info_action) > +{ > + const char *slash; > + > + switch (t) { > + case IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX: > + /* special case cookie files within .git */ > + > + /* Use just the filename of the cookie file. */ > + slash = find_last_dir_sep(path->buf); > + string_list_append(cookie_list, > + slash ? slash + 1 : path->buf); > + break; > + > + case IS_INSIDE_DOT_GIT: > + /* ignore everything inside of "<worktree>/.git/" */ > + break; > + > + case IS_DOT_GIT: > + /* "<worktree>/.git" was deleted (or renamed away) */ > + if ((info_action == FILE_ACTION_REMOVED) || > + (info_action == FILE_ACTION_RENAMED_OLD_NAME)) { > + trace2_data_string("fsmonitor", NULL, > + "fsm-listen/dotgit", > + "removed"); > + return 1; > + } > + break; > + > + case IS_WORKDIR_PATH: > + /* queue normal pathname */ > + if (!*batch) > + *batch = fsmonitor_batch__new(); > + fsmonitor_batch__add_path(*batch, path->buf); > + break; > + > + case IS_GITDIR: > + case IS_INSIDE_GITDIR: > + case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX: > + default: > + BUG("unexpected path classification '%d' for '%s'", > + t, path->buf); > + } > + > + return 0; > +} > + > /* > * Process filesystem events that happen anywhere (recursively) under the > * <worktree> root directory. For a normal working directory, this includes > @@ -274,6 +512,7 @@ static int process_worktree_events(struct fsmonitor_daemon_state *state) > struct string_list cookie_list = STRING_LIST_INIT_DUP; > struct fsmonitor_batch *batch = NULL; > const char *p = watch->buffer; > + wchar_t wpath_longname[MAX_PATH + 1]; > > /* > * If the kernel gets more events than will fit in the kernel > @@ -306,54 +545,63 @@ static int process_worktree_events(struct fsmonitor_daemon_state *state) > */ > for (;;) { > FILE_NOTIFY_INFORMATION *info = (void *)p; > - const char *slash; > + wchar_t *wpath = info->FileName; > + DWORD wpath_len = info->FileNameLength / sizeof(WCHAR); > enum fsmonitor_path_type t; > + enum get_relative_result grr; > + > + if (watch->has_shortnames) { > + if (!wcscmp(wpath, watch->dotgit_shortname)) { > + /* > + * This event exactly matches the > + * spelling of the shortname of > + * ".git", so we can skip some steps. > + * > + * (This case is odd because the user > + * can "rm -rf GIT~1" and we cannot > + * use the filesystem to map it back > + * to ".git".) > + */ > + strbuf_reset(&path); > + strbuf_addstr(&path, ".git"); > + t = IS_DOT_GIT; > + goto process_it; > + } > > - strbuf_reset(&path); > - if (normalize_path_in_utf8(info, &path) == -1) > - goto skip_this_path; > - > - t = fsmonitor_classify_path_workdir_relative(path.buf); > - > - switch (t) { > - case IS_INSIDE_DOT_GIT_WITH_COOKIE_PREFIX: > - /* special case cookie files within .git */ > - > - /* Use just the filename of the cookie file. */ > - slash = find_last_dir_sep(path.buf); > - string_list_append(&cookie_list, > - slash ? slash + 1 : path.buf); > - break; > - > - case IS_INSIDE_DOT_GIT: > - /* ignore everything inside of "<worktree>/.git/" */ > - break; > + if (watch->has_tilda && !wcschr(wpath, L'~')) { > + /* > + * Shortnames on this filesystem have tildas > + * and the notification path does not have > + * one, so we assume that it is a longname. > + */ > + goto normalize_it; > + } > > - case IS_DOT_GIT: > - /* "<worktree>/.git" was deleted (or renamed away) */ > - if ((info->Action == FILE_ACTION_REMOVED) || > - (info->Action == FILE_ACTION_RENAMED_OLD_NAME)) { > - trace2_data_string("fsmonitor", NULL, > - "fsm-listen/dotgit", > - "removed"); > + grr = get_relative_longname(watch, wpath, wpath_len, > + wpath_longname); > + switch (grr) { > + case GRR_NO_CONVERSION_NEEDED: /* use info buffer as is */ > + break; > + case GRR_HAVE_CONVERSION: > + wpath = wpath_longname; > + wpath_len = wcslen(wpath); > + break; > + default: > + case GRR_SHUTDOWN: > goto force_shutdown; > } > - break; > + } > > - case IS_WORKDIR_PATH: > - /* queue normal pathname */ > - if (!batch) > - batch = fsmonitor_batch__new(); > - fsmonitor_batch__add_path(batch, path.buf); > - break; > +normalize_it: > + if (normalize_path_in_utf8(wpath, wpath_len, &path) == -1) > + goto skip_this_path; > > - case IS_GITDIR: > - case IS_INSIDE_GITDIR: > - case IS_INSIDE_GITDIR_WITH_COOKIE_PREFIX: > - default: > - BUG("unexpected path classification '%d' for '%s'", > - t, path.buf); > - } > + t = fsmonitor_classify_path_workdir_relative(path.buf); > + > +process_it: > + if (process_1_worktree_event(&cookie_list, &batch, &path, t, > + info->Action)) > + goto force_shutdown; > > skip_this_path: > if (!info->NextEntryOffset) > @@ -382,6 +630,9 @@ force_shutdown: > * Note that we DO NOT get filesystem events on the external <gitdir> > * itself (it is not inside something that we are watching). In particular, > * we do not get an event if the external <gitdir> is deleted. > + * > + * Also, we do not care about shortnames within the external <gitdir>, since > + * we never send these paths to clients. > */ > static int process_gitdir_events(struct fsmonitor_daemon_state *state) > { > @@ -403,8 +654,10 @@ static int process_gitdir_events(struct fsmonitor_daemon_state *state) > const char *slash; > enum fsmonitor_path_type t; > > - strbuf_reset(&path); > - if (normalize_path_in_utf8(info, &path) == -1) > + if (normalize_path_in_utf8( > + info->FileName, > + info->FileNameLength / sizeof(WCHAR), > + &path) == -1) > goto skip_this_path; > > t = fsmonitor_classify_path_gitdir_relative(path.buf); > @@ -538,6 +791,8 @@ int fsm_listen__ctor(struct fsmonitor_daemon_state *state) > if (!data->watch_worktree) > goto failed; > > + check_for_shortnames(data->watch_worktree); > + > if (state->nr_paths_watching > 1) { > data->watch_gitdir = create_watch(state, > state->path_gitdir_watch.buf); > diff --git a/t/t7527-builtin-fsmonitor.sh b/t/t7527-builtin-fsmonitor.sh > index bd0c952a116..1be21785162 100755 > --- a/t/t7527-builtin-fsmonitor.sh > +++ b/t/t7527-builtin-fsmonitor.sh > @@ -166,6 +166,71 @@ test_expect_success 'implicit daemon stop (rename .git)' ' > test_must_fail git -C test_implicit_2 fsmonitor--daemon status > ' > > +# File systems on Windows may or may not have shortnames. > +# This is a volume-specific setting on modern systems. > +# "C:/" drives are required to have them enabled. Other > +# hard drives default to disabled. > +# > +# This is a crude test to see if shortnames are enabled > +# on the volume containing the test directory. It is > +# crude, but it does not require elevation like `fsutil`. > +# > +test_lazy_prereq SHORTNAMES ' > + mkdir .foo && > + test -d "FOO~1" > +' > + > +# Here we assume that the shortname of ".git" is "GIT~1". > +test_expect_success MINGW,SHORTNAMES 'implicit daemon stop (rename GIT~1)' ' > + test_when_finished "stop_daemon_delete_repo test_implicit_1s" && > + > + git init test_implicit_1s && > + > + start_daemon -C test_implicit_1s && > + > + # renaming the .git directory will implicitly stop the daemon. > + # this moves {.git, GIT~1} to {.gitxyz, GITXYZ~1}. > + # the rename-from FS Event will contain the shortname. > + # > + mv test_implicit_1s/GIT~1 test_implicit_1s/.gitxyz && > + > + sleep 1 && > + # put it back so that our status will not crawl out to our > + # parent directory. > + # this moves {.gitxyz, GITXYZ~1} to {.git, GIT~1}. > + mv test_implicit_1s/.gitxyz test_implicit_1s/.git && > + > + test_must_fail git -C test_implicit_1s fsmonitor--daemon status > +' > + > +# Here we first create a file with LONGNAME of "GIT~1" before > +# we create the repo. This will cause the shortname of ".git" > +# to be "GIT~2". > +test_expect_success MINGW,SHORTNAMES 'implicit daemon stop (rename GIT~2)' ' > + test_when_finished "stop_daemon_delete_repo test_implicit_1s2" && > + > + mkdir test_implicit_1s2 && > + echo HELLO >test_implicit_1s2/GIT~1 && > + git init test_implicit_1s2 && > + > + test_path_is_file test_implicit_1s2/GIT~1 && > + test_path_is_dir test_implicit_1s2/GIT~2 && > + > + start_daemon -C test_implicit_1s2 && > + > + # renaming the .git directory will implicitly stop the daemon. > + # the rename-from FS Event will contain the shortname. > + # > + mv test_implicit_1s2/GIT~2 test_implicit_1s2/.gitxyz && > + > + sleep 1 && > + # put it back so that our status will not crawl out to our > + # parent directory. > + mv test_implicit_1s2/.gitxyz test_implicit_1s2/.git && > + > + test_must_fail git -C test_implicit_1s2 fsmonitor--daemon status > +' > + > test_expect_success 'cannot start multiple daemons' ' > test_when_finished "stop_daemon_delete_repo test_multiple" && > > -- > gitgitgadget > >