On Mon, Mar 04, 2019 at 06:19:15PM +0700, Duy Nguyen wrote: > On Wed, Feb 27, 2019 at 11:05 PM Jeff King <peff@xxxxxxxx> wrote: > > > > On Wed, Feb 27, 2019 at 09:23:33AM -0500, Eric Sunshine wrote: > > > > > > If we just cared about saying "is this worktree name valid", I'd suggest > > > > actually constructing a sample refname with the worktree name embedded > > > > in it and feeding that to check_refname_format(). But because you want > > > > to actually sanitize, I don't think there's an easy way to reuse it. > > > > > > > > So this approach is probably the best we can do, though I do still think > > > > it's worth renaming that function (and/or putting a big warning comment > > > > in front of it). > > > > > > The above arguments seem to suggest the introduction of a companion to > > > check_refname_format() for sanitizing, perhaps named > > > sanitize_refname_format(), in ref.[hc]. The potential difficulty with > > > that is defining exactly what "sanitize" means. Will it be contextual? > > > (That is, will git-worktree have differently sanitation needs than > > > some other facility?) If so, perhaps a 'flags' argument could control > > > how sanitization is done. > > > > I agree that sanitize_refname_format() would be nice, but I'm pretty > > sure it's going to end up having to duplicate many of the rules from > > check_refname_format(). Which is ugly if the two ever get out of sync. > > > > But if we could write it in a way that keeps the actual policy logic in > > one factored-out portion, I think it would be worth doing. > > I think we could make check_refname_format() returns the bad position > and several different error codes depending on context. Then > sanitize_.. can just repeatedly call check_refname_format and fix up > whatever error it reports. Performance goes straight to hell but I > don't think that's a big deal for git-worktree, and it keeps > check_refname_format() simple (relatively speaking). The new refs.c code would look something like this. do_check_refname_component() does not look so bad. -- 8< -- diff --git a/builtin/worktree.c b/builtin/worktree.c index 21469eb52c..ca63dd3df6 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -262,36 +262,6 @@ static void validate_worktree_add(const char *path, const struct add_opts *opts) free_worktrees(worktrees); } -static void sanitize_worktree_name(struct strbuf *name) -{ - struct strbuf sb = STRBUF_INIT; - int i; - - for (i = 0; i < name->len; i++) { - int ch = name->buf[i]; - - if (char_allowed_in_refname(ch)) - strbuf_addch(&sb, ch); - else if (sb.len > 0 && sb.buf[sb.len - 1] != '-') - strbuf_addch(&sb, '-'); - } - if (sb.len > 0 && sb.buf[sb.len - 1] == '-') - strbuf_setlen(&sb, sb.len - 1); - /* - * a worktree name of only special chars would be reduced to - * an empty string - */ - if (sb.len == 0) - strbuf_addstr(&sb, "worktree"); - - if (check_refname_format(sb.buf, REFNAME_ALLOW_ONELEVEL)) - BUG("worktree name '%s' (from '%s') is not a valid refname", - sb.buf, name->buf); - - strbuf_swap(&sb, name); - strbuf_release(&sb); -} - static int add_worktree(const char *path, const char *refname, const struct add_opts *opts) { @@ -322,7 +292,7 @@ static int add_worktree(const char *path, const char *refname, name = worktree_basename(path, &len); strbuf_add(&sb_name, name, path + len - name); - sanitize_worktree_name(&sb_name); + sanitize_worktree_refname(&sb_name); name = sb_name.buf; git_path_buf(&sb_repo, "worktrees/%s", name); len = sb_repo.len; diff --git a/refs.c b/refs.c index f23f583db1..2d9730e792 100644 --- a/refs.c +++ b/refs.c @@ -63,6 +63,17 @@ int char_allowed_in_refname(int ch) refname_disposition[ch] == 0; } +enum check_code { + refname_ok = 0, + refname_contains_dotdot, + refname_contains_atopen, + refname_has_badchar, + refname_contains_wildcard, + refname_starts_with_dot, + refname_ends_with_dotlock, + refname_component_has_zero_length +}; + /* * Try to read one refname component from the front of refname. * Return the length of the component found, or -1 if the component is @@ -78,10 +89,11 @@ int char_allowed_in_refname(int ch) * - it ends with ".lock", or * - it contains a "@{" portion */ -static int check_refname_component(const char *refname, int *flags) +static enum check_code do_check_refname_component(const char *refname, int *flags, const char **cp_out) { const char *cp; char last = '\0'; + enum check_code ret = refname_ok; for (cp = refname; ; cp++) { int ch = *cp & 255; @@ -90,18 +102,26 @@ static int check_refname_component(const char *refname, int *flags) case 1: goto out; case 2: - if (last == '.') - return -1; /* Refname contains "..". */ + if (last == '.') { + ret = refname_contains_dotdot; + goto done; + } break; case 3: - if (last == '@') - return -1; /* Refname contains "@{". */ + if (last == '@') { + ret = refname_contains_atopen; /* @{ */ + goto done; + } break; case 4: - return -1; + ret = refname_has_badchar; + goto done; case 5: - if (!(*flags & REFNAME_REFSPEC_PATTERN)) - return -1; /* refspec can't be a pattern */ + if (!(*flags & REFNAME_REFSPEC_PATTERN)) { + /* refspec can't be a pattern */ + ret = refname_contains_wildcard; + goto done; + } /* * Unset the pattern flag so that we only accept @@ -113,16 +133,67 @@ static int check_refname_component(const char *refname, int *flags) last = ch; } out: - if (cp == refname) - return 0; /* Component has zero length. */ - if (refname[0] == '.') - return -1; /* Component starts with '.'. */ + if (cp == refname) { + ret = refname_component_has_zero_length; + goto done; + } + if (refname[0] == '.') { + ret = refname_starts_with_dot; + cp = refname; + goto done; + } if (cp - refname >= LOCK_SUFFIX_LEN && - !memcmp(cp - LOCK_SUFFIX_LEN, LOCK_SUFFIX, LOCK_SUFFIX_LEN)) - return -1; /* Refname ends with ".lock". */ + !memcmp(cp - LOCK_SUFFIX_LEN, LOCK_SUFFIX, LOCK_SUFFIX_LEN)) { + cp -= LOCK_SUFFIX_LEN; + ret = refname_ends_with_dotlock; + } +done: + *cp_out = cp; + return ret; +} + +static int check_refname_component(const char *refname, int *flags) +{ + const char *cp; + enum check_code ret; + + ret = do_check_refname_component(refname, flags, &cp); + if (ret) + return -1; return cp - refname; } +void sanitize_worktree_refname(struct strbuf *name) +{ + int last_length = -1; + int flags = 0; + + while (1) { + const char *cp; + + enum check_code ret = do_check_refname_component(name->buf, &flags, &cp); + if (last_length != -1 && cp - name->buf == last_length) + BUG("stuck in infinite loop! pos = %d buf = %s", + last_length, name->buf); + last_length = cp - name->buf; + switch (ret) { + case refname_ok: + return; + case refname_contains_dotdot: + case refname_contains_atopen: + case refname_has_badchar: + case refname_contains_wildcard: + case refname_ends_with_dotlock: + case refname_starts_with_dot: + name->buf[last_length] = '-'; + break; + case refname_component_has_zero_length: + strbuf_addstr(name, "worktree"); + return; + } + } +} + int check_refname_format(const char *refname, int flags) { int component_len, component_count = 0; diff --git a/refs.h b/refs.h index 61b4073f76..3b65b8d27a 100644 --- a/refs.h +++ b/refs.h @@ -459,7 +459,7 @@ int for_each_reflog(each_ref_fn fn, void *cb_data); * repeated slashes are accepted. */ int check_refname_format(const char *refname, int flags); -int char_allowed_in_refname(int ch); +void sanitize_worktree_refname(struct strbuf *name); const char *prettify_refname(const char *refname); diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh index ea22207361..24c574f365 100755 --- a/t/t2025-worktree-add.sh +++ b/t/t2025-worktree-add.sh @@ -571,10 +571,10 @@ test_expect_success '"add" an existing locked but missing worktree' ' ' test_expect_success 'sanitize generated worktree name' ' - git worktree add --detach ". weird*..?.lock.lock." && - test -d .git/worktrees/weird-lock-lock && + git worktree add --detach ". weird*..?.lock.lock" && + test -d .git/worktrees/---weird-.--.lock-lock && git worktree add --detach .... && - test -d .git/worktrees/worktree + test -d .git/worktrees/--.- ' test_done -- 8< -- -- Duy