Re: [PATCH v3 1/1] worktree add: sanitize worktree names

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Mar 04, 2019 at 06:19:15PM +0700, Duy Nguyen wrote:
> On Wed, Feb 27, 2019 at 11:05 PM Jeff King <peff@xxxxxxxx> wrote:
> >
> > On Wed, Feb 27, 2019 at 09:23:33AM -0500, Eric Sunshine wrote:
> >
> > > > If we just cared about saying "is this worktree name valid", I'd suggest
> > > > actually constructing a sample refname with the worktree name embedded
> > > > in it and feeding that to check_refname_format(). But because you want
> > > > to actually sanitize, I don't think there's an easy way to reuse it.
> > > >
> > > > So this approach is probably the best we can do, though I do still think
> > > > it's worth renaming that function (and/or putting a big warning comment
> > > > in front of it).
> > >
> > > The above arguments seem to suggest the introduction of a companion to
> > > check_refname_format() for sanitizing, perhaps named
> > > sanitize_refname_format(), in ref.[hc]. The potential difficulty with
> > > that is defining exactly what "sanitize" means. Will it be contextual?
> > > (That is, will git-worktree have differently sanitation needs than
> > > some other facility?) If so, perhaps a 'flags' argument could control
> > > how sanitization is done.
> >
> > I agree that sanitize_refname_format() would be nice, but I'm pretty
> > sure it's going to end up having to duplicate many of the rules from
> > check_refname_format(). Which is ugly if the two ever get out of sync.
> >
> > But if we could write it in a way that keeps the actual policy logic in
> > one factored-out portion, I think it would be worth doing.
> 
> I think we could make check_refname_format() returns the bad position
> and several different error codes depending on context. Then
> sanitize_.. can just repeatedly call check_refname_format and fix up
> whatever error it reports. Performance goes straight to hell but I
> don't think that's a big deal for git-worktree, and it keeps
> check_refname_format() simple (relatively speaking).

The new refs.c code would look something like this.
do_check_refname_component() does not look so bad.

-- 8< --
diff --git a/builtin/worktree.c b/builtin/worktree.c
index 21469eb52c..ca63dd3df6 100644
--- a/builtin/worktree.c
+++ b/builtin/worktree.c
@@ -262,36 +262,6 @@ static void validate_worktree_add(const char *path, const struct add_opts *opts)
 	free_worktrees(worktrees);
 }
 
-static void sanitize_worktree_name(struct strbuf *name)
-{
-	struct strbuf sb = STRBUF_INIT;
-	int i;
-
-	for (i = 0; i < name->len; i++) {
-		int ch = name->buf[i];
-
-		if (char_allowed_in_refname(ch))
-			strbuf_addch(&sb, ch);
-		else if (sb.len > 0 && sb.buf[sb.len - 1] != '-')
-			strbuf_addch(&sb, '-');
-	}
-	if (sb.len > 0 && sb.buf[sb.len - 1] == '-')
-		strbuf_setlen(&sb, sb.len - 1);
-	/*
-	 * a worktree name of only special chars would be reduced to
-	 * an empty string
-	 */
-	if (sb.len == 0)
-		strbuf_addstr(&sb, "worktree");
-
-	if (check_refname_format(sb.buf, REFNAME_ALLOW_ONELEVEL))
-		BUG("worktree name '%s' (from '%s') is not a valid refname",
-		    sb.buf, name->buf);
-
-	strbuf_swap(&sb, name);
-	strbuf_release(&sb);
-}
-
 static int add_worktree(const char *path, const char *refname,
 			const struct add_opts *opts)
 {
@@ -322,7 +292,7 @@ static int add_worktree(const char *path, const char *refname,
 
 	name = worktree_basename(path, &len);
 	strbuf_add(&sb_name, name, path + len - name);
-	sanitize_worktree_name(&sb_name);
+	sanitize_worktree_refname(&sb_name);
 	name = sb_name.buf;
 	git_path_buf(&sb_repo, "worktrees/%s", name);
 	len = sb_repo.len;
diff --git a/refs.c b/refs.c
index f23f583db1..2d9730e792 100644
--- a/refs.c
+++ b/refs.c
@@ -63,6 +63,17 @@ int char_allowed_in_refname(int ch)
 		refname_disposition[ch] == 0;
 }
 
+enum check_code {
+	 refname_ok = 0,
+	 refname_contains_dotdot,
+	 refname_contains_atopen,
+	 refname_has_badchar,
+	 refname_contains_wildcard,
+	 refname_starts_with_dot,
+	 refname_ends_with_dotlock,
+	 refname_component_has_zero_length
+};
+
 /*
  * Try to read one refname component from the front of refname.
  * Return the length of the component found, or -1 if the component is
@@ -78,10 +89,11 @@ int char_allowed_in_refname(int ch)
  * - it ends with ".lock", or
  * - it contains a "@{" portion
  */
-static int check_refname_component(const char *refname, int *flags)
+static enum check_code do_check_refname_component(const char *refname, int *flags, const char **cp_out)
 {
 	const char *cp;
 	char last = '\0';
+	enum check_code ret = refname_ok;
 
 	for (cp = refname; ; cp++) {
 		int ch = *cp & 255;
@@ -90,18 +102,26 @@ static int check_refname_component(const char *refname, int *flags)
 		case 1:
 			goto out;
 		case 2:
-			if (last == '.')
-				return -1; /* Refname contains "..". */
+			if (last == '.') {
+				ret = refname_contains_dotdot;
+				goto done;
+			}
 			break;
 		case 3:
-			if (last == '@')
-				return -1; /* Refname contains "@{". */
+			if (last == '@') {
+				ret = refname_contains_atopen; /* @{ */
+				goto done;
+			}
 			break;
 		case 4:
-			return -1;
+			ret = refname_has_badchar;
+			goto done;
 		case 5:
-			if (!(*flags & REFNAME_REFSPEC_PATTERN))
-				return -1; /* refspec can't be a pattern */
+			if (!(*flags & REFNAME_REFSPEC_PATTERN)) {
+				/* refspec can't be a pattern */
+				ret = refname_contains_wildcard;
+				goto done;
+			}
 
 			/*
 			 * Unset the pattern flag so that we only accept
@@ -113,16 +133,67 @@ static int check_refname_component(const char *refname, int *flags)
 		last = ch;
 	}
 out:
-	if (cp == refname)
-		return 0; /* Component has zero length. */
-	if (refname[0] == '.')
-		return -1; /* Component starts with '.'. */
+	if (cp == refname) {
+		ret = refname_component_has_zero_length;
+		goto done;
+	}
+	if (refname[0] == '.') {
+		ret = refname_starts_with_dot;
+		cp = refname;
+		goto done;
+	}
 	if (cp - refname >= LOCK_SUFFIX_LEN &&
-	    !memcmp(cp - LOCK_SUFFIX_LEN, LOCK_SUFFIX, LOCK_SUFFIX_LEN))
-		return -1; /* Refname ends with ".lock". */
+	    !memcmp(cp - LOCK_SUFFIX_LEN, LOCK_SUFFIX, LOCK_SUFFIX_LEN)) {
+		cp -= LOCK_SUFFIX_LEN;
+		ret = refname_ends_with_dotlock;
+	}
+done:
+	*cp_out = cp;
+	return ret;
+}
+
+static int check_refname_component(const char *refname, int *flags)
+{
+	const char *cp;
+	enum check_code ret;
+
+	ret = do_check_refname_component(refname, flags, &cp);
+	if (ret)
+		return -1;
 	return cp - refname;
 }
 
+void sanitize_worktree_refname(struct strbuf *name)
+{
+	int last_length = -1;
+	int flags = 0;
+
+	while (1) {
+		const char *cp;
+
+		enum check_code ret = do_check_refname_component(name->buf, &flags, &cp);
+		if (last_length != -1 && cp - name->buf == last_length)
+			BUG("stuck in infinite loop! pos = %d buf = %s",
+			    last_length, name->buf);
+		last_length = cp - name->buf;
+		switch (ret) {
+		case refname_ok:
+			return;
+		case refname_contains_dotdot:
+		case refname_contains_atopen:
+		case refname_has_badchar:
+		case refname_contains_wildcard:
+		case refname_ends_with_dotlock:
+		case refname_starts_with_dot:
+			name->buf[last_length] = '-';
+			break;
+		case refname_component_has_zero_length:
+			strbuf_addstr(name, "worktree");
+			return;
+		}
+	}
+}
+
 int check_refname_format(const char *refname, int flags)
 {
 	int component_len, component_count = 0;
diff --git a/refs.h b/refs.h
index 61b4073f76..3b65b8d27a 100644
--- a/refs.h
+++ b/refs.h
@@ -459,7 +459,7 @@ int for_each_reflog(each_ref_fn fn, void *cb_data);
  * repeated slashes are accepted.
  */
 int check_refname_format(const char *refname, int flags);
-int char_allowed_in_refname(int ch);
+void sanitize_worktree_refname(struct strbuf *name);
 
 const char *prettify_refname(const char *refname);
 
diff --git a/t/t2025-worktree-add.sh b/t/t2025-worktree-add.sh
index ea22207361..24c574f365 100755
--- a/t/t2025-worktree-add.sh
+++ b/t/t2025-worktree-add.sh
@@ -571,10 +571,10 @@ test_expect_success '"add" an existing locked but missing worktree' '
 '
 
 test_expect_success 'sanitize generated worktree name' '
-	git worktree add --detach ".  weird*..?.lock.lock." &&
-	test -d .git/worktrees/weird-lock-lock &&
+	git worktree add --detach ".  weird*..?.lock.lock" &&
+	test -d .git/worktrees/---weird-.--.lock-lock &&
 	git worktree add --detach .... &&
-	test -d .git/worktrees/worktree
+	test -d .git/worktrees/--.-
 '
 
 test_done
-- 8< --

--
Duy



[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux