[PATCH v2 1/6] string-list: introduce `string_list_split_in_place_multi()`

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Introduce a variant of the `string_list_split_in_place()` function that
takes a string of accepted delimiters.

By contrast to its cousin `string_list_split_in_place()` which splits
the given string at every instance of the single character `delim`, the
`_multi` variant splits the given string any any character appearing in
the string `delim`.

Like `strtok()`, the `_multi` variant skips past sequential delimiting
characters. For example:

    string_list_split_in_place(&xs, xstrdup("foo::bar::baz"), ":", -1);

would place in `xs` the elements "foo", "bar", and "baz".

Instead of using `strchr(2)` to locate the first occurrence of the given
delimiter character, `string_list_split_in_place_multi()` uses
`strcspn(2)` to move past the initial segment of characters comprised of
any characters in the delimiting set.

When only a single delimiting character is provided, `strcspn(2)` has
equivalent performance to `strchr(2)`. Modern `strcspn(2)`
implementations treat an empty delimiter or the singleton delimiter as a
special case and fall back to calling strchrnul(). Both glibc[1] and
musl[2] implement `strcspn(2)` this way.

Since the `_multi` variant is a generalization of the original
implementation, reimplement `string_list_split_in_place()` in terms of
the more general function by providing a single-character string for the
list of accepted delimiters.

To avoid regressions, update t0063 in this patch as well. Any "common"
test cases (i.e., those that produce the same result whether you call
`string_list_split()` or `string_list_split_in_place_multi()`) are
grouped into a loop which is parameterized over the function to test.

Any cases which aren't common (of which there is one existing case, and
a handful of new ones added which are specific to the `_multi` variant)
are tested independently.

[1]: https://sourceware.org/git/?p=glibc.git;a=blob;f=string/strcspn.c;hb=glibc-2.37#l35
[2]: https://git.musl-libc.org/cgit/musl/tree/src/string/strcspn.c?h=v1.2.3#n11

Signed-off-by: Taylor Blau <me@xxxxxxxxxxxx>
---
 string-list.c               |  26 +++++++--
 string-list.h               |   7 +++
 t/helper/test-string-list.c |  15 ++++++
 t/t0063-string-list.sh      | 105 +++++++++++++++++++++++++-----------
 4 files changed, 118 insertions(+), 35 deletions(-)

diff --git a/string-list.c b/string-list.c
index db473f273e..b27a53f2e1 100644
--- a/string-list.c
+++ b/string-list.c
@@ -300,8 +300,9 @@ int string_list_split(struct string_list *list, const char *string,
 	}
 }
 
-int string_list_split_in_place(struct string_list *list, char *string,
-			       int delim, int maxsplit)
+static int string_list_split_in_place_1(struct string_list *list, char *string,
+					const char *delim, int maxsplit,
+					unsigned runs)
 {
 	int count = 0;
 	char *p = string, *end;
@@ -310,13 +311,16 @@ int string_list_split_in_place(struct string_list *list, char *string,
 		die("internal error in string_list_split_in_place(): "
 		    "list->strdup_strings must not be set");
 	for (;;) {
+		if (runs)
+			p += strspn(p, delim);
+
 		count++;
 		if (maxsplit >= 0 && count > maxsplit) {
 			string_list_append(list, p);
 			return count;
 		}
-		end = strchr(p, delim);
-		if (end) {
+		end = p + strcspn(p, delim);
+		if (end && *end) {
 			*end = '\0';
 			string_list_append(list, p);
 			p = end + 1;
@@ -326,3 +330,17 @@ int string_list_split_in_place(struct string_list *list, char *string,
 		}
 	}
 }
+
+int string_list_split_in_place_multi(struct string_list *list, char *string,
+				     const char *delim, int maxsplit)
+{
+	return string_list_split_in_place_1(list, string, delim, maxsplit, 1);
+}
+
+int string_list_split_in_place(struct string_list *list, char *string,
+			       int delim, int maxsplit)
+{
+	char delim_s[2] = { delim, 0 };
+
+	return string_list_split_in_place_1(list, string, delim_s, maxsplit, 0);
+}
diff --git a/string-list.h b/string-list.h
index c7b0d5d000..f01bbb0bb6 100644
--- a/string-list.h
+++ b/string-list.h
@@ -268,7 +268,14 @@ int string_list_split(struct string_list *list, const char *string,
  * new string_list_items point into string (which therefore must not
  * be modified or freed while the string_list is in use).
  * list->strdup_strings must *not* be set.
+ *
+ * The "_multi" variant splits the given string on any character
+ * appearing in "delim", and the non-"_multi" variant splits only on the
+ * given character. The "_multi" variant behaves like `strtok()` where
+ * no element contains the delimiting byte(s).
  */
+int string_list_split_in_place_multi(struct string_list *list, char *string,
+				     const char *delim, int maxsplit);
 int string_list_split_in_place(struct string_list *list, char *string,
 			       int delim, int maxsplit);
 #endif /* STRING_LIST_H */
diff --git a/t/helper/test-string-list.c b/t/helper/test-string-list.c
index 2123dda85b..119bc9e1c9 100644
--- a/t/helper/test-string-list.c
+++ b/t/helper/test-string-list.c
@@ -73,6 +73,21 @@ int cmd__string_list(int argc, const char **argv)
 		return 0;
 	}
 
+	if (argc == 5 && !strcmp(argv[1], "split_in_place_multi")) {
+		struct string_list list = STRING_LIST_INIT_NODUP;
+		int i;
+		char *s = xstrdup(argv[2]);
+		const char *delim = argv[3];
+		int maxsplit = atoi(argv[4]);
+
+		i = string_list_split_in_place_multi(&list, s, delim, maxsplit);
+		printf("%d\n", i);
+		write_list(&list);
+		string_list_clear(&list, 0);
+		free(s);
+		return 0;
+	}
+
 	if (argc == 4 && !strcmp(argv[1], "filter")) {
 		/*
 		 * Retain only the items that have the specified prefix.
diff --git a/t/t0063-string-list.sh b/t/t0063-string-list.sh
index 46d4839194..9c5094616a 100755
--- a/t/t0063-string-list.sh
+++ b/t/t0063-string-list.sh
@@ -18,42 +18,53 @@ test_split () {
 	"
 }
 
-test_split "foo:bar:baz" ":" "-1" <<EOF
-3
-[0]: "foo"
-[1]: "bar"
-[2]: "baz"
-EOF
+test_split_in_place_multi () {
+	cat >expected &&
+	test_expect_success "split_in_place_multi $1 at $2, max $3" "
+		test-tool string-list split_in_place_multi '$1' '$2' '$3' >actual &&
+		test_cmp expected actual
+	"
+}
 
-test_split "foo:bar:baz" ":" "0" <<EOF
-1
-[0]: "foo:bar:baz"
-EOF
+for test_fn in test_split test_split_in_place_multi
+do
+	$test_fn "foo:bar:baz" ":" "-1" <<-\EOF
+	3
+	[0]: "foo"
+	[1]: "bar"
+	[2]: "baz"
+	EOF
 
-test_split "foo:bar:baz" ":" "1" <<EOF
-2
-[0]: "foo"
-[1]: "bar:baz"
-EOF
+	$test_fn "foo:bar:baz" ":" "0" <<-\EOF
+	1
+	[0]: "foo:bar:baz"
+	EOF
 
-test_split "foo:bar:baz" ":" "2" <<EOF
-3
-[0]: "foo"
-[1]: "bar"
-[2]: "baz"
-EOF
+	$test_fn "foo:bar:baz" ":" "1" <<-\EOF
+	2
+	[0]: "foo"
+	[1]: "bar:baz"
+	EOF
 
-test_split "foo:bar:" ":" "-1" <<EOF
-3
-[0]: "foo"
-[1]: "bar"
-[2]: ""
-EOF
+	$test_fn "foo:bar:baz" ":" "2" <<-\EOF
+	3
+	[0]: "foo"
+	[1]: "bar"
+	[2]: "baz"
+	EOF
 
-test_split "" ":" "-1" <<EOF
-1
-[0]: ""
-EOF
+	$test_fn "foo:bar:" ":" "-1" <<-\EOF
+	3
+	[0]: "foo"
+	[1]: "bar"
+	[2]: ""
+	EOF
+
+	$test_fn "" ":" "-1" <<-\EOF
+	1
+	[0]: ""
+	EOF
+done
 
 test_split ":" ":" "-1" <<EOF
 2
@@ -61,6 +72,38 @@ test_split ":" ":" "-1" <<EOF
 [1]: ""
 EOF
 
+test_split_in_place_multi "foo:;:bar:;:baz" ":;" "-1" <<-\EOF
+3
+[0]: "foo"
+[1]: "bar"
+[2]: "baz"
+EOF
+
+test_split_in_place_multi "foo:;:bar:;:baz" ":;" "0" <<-\EOF
+1
+[0]: "foo:;:bar:;:baz"
+EOF
+
+test_split_in_place_multi "foo:;:bar:;:baz" ":;" "1" <<-\EOF
+2
+[0]: "foo"
+[1]: "bar:;:baz"
+EOF
+
+test_split_in_place_multi "foo:;:bar:;:baz" ":;" "2" <<-\EOF
+3
+[0]: "foo"
+[1]: "bar"
+[2]: "baz"
+EOF
+
+test_split_in_place_multi "foo:;:bar:;:" ":;" "-1" <<-\EOF
+3
+[0]: "foo"
+[1]: "bar"
+[2]: ""
+EOF
+
 test_expect_success "test filter_string_list" '
 	test "x-" = "x$(test-tool string-list filter - y)" &&
 	test "x-" = "x$(test-tool string-list filter no y)" &&
-- 
2.40.0.358.g56d2318a6d




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux