[PATCH 1/4] Add a new function, string_list_split_in_place()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Split a string into a string_list on a separator character.

This is similar to the strbuf_split_*() functions except that it works
with the more powerful string_list interface.  If strdup_strings is
false, it reuses the memory from the input string (thereby needing no
string memory allocations, though of course allocations are still
needed for the string_list_items array).

Signed-off-by: Michael Haggerty <mhagger@xxxxxxxxxxxx>
---

In the tests, I use here documents to specify the expected output.  Is
this OK?  (It is certainly convenient.)

 .gitignore                                  |  1 +
 Documentation/technical/api-string-list.txt | 12 ++++++
 Makefile                                    |  1 +
 string-list.c                               | 23 +++++++++++
 string-list.h                               | 19 +++++++++
 t/t0063-string-list.sh                      | 63 +++++++++++++++++++++++++++++
 test-string-list.c                          | 25 ++++++++++++
 7 files changed, 144 insertions(+)
 create mode 100755 t/t0063-string-list.sh
 create mode 100644 test-string-list.c

diff --git a/.gitignore b/.gitignore
index bb5c91e..0ca7df8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -193,6 +193,7 @@
 /test-run-command
 /test-sha1
 /test-sigchain
+/test-string-list
 /test-subprocess
 /test-svn-fe
 /common-cmds.h
diff --git a/Documentation/technical/api-string-list.txt b/Documentation/technical/api-string-list.txt
index 5a0c14f..3b959a2 100644
--- a/Documentation/technical/api-string-list.txt
+++ b/Documentation/technical/api-string-list.txt
@@ -124,6 +124,18 @@ counterpart for sorted lists, which performs a binary search.
 	is set. The third parameter controls if the `util` pointer of the
 	items should be freed or not.
 
+`string_list_split_in_place`::
+
+	Split string into substrings on character delim and append the
+	substrings to a string_list.  The delimiter characters in
+	string are overwritten with NULs in the process.  If maxsplit
+	is positive, split at most maxsplit times; otherwise split at
+	every delim.  If list.strdup_strings is not set, then the new
+	string_list_items point into string, which therefore must not
+	be modified or freed while the string_list is in use.  Return
+	the number of substrings appended to the list.
+
+
 Data structures
 ---------------
 
diff --git a/Makefile b/Makefile
index 66e8216..ebbb381 100644
--- a/Makefile
+++ b/Makefile
@@ -501,6 +501,7 @@ TEST_PROGRAMS_NEED_X += test-run-command
 TEST_PROGRAMS_NEED_X += test-scrap-cache-tree
 TEST_PROGRAMS_NEED_X += test-sha1
 TEST_PROGRAMS_NEED_X += test-sigchain
+TEST_PROGRAMS_NEED_X += test-string-list
 TEST_PROGRAMS_NEED_X += test-subprocess
 TEST_PROGRAMS_NEED_X += test-svn-fe
 
diff --git a/string-list.c b/string-list.c
index d9810ab..110449c 100644
--- a/string-list.c
+++ b/string-list.c
@@ -194,3 +194,26 @@ void unsorted_string_list_delete_item(struct string_list *list, int i, int free_
 	list->items[i] = list->items[list->nr-1];
 	list->nr--;
 }
+
+int string_list_split_in_place(struct string_list *list, char *string,
+			       int delim, int maxsplit)
+{	/* Split string at each delim in place, appending every piece to list. */
+	int count = 0;	/* pieces so far, counting the one about to be appended */
+	char *p = string, *end;	/* p: start of the current piece; end: next delim */
+	for (;;) {
+		count++;
+		if (maxsplit > 0 && count > maxsplit) {	/* no limit unless maxsplit > 0 */
+			string_list_append(list, p);	/* limit hit: the rest stays unsplit */
+			return count;	/* number of pieces appended */
+		}
+		end = strchr(p, delim);	/* NOTE(review): delim 0 matches the terminator and p then runs past it -- confirm callers never pass 0 */
+		if (end) {
+			*end = '\0';	/* overwrite the delimiter to terminate this piece */
+			string_list_append(list, p);
+			p = end + 1;	/* next piece begins right after the delimiter */
+		} else {
+			string_list_append(list, p);	/* no delimiter left: the tail is the final piece */
+			return count;	/* number of pieces appended */
+		}
+	}
+}
diff --git a/string-list.h b/string-list.h
index 0684cb7..7e51d03 100644
--- a/string-list.h
+++ b/string-list.h
@@ -45,4 +45,23 @@ int unsorted_string_list_has_string(struct string_list *list, const char *string
 struct string_list_item *unsorted_string_list_lookup(struct string_list *list,
 						     const char *string);
 void unsorted_string_list_delete_item(struct string_list *list, int i, int free_util);
+
+/*
+ * Split string into substrings on character delim and append them to
+ * list.  The delimiter characters in string are overwritten with NULs
+ * in the process.  If maxsplit is positive, split at most maxsplit
+ * times; otherwise split at every delim.  If list.strdup_strings is
+ * not set, then the new string_list_items point into string, which
+ * therefore must not be modified or freed while the string_list is in
+ * use.  Return the number of substrings appended to list.
+ *
+ * Examples:
+ *   string_list_split_in_place(l, "foo:bar:baz", ':', -1) -> ["foo", "bar", "baz"]
+ *   string_list_split_in_place(l, "foo:bar:baz", ':', 1) -> ["foo", "bar:baz"]
+ *   string_list_split_in_place(l, "foo:bar:", ':', -1) -> ["foo", "bar", ""]
+ *   string_list_split_in_place(l, "", ':', -1) -> [""]
+ *   string_list_split_in_place(l, ":", ':', -1) -> ["", ""]
+ */
+int string_list_split_in_place(struct string_list *list, char *string,
+			       int delim, int maxsplit);
 #endif /* STRING_LIST_H */
diff --git a/t/t0063-string-list.sh b/t/t0063-string-list.sh
new file mode 100755
index 0000000..0eede83
--- /dev/null
+++ b/t/t0063-string-list.sh
@@ -0,0 +1,63 @@
+#!/bin/sh
+#
+# Copyright (c) 2012 Michael Haggerty
+#
+
+test_description='Test string list functionality'
+
+. ./test-lib.sh
+
+string_list_split_in_place() {	# args: string delim maxsplit; stdin is saved as the expected output
+	cat >split-expected &&
+	test_expect_success "split $1 $2 $3" "
+		test-string-list split_in_place '$1' '$2' '$3' >split-actual &&
+		test_cmp split-expected split-actual
+	"
+}
+
+string_list_split_in_place "foo:bar:baz" ":" "-1" <<EOF	# negative maxsplit: no limit
+3
+[0]: "foo"
+[1]: "bar"
+[2]: "baz"
+EOF
+
+string_list_split_in_place "foo:bar:baz" ":" "0" <<EOF	# zero maxsplit: no limit either
+3
+[0]: "foo"
+[1]: "bar"
+[2]: "baz"
+EOF
+
+string_list_split_in_place "foo:bar:baz" ":" "1" <<EOF	# one split: the remainder is kept whole
+2
+[0]: "foo"
+[1]: "bar:baz"
+EOF
+
+string_list_split_in_place "foo:bar:baz" ":" "2" <<EOF	# limit equal to the number of delimiters
+3
+[0]: "foo"
+[1]: "bar"
+[2]: "baz"
+EOF
+
+string_list_split_in_place "foo:bar:" ":" "-1" <<EOF	# trailing delimiter yields a final empty item
+3
+[0]: "foo"
+[1]: "bar"
+[2]: ""
+EOF
+
+string_list_split_in_place "" ":" "-1" <<EOF	# empty input yields one empty item
+1
+[0]: ""
+EOF
+
+string_list_split_in_place ":" ":" "-1" <<EOF	# a lone delimiter yields two empty items
+2
+[0]: ""
+[1]: ""
+EOF
+
+test_done
diff --git a/test-string-list.c b/test-string-list.c
new file mode 100644
index 0000000..f08d3cc
--- /dev/null
+++ b/test-string-list.c
@@ -0,0 +1,25 @@
+#include "cache.h"
+#include "string-list.h"
+
+int main(int argc, char **argv)
+{	/* Test helper: split argv[2] and print the resulting list. */
+	if ((argc == 4 || argc == 5) && !strcmp(argv[1], "split_in_place")) {	/* split_in_place <string> <delim> [<maxsplit>] */
+		struct string_list list = STRING_LIST_INIT_NODUP;
+		int i;
+		char *s = xstrdup(argv[2]);	/* the split writes NULs into its input, so use a duplicate (freed below) */
+		int delim = *argv[3];	/* only the first character of the argument is used */
+		int maxsplit = (argc == 5) ? atoi(argv[4]) : -1;	/* -1 when the argument is omitted */
+
+		i = string_list_split_in_place(&list, s, delim, maxsplit);
+		printf("%d\n", i);	/* first output line: the returned count */
+		for (i = 0; i < list.nr; i++)
+			printf("[%d]: \"%s\"\n", i, list.items[i].string);	/* then one line per list item */
+		string_list_clear(&list, 0);
+		free(s);
+		return 0;
+	}
+
+	fprintf(stderr, "%s: unknown function name: %s\n", argv[0],	/* also reached when the argument count is wrong */
+		argv[1] ? argv[1] : "(there was none)");
+	return 1;
+}
-- 
1.7.11.3

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]