Re: [PATCH v3 08/11] strbuf: introduce strbuf_strip_file_from_path()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Dec 06 2022, Ævar Arnfjörð Bjarmason wrote:

> On Mon, Dec 05 2022, Derrick Stolee via GitGitGadget wrote:
>
>> From: Derrick Stolee <derrickstolee@xxxxxxxxxx>
>>
>> The strbuf_parent_directory() method was added as a static method in
>> contrib/scalar by d0feac4e8c0 (scalar: 'register' sets recommended
>> config and starts maintenance, 2021-12-03) and then removed in
>> 65f6a9eb0b9 (scalar: constrain enlistment search, 2022-08-18), but now
>> there is a need for a similar method in the bundle URI feature.
>>
>> Re-add the method, this time in strbuf.c, but with a new name:
>> strbuf_strip_file_from_path(). The method requirements are slightly
>> modified to allow a trailing slash, in which case nothing is done, which
>> makes the name change valuable.
>>
>> Signed-off-by: Derrick Stolee <derrickstolee@xxxxxxxxxx>
>> ---
>>  strbuf.c |  6 ++++++
>>  strbuf.h | 11 +++++++++++
>>  2 files changed, 17 insertions(+)
>>
>> diff --git a/strbuf.c b/strbuf.c
>> index 0890b1405c5..c383f41a3c5 100644
>> --- a/strbuf.c
>> +++ b/strbuf.c
>> @@ -1200,3 +1200,9 @@ int strbuf_edit_interactively(struct strbuf *buffer, const char *path,
>>  	free(path2);
>>  	return res;
>>  }
>> +
>> +void strbuf_strip_file_from_path(struct strbuf *sb)
>> +{
>> +	char *path_sep = find_last_dir_sep(sb->buf);
>> +	strbuf_setlen(sb, path_sep ? path_sep - sb->buf + 1 : 0);
>> +}
>> diff --git a/strbuf.h b/strbuf.h
>> index 76965a17d44..f6dbb9681ee 100644
>> --- a/strbuf.h
>> +++ b/strbuf.h
>> @@ -664,6 +664,17 @@ int launch_sequence_editor(const char *path, struct strbuf *buffer,
>>  int strbuf_edit_interactively(struct strbuf *buffer, const char *path,
>>  			      const char *const *env);
>>  
>> +/*
>> + * Remove the filename from the provided path string. If the path
>> + * contains a trailing separator, then the path is considered a directory
>> + * and nothing is modified.
>> + *
>> + * Examples:
>> + * - "/path/to/file" -> "/path/to/"
>> + * - "/path/to/dir/" -> "/path/to/dir/"
>> + */
>> +void strbuf_strip_file_from_path(struct strbuf *sb);
>> +
>>  void strbuf_add_lines(struct strbuf *sb,
>>  		      const char *prefix,
>>  		      const char *buf,
>
> Re your reply in
> https://lore.kernel.org/git/0980dcd4-30eb-4ef4-9369-279abe5ca5a2@xxxxxxxxxx/
> I still don't get how this is different from a 1-byte change to
> strbuf_trim_trailing_dir_sep(), and if it isn't I think it's confusing
> API design to have two very different ways to return the same data.
>
> There you said "The difference is all about whether or not we start with
> a slash _and_ no other slash appears in the path.".
>
> But I can't find a case where there's any difference between the two. I
> tried this ad-hoc test on top:
> 	
> 	diff --git a/help.c b/help.c
> 	index f1e090a4428..b0866b01439 100644
> 	--- a/help.c
> 	+++ b/help.c
> 	@@ -765,6 +765,16 @@ int cmd_version(int argc, const char **argv, const char *prefix)
> 	 			 "also print build options"),
> 	 		OPT_END()
> 	 	};
> 	+	struct strbuf sb1 = STRBUF_INIT;
> 	+	struct strbuf sb2 = STRBUF_INIT;
> 	+
> 	+	if (getenv("STR")) {
> 	+		strbuf_addstr(&sb1, getenv("STR"));
> 	+		strbuf_addstr(&sb2, getenv("STR"));
> 	+		strbuf_strip_file_from_path(&sb1);
> 	+		strbuf_trim_trailing_not_dir_sep(&sb2);
> 	+		fprintf(stderr, "%s: %s | %s\n", strcmp(sb1.buf, sb2.buf) ? "NEQ" : "EQ", sb1.buf, sb2.buf);
> 	+	}
> 	 
> 	 	argc = parse_options(argc, argv, prefix, options, usage, 0);
> 	 
> 	diff --git a/strbuf.c b/strbuf.c
> 	index c383f41a3c5..f75d94556fc 100644
> 	--- a/strbuf.c
> 	+++ b/strbuf.c
> 	@@ -114,13 +114,23 @@ void strbuf_rtrim(struct strbuf *sb)
> 	 	sb->buf[sb->len] = '\0';
> 	 }
> 	 
> 	-void strbuf_trim_trailing_dir_sep(struct strbuf *sb)
> 	+static void strbuf_trim_trailing_dir_sep_1(struct strbuf *sb, int flip)
> 	 {
> 	-	while (sb->len > 0 && is_dir_sep((unsigned char)sb->buf[sb->len - 1]))
> 	+	while (sb->len > 0 && is_dir_sep((unsigned char)sb->buf[sb->len - 1]) - flip)
> 	 		sb->len--;
> 	 	sb->buf[sb->len] = '\0';
> 	 }
> 	 
> 	+void strbuf_trim_trailing_dir_sep(struct strbuf *sb)
> 	+{
> 	+	strbuf_trim_trailing_dir_sep_1(sb, 1);
> 	+}
> 	+
> 	+void strbuf_trim_trailing_not_dir_sep(struct strbuf *sb)
> 	+{
> 	+	strbuf_trim_trailing_dir_sep_1(sb, 1);
> 	+}
> 	+
> 	 void strbuf_trim_trailing_newline(struct strbuf *sb)
> 	 {
> 	 	if (sb->len > 0 && sb->buf[sb->len - 1] == '\n') {
> 	diff --git a/strbuf.h b/strbuf.h
> 	index f6dbb9681ee..b936f45ffad 100644
> 	--- a/strbuf.h
> 	+++ b/strbuf.h
> 	@@ -189,6 +189,8 @@ void strbuf_ltrim(struct strbuf *sb);
> 	 
> 	 /* Strip trailing directory separators */
> 	 void strbuf_trim_trailing_dir_sep(struct strbuf *sb);
> 	+/* Strip trailing not-directory separators */
> 	+void strbuf_trim_trailing_not_dir_sep(struct strbuf *sb);
> 	 
> 	 /* Strip trailing LF or CR/LF */
> 	 void strbuf_trim_trailing_newline(struct strbuf *sb);
>
> Then:
> 	
> 	$ for str in a / b/ /c /d/ /e/ /f/g /h/i/ j/k l//m n/o/p //q/r/s/t; do STR=$str ./git version; done 2>&1 | grep :
> 	EQ:  | 
> 	EQ: / | /
> 	EQ: b/ | b/
> 	EQ: / | /
> 	EQ: /d/ | /d/
> 	EQ: /e/ | /e/
> 	EQ: /f/ | /f/
> 	EQ: /h/i/ | /h/i/
> 	EQ: j/ | j/
> 	EQ: l// | l//
> 	EQ: n/o/ | n/o/
> 	EQ: //q/r/s/ | //q/r/s/
>
> I.e. for those inputs it's the same as the existing
> strbuf_trim_trailing_dir_sep() with an inverted test. Is there some edge
> case that I'm missing?

FWIW the "overkill" change on top to do this via callbacks is below. I
tested it just to see how easy it was, and whether it would fail your
tests (it doesn't).

-- >8 --
Subject: [PATCH] strbuf: generalize "{,r,l}trim" to a callback interface

We've had all three variants of "trim" for isspace(), and since
c64a8d200f4 (worktree move: accept destination as directory,
2018-02-12) we've also had an "is_dir_sep" variant.

A preceding change then added a "!is_dir_sep" variant. Let's
generalize this, and have all these functions that want to trim
characters matching some criteria be driven by the same logic.

Signed-off-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx>
---
 bundle-uri.c      |  7 +------
 git-compat-util.h |  5 +++++
 strbuf.c          | 44 ++++++++++++++++++++++++++++----------------
 strbuf.h          | 41 +++++++++++++++++++++++++++--------------
 4 files changed, 61 insertions(+), 36 deletions(-)

diff --git a/bundle-uri.c b/bundle-uri.c
index c411b871bdd..7240dedcaee 100644
--- a/bundle-uri.c
+++ b/bundle-uri.c
@@ -195,13 +195,8 @@ int bundle_uri_parse_config_format(const char *uri,
 	if (!list->baseURI) {
 		struct strbuf baseURI = STRBUF_INIT;
 		strbuf_addstr(&baseURI, uri);
+		strbuf_trim_trailing_not_dir_sep(&baseURI);
 
-		/*
-		 * If the URI does not end with a trailing slash, then
-		 * remove the filename portion of the path. This is
-		 * important for relative URIs.
-		 */
-		strbuf_strip_file_from_path(&baseURI);
 		list->baseURI = strbuf_detach(&baseURI, NULL);
 	}
 	result = git_config_from_file_with_options(config_to_bundle_list,
diff --git a/git-compat-util.h b/git-compat-util.h
index a76d0526f79..5bce9fa768c 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -451,6 +451,11 @@ static inline int git_is_dir_sep(int c)
 #define is_dir_sep git_is_dir_sep
 #endif
 
+static inline int is_not_dir_sep(int c)
+{
+	return !is_dir_sep(c);
+}
+
 #ifndef offset_1st_component
 static inline int git_offset_1st_component(const char *path)
 {
diff --git a/strbuf.c b/strbuf.c
index c383f41a3c5..a5a1c01d539 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -101,24 +101,37 @@ void strbuf_grow(struct strbuf *sb, size_t extra)
 		sb->buf[0] = '\0';
 }
 
-void strbuf_trim(struct strbuf *sb)
+void strbuf_trim_fn(struct strbuf *sb, strbuf_ctype_fn_t fn)
 {
-	strbuf_rtrim(sb);
-	strbuf_ltrim(sb);
+	strbuf_rtrim_fn(sb, fn);
+	strbuf_ltrim_fn(sb, fn);
 }
 
-void strbuf_rtrim(struct strbuf *sb)
+void strbuf_rtrim_fn(struct strbuf *sb, strbuf_ctype_fn_t fn)
 {
-	while (sb->len > 0 && isspace((unsigned char)sb->buf[sb->len - 1]))
+	while (sb->len > 0 && fn((unsigned char)sb->buf[sb->len - 1]))
 		sb->len--;
 	sb->buf[sb->len] = '\0';
 }
 
+void strbuf_trim(struct strbuf *sb)
+{
+	strbuf_trim_fn(sb, strbuf_ctype_isspace);
+}
+
+void strbuf_rtrim(struct strbuf *sb)
+{
+	strbuf_rtrim_fn(sb, strbuf_ctype_isspace);
+}
+
 void strbuf_trim_trailing_dir_sep(struct strbuf *sb)
 {
-	while (sb->len > 0 && is_dir_sep((unsigned char)sb->buf[sb->len - 1]))
-		sb->len--;
-	sb->buf[sb->len] = '\0';
+	strbuf_rtrim_fn(sb, is_dir_sep);
+}
+
+void strbuf_trim_trailing_not_dir_sep(struct strbuf *sb)
+{
+	strbuf_rtrim_fn(sb, is_not_dir_sep);
 }
 
 void strbuf_trim_trailing_newline(struct strbuf *sb)
@@ -130,10 +143,10 @@ void strbuf_trim_trailing_newline(struct strbuf *sb)
 	}
 }
 
-void strbuf_ltrim(struct strbuf *sb)
+void strbuf_ltrim_fn(struct strbuf *sb, strbuf_ctype_fn_t fn)
 {
 	char *b = sb->buf;
-	while (sb->len > 0 && isspace(*b)) {
+	while (sb->len > 0 && fn(*b)) {
 		b++;
 		sb->len--;
 	}
@@ -141,6 +154,11 @@ void strbuf_ltrim(struct strbuf *sb)
 	sb->buf[sb->len] = '\0';
 }
 
+void strbuf_ltrim(struct strbuf *sb)
+{
+	strbuf_ltrim_fn(sb, strbuf_ctype_isspace);
+}
+
 int strbuf_reencode(struct strbuf *sb, const char *from, const char *to)
 {
 	char *out;
@@ -1200,9 +1218,3 @@ int strbuf_edit_interactively(struct strbuf *buffer, const char *path,
 	free(path2);
 	return res;
 }
-
-void strbuf_strip_file_from_path(struct strbuf *sb)
-{
-	char *path_sep = find_last_dir_sep(sb->buf);
-	strbuf_setlen(sb, path_sep ? path_sep - sb->buf + 1 : 0);
-}
diff --git a/strbuf.h b/strbuf.h
index f6dbb9681ee..bb7aa38816f 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -180,15 +180,39 @@ static inline void strbuf_setlen(struct strbuf *sb, size_t len)
  */
 
 /**
- * Strip whitespace from the beginning (`ltrim`), end (`rtrim`), or both side
- * (`trim`) of a string.
+ * A callback function that acts like the macros defined in
+ * <ctype.h>. To be given to strbuf_{,r,l}trim_fn() below.
+ */
+typedef int (*strbuf_ctype_fn_t)(int c);
+static inline int strbuf_ctype_isspace(int c) { return isspace(c); }
+
+/**
+ * Strip characters matching the 'strbuf_ctype_fn_t' from the
+ * beginning (`ltrim`), end (`rtrim`) or both sides (`trim`) of a
+ * string.
+ */
+void strbuf_trim_fn(struct strbuf *sb, strbuf_ctype_fn_t fn);
+void strbuf_rtrim_fn(struct strbuf *sb, strbuf_ctype_fn_t fn);
+void strbuf_ltrim_fn(struct strbuf *sb, strbuf_ctype_fn_t fn);
+
+/**
+ * The common-case wrapper for strbuf_{,r,l}trim_fn() uses the
+ * strbuf_ctype_isspace() callback function.
  */
 void strbuf_trim(struct strbuf *sb);
 void strbuf_rtrim(struct strbuf *sb);
 void strbuf_ltrim(struct strbuf *sb);
 
-/* Strip trailing directory separators */
+/**
+ * Strip trailing directory separators. This is strbuf_rtrim_fn() with
+ * is_dir_sep() as the callback.
+ */
 void strbuf_trim_trailing_dir_sep(struct strbuf *sb);
+/**
+ * Strip trailing not-directory separators. This is strbuf_rtrim_fn()
+ * with is_not_dir_sep() as the callback.
+ */
+void strbuf_trim_trailing_not_dir_sep(struct strbuf *sb);
 
 /* Strip trailing LF or CR/LF */
 void strbuf_trim_trailing_newline(struct strbuf *sb);
@@ -664,17 +688,6 @@ int launch_sequence_editor(const char *path, struct strbuf *buffer,
 int strbuf_edit_interactively(struct strbuf *buffer, const char *path,
 			      const char *const *env);
 
-/*
- * Remove the filename from the provided path string. If the path
- * contains a trailing separator, then the path is considered a directory
- * and nothing is modified.
- *
- * Examples:
- * - "/path/to/file" -> "/path/to/"
- * - "/path/to/dir/" -> "/path/to/dir/"
- */
-void strbuf_strip_file_from_path(struct strbuf *sb);
-
 void strbuf_add_lines(struct strbuf *sb,
 		      const char *prefix,
 		      const char *buf,
-- 
2.39.0.rc1.1014.gc37e9814e18





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Announce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux