[PATCH v3 2/2] diff.c: More changes and tests around utf8_strwidth()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Torsten Bögershausen <tboegi@xxxxxx>

When unicode filenames (encoded in UTF-8) are used, the visible width
on the screen is not the same as strlen(filename).

For example, `git log --stat` may produce an output like this:

[snip the header]

 Arger.txt  | 1 +
 Ärger.txt | 1 +
 2 files changed, 2 insertions(+)

The last commit uses utf8_strwidth() instead of strlen() in diff.c
and it is time to test the change.

And now we detect another problem that is fixed here: code like this
strbuf_addf(&out, "%-*s", len, name);
(or using the underlying snprintf() function) does not align the
buffer to a minimum of len measured in screen-width, but uses the
memory count.

One could be tempted to wish that snprintf() was UTF-8 aware.
That doesn't seem to be the case anywhere (tested on Linux and Mac),
probably snprintf() uses the "bytes in memory"/strlen() approach to be
compatible with older versions and this will never change.

The chosen solution is to split code in diff.c like this
strbuf_addf(&out, "%-*s", len, name);

into something like this:
size_t num_padding_spaces = 0;
// [snip]
if (len > utf8_strwidth(name))
    num_padding_spaces = len - utf8_strwidth(name);
strbuf_addf(&out, "%s", name);
if (num_padding_spaces)
    strbuf_addchars(&out, ' ', num_padding_spaces);

I couldn't convince myself to write a wrapper here that is
"easy to read and understandable" and would fit nicely into the chain of
strbuf_addX() calls used in diff.c

Tests:
Two things need to be tested:
 - The calculation of the maximum width
 - The calculation of num_padding_spaces

The name "textfile" is changed into "tëxtfilë", both have a width of 8.
If strlen() was used, to get the maximum width, the shorter "binfile" would
have been mis-aligned:
 binfile    |  [snip]
 tëxtfilë | [snip]

If only "binfile" would be renamed into "binfilë":
 binfilë |  [snip]
 textfile | [snip]

In order to verify that the width is calculated correctly everywhere,
"binfile" is renamed into "binfilë", giving 1 bytes more in strlen()
"tëxtfile" is renamed into "tëxtfilë", 2 byte more in strlen().

The updated t4012-diff-binary.sh checks the correct aligment:
 binfilë  | [snip]
 tëxtfilë | [snip]

Signed-off-by: Torsten Bögershausen <tboegi@xxxxxx>
---
 diff.c                 | 33 +++++++++++++++++++++------------
 t/t4012-diff-binary.sh | 14 +++++++-------
 2 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/diff.c b/diff.c
index b5df464de5..cf38e1dc88 100644
--- a/diff.c
+++ b/diff.c
@@ -2591,7 +2591,7 @@ void print_stat_summary(FILE *fp, int files,
 static void show_stats(struct diffstat_t *data, struct diff_options *options)
 {
 	int i, len, add, del, adds = 0, dels = 0;
-	uintmax_t max_change = 0, max_len = 0;
+	uintmax_t max_change = 0, max_width = 0;
 	int total_files = data->nr, count;
 	int width, name_width, graph_width, number_width = 0, bin_width = 0;
 	const char *reset, *add_c, *del_c;
@@ -2621,8 +2621,8 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		}
 		fill_print_name(file);
 		len = utf8_strwidth(file->print_name);
-		if (max_len < len)
-			max_len = len;
+		if (max_width < len)
+			max_width = len;

 		if (file->is_unmerged) {
 			/* "Unmerged" is 8 characters */
@@ -2646,7 +2646,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)

 	/*
 	 * We have width = stat_width or term_columns() columns total.
-	 * We want a maximum of min(max_len, stat_name_width) for the name part.
+	 * We want a maximum of min(max_width, stat_name_width) for the name part.
 	 * We want a maximum of min(max_change, stat_graph_width) for the +- part.
 	 * We also need 1 for " " and 4 + decimal_width(max_change)
 	 * for " | NNNN " and one the empty column at the end, altogether
@@ -2701,8 +2701,8 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		graph_width = options->stat_graph_width;

 	name_width = (options->stat_name_width > 0 &&
-		      options->stat_name_width < max_len) ?
-		options->stat_name_width : max_len;
+		      options->stat_name_width < max_width) ?
+		options->stat_name_width : max_width;

 	/*
 	 * Adjust adjustable widths not to exceed maximum width
@@ -2734,6 +2734,7 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 		char *name = file->print_name;
 		uintmax_t added = file->added;
 		uintmax_t deleted = file->deleted;
+		size_t num_padding_spaces = 0;
 		int name_len;

 		if (!file->is_interesting && (added + deleted == 0))
@@ -2753,10 +2754,14 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 			if (slash)
 				name = slash;
 		}
+		if (len > utf8_strwidth(name))
+			num_padding_spaces = len - utf8_strwidth(name);

 		if (file->is_binary) {
-			strbuf_addf(&out, " %s%-*s |", prefix, len, name);
-			strbuf_addf(&out, " %*s", number_width, "Bin");
+			strbuf_addf(&out, " %s%s ", prefix,  name);
+			if (num_padding_spaces)
+				strbuf_addchars(&out, ' ', num_padding_spaces);
+			strbuf_addf(&out, "| %*s", number_width, "Bin");
 			if (!added && !deleted) {
 				strbuf_addch(&out, '\n');
 				emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE,
@@ -2776,8 +2781,10 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 			continue;
 		}
 		else if (file->is_unmerged) {
-			strbuf_addf(&out, " %s%-*s |", prefix, len, name);
-			strbuf_addstr(&out, " Unmerged\n");
+			strbuf_addf(&out, " %s%s ", prefix,  name);
+			if (num_padding_spaces)
+				strbuf_addchars(&out, ' ', num_padding_spaces);
+			strbuf_addstr(&out, "| Unmerged\n");
 			emit_diff_symbol(options, DIFF_SYMBOL_STATS_LINE,
 					 out.buf, out.len, 0);
 			strbuf_reset(&out);
@@ -2803,8 +2810,10 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options)
 				add = total - del;
 			}
 		}
-		strbuf_addf(&out, " %s%-*s |", prefix, len, name);
-		strbuf_addf(&out, " %*"PRIuMAX"%s",
+		strbuf_addf(&out, " %s%s ", prefix,  name);
+		if (num_padding_spaces)
+			strbuf_addchars(&out, ' ', num_padding_spaces);
+		strbuf_addf(&out, "| %*"PRIuMAX"%s",
 			number_width, added + deleted,
 			added + deleted ? " " : "");
 		show_graph(&out, '+', add, add_c, reset);
diff --git a/t/t4012-diff-binary.sh b/t/t4012-diff-binary.sh
index c509143c81..c64d9d2f40 100755
--- a/t/t4012-diff-binary.sh
+++ b/t/t4012-diff-binary.sh
@@ -113,20 +113,20 @@ test_expect_success 'diff --no-index with binary creation' '
 '

 cat >expect <<EOF
- binfile  |   Bin 0 -> 1026 bytes
- textfile | 10000 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+ binfilë  |   Bin 0 -> 1026 bytes
+ tëxtfilë | 10000 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 EOF

 test_expect_success 'diff --stat with binary files and big change count' '
-	printf "\01\00%1024d" 1 >binfile &&
-	git add binfile &&
+	printf "\01\00%1024d" 1 >binfilë &&
+	git add binfilë &&
 	i=0 &&
 	while test $i -lt 10000; do
 		echo $i &&
 		i=$(($i + 1)) || return 1
-	done >textfile &&
-	git add textfile &&
-	git diff --cached --stat binfile textfile >output &&
+	done >tëxtfilë &&
+	git add tëxtfilë &&
+	git -c core.quotepath=false diff --cached --stat binfilë tëxtfilë >output &&
 	grep " | " output >actual &&
 	test_cmp expect actual
 '
--
2.34.0





[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux