From: Diomidis Spinellis <dds@xxxxxxx> The 2013 commit 29de20504e9 fixed t0070-fundamental.sh under Darwin (macOS) by adopting Git's regex library. However, this library is compiled with NO_MBSUPPORT, which causes git-grep to work incorrectly on multibyte (e.g. UTF-8) files. Current macOS versions pass t0070-fundamental.sh with the native macOS regex library, which also supports multibyte characters. Adjust the Makefile to use the native regex library, and call setlocale(3) to set CTYPE according to the user's preference. The setlocale(3) call is required on all platforms, but in platforms supporting gettext(3), setlocale(3) is called as a side-effect of initializing gettext(3). To avoid running the new tests on platforms still using the compatibility library, which is compiled without multibyte support, store the corresponding NO_REGEX setting in the GIT-BUILD-OPTIONS file. This makes it available to the test scripts. In addition, adjust the test-tool regex command to work with multibyte regexes to further test a platform's support for them. Signed-off-by: Diomidis Spinellis <dds@xxxxxxx> --- grep: fix multibyte regex handling under macOS The 2013 commit 29de20504e9 fixed t0070-fundamental.sh under Darwin (macOS) by adopting Git's regex library. However, this library is compiled with NO_MBSUPPORT, which causes git-grep to work incorrectly on multibyte (e.g. UTF-8) files. Current macOS versions pass t0070-fundamental.sh with the native macOS regex library, which also supports multibyte characters. Adjust the Makefile to use the native regex library, and call setlocale(3) to set CTYPE according to the user's preference. The setlocale(3) call is required on all platforms, but in platforms supporting gettext(3), setlocale(3) is called as a side-effect of initializing gettext(3). To avoid running the new tests on platforms still using the compatibility library, which is compiled without multibyte support, store the corresponding NO_REGEX setting in the GIT-BUILD-OPTIONS file. This makes it available to the test scripts. In addition, adjust the test-tool regex command to work with multibyte regexes to further test a platform's support for them. Signed-off-by: Diomidis Spinellis dds@xxxxxxx Published-As: https://github.com/gitgitgadget/git/releases/tag/pr-git-1313%2Fdspinellis%2Ffix-macos-mb-grep-v1 Fetch-It-Via: git fetch https://github.com/gitgitgadget/git pr-git-1313/dspinellis/fix-macos-mb-grep-v1 Pull-Request: https://github.com/git/git/pull/1313 Makefile | 2 +- grep.c | 6 +++++- t/helper/test-regex.c | 2 ++ t/t7818-grep-multibyte.sh | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 42 insertions(+), 2 deletions(-) create mode 100755 t/t7818-grep-multibyte.sh diff --git a/Makefile b/Makefile index 04d0fd1fe60..d1a98257150 100644 --- a/Makefile +++ b/Makefile @@ -1427,7 +1427,6 @@ ifeq ($(uname_S),Darwin) APPLE_COMMON_CRYPTO = YesPlease COMPAT_CFLAGS += -DAPPLE_COMMON_CRYPTO endif - NO_REGEX = YesPlease PTHREAD_LIBS = endif @@ -2970,6 +2969,7 @@ GIT-BUILD-OPTIONS: FORCE @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@+ @echo NO_PTHREADS=\''$(subst ','\'',$(subst ','\'',$(NO_PTHREADS)))'\' >>$@+ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@+ + @echo NO_REGEX=\''$(subst ','\'',$(subst ','\'',$(NO_REGEX)))'\' >>$@+ @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@+ @echo PAGER_ENV=\''$(subst ','\'',$(subst ','\'',$(PAGER_ENV)))'\' >>$@+ @echo DC_SHA1=\''$(subst ','\'',$(subst ','\'',$(DC_SHA1)))'\' >>$@+ diff --git a/grep.c b/grep.c index 82eb7da1022..c31657c3da3 100644 --- a/grep.c +++ b/grep.c @@ -10,6 +10,8 @@ #include "quote.h" #include "help.h" +#include <locale.h> + static int grep_source_load(struct grep_source *gs); static int grep_source_is_binary(struct grep_source *gs, struct index_state *istate); @@ -707,8 +709,10 @@ static struct grep_expr *grep_splice_or(struct grep_expr *x, struct grep_expr *y void compile_grep_patterns(struct grep_opt *opt) { struct grep_pat *p; - struct grep_expr *header_expr = prep_header_patterns(opt); + struct grep_expr *header_expr; + setlocale(LC_CTYPE, ""); + header_expr = prep_header_patterns(opt); for (p = opt->pattern_list; p; p = p->next) { switch (p->token) { case GREP_PATTERN: /* atom */ diff --git a/t/helper/test-regex.c b/t/helper/test-regex.c index d6f28ca8d14..ae4d7854abd 100644 --- a/t/helper/test-regex.c +++ b/t/helper/test-regex.c @@ -1,5 +1,6 @@ #include "test-tool.h" #include "gettext.h" +#include <locale.h> struct reg_flag { const char *name; @@ -85,6 +86,7 @@ int cmd__regex(int argc, const char **argv) } git_setup_gettext(); + setlocale(LC_CTYPE, ""); ret = regcomp(&r, pat, flags); if (ret) { if (silent) diff --git a/t/t7818-grep-multibyte.sh b/t/t7818-grep-multibyte.sh new file mode 100755 index 00000000000..72129880fac --- /dev/null +++ b/t/t7818-grep-multibyte.sh @@ -0,0 +1,34 @@ +#!/bin/sh + +test_description='grep multibyte characters' + +. ./test-lib.sh + +# Multibyte regex search is only supported with a native regex library +# that supports them. +# (The included library is compiled with NO_MBSUPPORT) and only if it +test -z "$NO_REGEX" && + LC_ALL=en_US.UTF-8 test-tool regex '^.$' '¿' && + test_set_prereq MB_REGEX + +if ! test_have_prereq MB_REGEX +then + skip_all='multibyte grep tests; Git compiled with NO_REGEX, NO_MBSUPPORT' + test_done +fi + +test_expect_success 'setup' ' + test_write_lines "¿" >file && + git add file && + LC_ALL="en_US.UTF-8" && + export LC_ALL +' +test_expect_success 'grep exactly one char in single-char multibyte file' ' + git grep "^.$" +' + +test_expect_code 1 'grep two chars in single-char multibyte file' ' + git grep ".." +' + +test_done base-commit: ad60dddad72dfb8367bd695028b5b8dc6c33661b -- gitgitgadget