Optimize check_refname_component using SSE4.2, where available. git rev-parse HEAD is a good test-case for this, since it does almost nothing except parse refs. For one particular repo with about 60k refs, almost all packed, the timings are: Look up table: 29 ms SSE4.2: 25 ms This is about a 15% improvement. The configure.ac changes include code from the GNU C Library written by Joseph S. Myers <joseph at codesourcery dot com>. Signed-off-by: David Turner <dturner@xxxxxxxxxxx> --- Makefile | 6 +++ aclocal.m4 | 6 +++ configure.ac | 17 ++++++++ git-compat-util.h | 22 ++++++++++ refs.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++------- t/t5511-refspec.sh | 13 ++++++ 6 files changed, 166 insertions(+), 15 deletions(-) diff --git a/Makefile b/Makefile index a53f3a8..dd2127a 100644 --- a/Makefile +++ b/Makefile @@ -1326,6 +1326,11 @@ else COMPAT_OBJS += compat/win32mmap.o endif endif +ifdef NO_SSE42 + BASIC_CFLAGS += -DNO_SSE42 +else + BASIC_CFLAGS += -msse4.2 +endif ifdef OBJECT_CREATION_USES_RENAMES COMPAT_CFLAGS += -DOBJECT_CREATION_MODE=1 endif @@ -2199,6 +2204,7 @@ GIT-BUILD-OPTIONS: FORCE @echo NO_PERL=\''$(subst ','\'',$(subst ','\'',$(NO_PERL)))'\' >>$@ @echo NO_PYTHON=\''$(subst ','\'',$(subst ','\'',$(NO_PYTHON)))'\' >>$@ @echo NO_UNIX_SOCKETS=\''$(subst ','\'',$(subst ','\'',$(NO_UNIX_SOCKETS)))'\' >>$@ + @echo NO_SSE42=\''$(subst ','\'',$(subst ','\'',$(NO_SSE42)))'\' >>$@ ifdef TEST_OUTPUT_DIRECTORY @echo TEST_OUTPUT_DIRECTORY=\''$(subst ','\'',$(subst ','\'',$(TEST_OUTPUT_DIRECTORY)))'\' >>$@ endif diff --git a/aclocal.m4 b/aclocal.m4 index f11bc7e..d9f3f19 100644 --- a/aclocal.m4 +++ b/aclocal.m4 @@ -38,3 +38,9 @@ AC_DEFUN([TYPE_SOCKLEN_T], [#include <sys/types.h> #include <sys/socket.h>]) ]) + +dnl Test a compiler option or options with an empty input file. +dnl LIBC_TRY_CC_OPTION([options], [action-if-true], [action-if-false]) +AC_DEFUN([LIBC_TRY_CC_OPTION], +[AS_IF([AC_TRY_COMMAND([${CC-cc} $1 -xc /dev/null -S -o /dev/null])], + [$2], [$3])]) diff --git a/configure.ac b/configure.ac index b711254..3a5bda9 100644 --- a/configure.ac +++ b/configure.ac @@ -382,6 +382,23 @@ AS_HELP_STRING([],[Tcl/Tk interpreter will be found in a system.]), GIT_PARSE_WITH(tcltk)) # +# Declare the with-sse42/without-sse42 options. +AC_ARG_WITH(sse42, +AS_HELP_STRING([--with-sse42],[use SSE4.2 instructions]) +AS_HELP_STRING([],[(default is YES if your compiler supports -msse4.2)]), +GIT_PARSE_WITH(sse42)) + +if test "$NO_SSE42" != "YesPlease"; then + dnl Check if -msse4.2 works. + AC_CACHE_CHECK(for SSE4.2 support, cc_cv_sse42, [dnl + LIBC_TRY_CC_OPTION([-msse4.2], [cc_cv_sse42=yes], [cc_cv_sse42=no]) + ]) + if test $cc_cv_sse42 = no; then + NO_SSE42=1 + fi +fi + +GIT_CONF_SUBST([NO_SSE42]) ## Checks for programs. AC_MSG_NOTICE([CHECKS for programs]) diff --git a/git-compat-util.h b/git-compat-util.h index f6d3a46..218d510 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -668,6 +668,28 @@ void git_qsort(void *base, size_t nmemb, size_t size, #endif #endif +#ifndef NO_SSE42 +#include <nmmintrin.h> +/* + * Clang ships with a version of nmmintrin.h that's incomplete; if + * necessary, we define the constants that we're going to use. + */ +#ifndef _SIDD_UBYTE_OPS +#define _SIDD_UBYTE_OPS 0x00 +#define _SIDD_CMP_EQUAL_ANY 0x00 +#define _SIDD_CMP_RANGES 0x04 +#define _SIDD_CMP_EQUAL_ORDERED 0x0c +#define _SIDD_NEGATIVE_POLARITY 0x10 +#endif + +/* This is the system memory page size; it's used so that we can read + * outside the bounds of an allocation without segfaulting. + */ +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif +#endif + #ifdef UNRELIABLE_FSTAT #define fstat_is_reliable() 0 #else diff --git a/refs.c b/refs.c index 46139d2..2fe0075 100644 --- a/refs.c +++ b/refs.c @@ -24,6 +24,25 @@ static unsigned char refname_disposition[256] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4 }; +static int check_refname_component_trailer(const char *cp, const char *refname, int flags) +{ + if (cp == refname) + return 0; /* Component has zero length. */ + if (refname[0] == '.') { + if (!(flags & REFNAME_DOT_COMPONENT)) + return -1; /* Component starts with '.'. */ + /* + * Even if leading dots are allowed, don't allow "." + * as a component (".." is prevented by a rule above). + */ + if (refname[1] == '\0') + return -1; /* Component equals ".". */ + } + if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5)) + return -1; /* Refname ends with ".lock". */ + return cp - refname; +} + /* * Try to read one refname component from the front of refname. * Return the length of the component found, or -1 if the component is @@ -37,7 +56,7 @@ static unsigned char refname_disposition[256] = { * - it ends with ".lock" * - it contains a "\" (backslash) */ -static int check_refname_component(const char *refname, int flags) +static int check_refname_component_1(const char *refname, int flags) { const char *cp; char last = '\0'; @@ -47,7 +66,7 @@ static int check_refname_component(const char *refname, int flags) unsigned char disp = refname_disposition[ch]; switch(disp) { case 1: - goto out; + return check_refname_component_trailer(cp, refname, flags); case 2: if (last == '.') return -1; /* Refname contains "..". */ @@ -61,23 +80,91 @@ static int check_refname_component(const char *refname, int flags) } last = ch; } -out: - if (cp == refname) - return 0; /* Component has zero length. */ - if (refname[0] == '.') { - if (!(flags & REFNAME_DOT_COMPONENT)) - return -1; /* Component starts with '.'. */ +} + +#ifdef NO_SSE42 +#define check_refname_component check_refname_component_1 +#else +#define SSE_VECTOR_BYTES 16 + +/* Vectorized version of check_refname_component */ +static int check_refname_component(const char *refname, int flags) +{ + const __m128i *refname_vec = (__m128i*) refname; + + /* Character ranges for characters forbidden in refs; see above */ + static const __v16qi bad = { + 0x01, 0x20, 0x7e, 0x7f, 0x5e, 0x5e, 0x3a, 0x3a, + 0x5b, 0x5c, 0x2a, 0x2a, 0x3f, 0x3f, 0x3f, 0x3f}; + + static const __v16qi nonslashes = { + '\001', '/' -1, '/' + 1, 0xff, + }; + + static const __v16qi dotdot = {'.', '.', 0}; + static const __v16qi atcurly = {'@', '{', 0}; + + const __m128i *vp; + const char *cp = (const char *)refname_vec; + + int dotdotpos = SSE_VECTOR_BYTES, atcurlypos = SSE_VECTOR_BYTES; + for (vp = refname_vec; ; vp++) { + __m128i tmp; + int endpos; + /* - * Even if leading dots are allowed, don't allow "." - * as a component (".." is prevented by a rule above). + * Handle case of forbidden substrings .. and @{ crossing + * sixteen-byte boundaries */ - if (refname[1] == '\0') - return -1; /* Component equals ".". */ + if (dotdotpos == 15 && *cp == '.') + return -1; + + if (atcurlypos == 15 && *cp == '{') + return -1; + + if (((uintptr_t) vp % PAGE_SIZE) > PAGE_SIZE - SSE_VECTOR_BYTES) + /* + * End-of-page; fall back to slow method for + * this entire component. + */ + return check_refname_component_1(refname, flags); + + tmp = _mm_lddqu_si128(vp); + + /* + * Find slashes or end-of-string. The double-negative + * (negative-polarity search for non-slashes) is + * necessary so that \0 will also be counted. + */ + endpos = _mm_cmpistri((__m128i) nonslashes, tmp, + _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES | + _SIDD_NEGATIVE_POLARITY); + + if (_mm_cmpestrc((__m128i) bad, SSE_VECTOR_BYTES, tmp, endpos, + _SIDD_UBYTE_OPS | _SIDD_CMP_RANGES)) + return -1; + + dotdotpos = _mm_cmpestri((__m128i) dotdot, 2, tmp, endpos, + _SIDD_UBYTE_OPS | + _SIDD_CMP_EQUAL_ORDERED); + if (dotdotpos < 15) + return -1; + + atcurlypos = _mm_cmpestri((__m128i) atcurly, 2, tmp, endpos, + _SIDD_UBYTE_OPS | + _SIDD_CMP_EQUAL_ORDERED); + if (atcurlypos < 15) + return -1; + + if (endpos < SSE_VECTOR_BYTES) { + cp = ((const char*) vp) + endpos; + break; + } + cp = (const char*) vp + SSE_VECTOR_BYTES; } - if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5)) - return -1; /* Refname ends with ".lock". */ - return cp - refname; + return check_refname_component_trailer(cp, refname, flags); } +#endif int check_refname_format(const char *refname, int flags) { diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh index de6db86..7f1bd74 100755 --- a/t/t5511-refspec.sh +++ b/t/t5511-refspec.sh @@ -88,4 +88,17 @@ test_refspec fetch "refs/heads/${good}" bad=$(printf '\011tab') test_refspec fetch "refs/heads/${bad}" invalid +test_refspec fetch 'refs/heads/a-very-long-refname' +test_refspec fetch 'refs/heads/.a-very-long-refname' invalid +test_refspec fetch 'refs/heads/abcdefgh0123..' invalid +test_refspec fetch 'refs/heads/abcdefgh01234..' invalid +test_refspec fetch 'refs/heads/abcdefgh012345..' invalid +test_refspec fetch 'refs/heads/abcdefgh0123456..' invalid +test_refspec fetch 'refs/heads/abcdefgh01234567..' invalid +test_refspec fetch 'refs/heads/abcdefgh0123.a' +test_refspec fetch 'refs/heads/abcdefgh01234.a' +test_refspec fetch 'refs/heads/abcdefgh012345.a' +test_refspec fetch 'refs/heads/abcdefgh0123456.a' +test_refspec fetch 'refs/heads/abcdefgh01234567.a' + test_done -- 2.0.0.rc1.18.gf763c0f -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html