Optimize check_refname_component using SSE2 on x86_64. git rev-parse HEAD is a good test-case for this, since it does almost nothing except parse refs. For one particular repo with about 60k refs, almost all packed, the timings are: Look up table: 29 ms SSE2: 23 ms This cuts about 20% off of the runtime. The configure.ac changes include code from the GNU C Library written by Joseph S. Myers <joseph at codesourcery dot com>. Ondřej Bílka <neleai@xxxxxxxxx> suggested an SSE2 approach to the substring searches, which netted a speed boost over the SSE4.2 code I had initially written. Signed-off-by: David Turner <dturner@xxxxxxxxxxx> --- git-compat-util.h | 10 +++ refs.c | 244 +++++++++++++++++++++++++++++++++++++++++++++-------- t/t5511-refspec.sh | 19 +++++ 3 files changed, 239 insertions(+), 34 deletions(-) diff --git a/git-compat-util.h b/git-compat-util.h index f6d3a46..291d46b 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -668,6 +668,16 @@ void git_qsort(void *base, size_t nmemb, size_t size, #endif #endif +#if defined(__GNUC__) && defined(__x86_64__) +#include <emmintrin.h> +/* This is the system memory page size; it's used so that we can read + * outside the bounds of an allocation without segfaulting. + */ +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif +#endif + #ifdef UNRELIABLE_FSTAT #define fstat_is_reliable() 0 #else diff --git a/refs.c b/refs.c index 46139d2..4c39af7 100644 --- a/refs.c +++ b/refs.c @@ -8,22 +8,43 @@ /* * How to handle various characters in refnames: * 0: An acceptable character for refs - * 1: End-of-component - * 2: ., look for a preceding . to reject .. in refs - * 3: {, look for a preceding @ to reject @{ in refs - * 4: A bad character: ASCII control characters, "~", "^", ":" or SP + * 1: \0: End-of-component and string + * 2: /: End-of-component + * 3: ., look for a preceding . to reject .. in refs + * 4: {, look for a preceding @ to reject @{ in refs + * 5: *, usually a bad character except, once as a wildcard + * 6: A bad character except * (see check_refname_component below) */ static unsigned char refname_disposition[256] = { - 1, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, - 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 2, 1, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 0, 0, 4, + 1, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 3, 2, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0, 0, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 4, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 6, 0, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4 + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 0, 6, 6 }; +static int check_refname_component_trailer(const char *cp, const char *refname, int flags) +{ + if (cp == refname) + return 0; /* Component has zero length. */ + if (refname[0] == '.') { + if (!(flags & REFNAME_DOT_COMPONENT)) + return -1; /* Component starts with '.'. */ + /* + * Even if leading dots are allowed, don't allow "." + * as a component (".." is prevented by a rule above). + */ + if (refname[1] == '\0') + return -1; /* Component equals ".". */ + } + if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5)) + return -1; /* Refname ends with ".lock". */ + return cp - refname; +} + /* * Try to read one refname component from the front of refname. * Return the length of the component found, or -1 if the component is @@ -33,8 +54,9 @@ static unsigned char refname_disposition[256] = { * - any path component of it begins with ".", or * - it has double dots "..", or * - it has ASCII control character, "~", "^", ":" or SP, anywhere, or - * - it ends with a "/". - * - it ends with ".lock" + * - it has pattern-matching notation "*", "?", "[", anywhere, or + * - it ends with a "/", or + * - it ends with ".lock", or * - it contains a "\" (backslash) */ static int check_refname_component(const char *refname, int flags) @@ -46,47 +68,32 @@ static int check_refname_component(const char *refname, int flags) int ch = *cp & 255; unsigned char disp = refname_disposition[ch]; switch(disp) { - case 1: - goto out; + case 1: /* fall-through */ case 2: + return check_refname_component_trailer(cp, refname, flags); + case 3: if (last == '.') return -1; /* Refname contains "..". */ break; - case 3: + case 4: if (last == '@') return -1; /* Refname contains "@{". */ break; - case 4: + case 5: /* fall-through */ + case 6: return -1; } last = ch; } -out: - if (cp == refname) - return 0; /* Component has zero length. */ - if (refname[0] == '.') { - if (!(flags & REFNAME_DOT_COMPONENT)) - return -1; /* Component starts with '.'. */ - /* - * Even if leading dots are allowed, don't allow "." - * as a component (".." is prevented by a rule above). - */ - if (refname[1] == '\0') - return -1; /* Component equals ".". */ - } - if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5)) - return -1; /* Refname ends with ".lock". */ - return cp - refname; } -int check_refname_format(const char *refname, int flags) +static int check_refname_format_bytewise (const char *refname, int flags) { int component_len, component_count = 0; if (!strcmp(refname, "@")) /* Refname is a single character '@'. */ return -1; - while (1) { /* We are at the start of a path component. */ component_len = check_refname_component(refname, flags); @@ -115,6 +122,175 @@ int check_refname_format(const char *refname, int flags) return 0; } +#if defined(__GNUC__) && defined(__x86_64__) +#define SSE_VECTOR_BYTES 16 + +/* Vectorized version of check_refname_format. */ +int check_refname_format(const char *refname, int flags) +{ + const char *cp = refname; + + const __m128i dot = _mm_set1_epi8 ('.'); + const __m128i at = _mm_set1_epi8 ('@'); + const __m128i curly = _mm_set1_epi8 ('{'); + const __m128i slash = _mm_set1_epi8 ('/'); + const __m128i zero = _mm_set1_epi8 ('\000'); + + /* below '*', all characters are forbidden or rare */ + const __m128i star_ub = _mm_set1_epi8 ('*' + 1); + + const __m128i colon = _mm_set1_epi8 (':'); + const __m128i question = _mm_set1_epi8 ('?'); + + /* '['..'^' contains 4 characters: 3 forbidden and 1 rare */ + const __m128i bracket_lb = _mm_set1_epi8 ('[' - 1); + const __m128i caret_ub = _mm_set1_epi8 ('^' + 1); + + /* '~' and above are forbidden */ + const __m128i tilde_lb = _mm_set1_epi8 ('~' - 1); + + if (refname[0] == 0 || refname[0] == '/') { + /* entirely empty ref or initial ref component */ + return -1; + } + + /* Initial ref component of '.'; below we look for /. so we'll + * miss this. */ + if (refname[0] == '.') { + if (refname[1] == '/' || refname[1] == '\0') + return -1; + if (!(flags & REFNAME_DOT_COMPONENT)) + return -1; + } + while(1) { + __m128i tmp, tmp1, result; + uint64_t mask; + + if ((uintptr_t) cp % PAGE_SIZE > PAGE_SIZE - SSE_VECTOR_BYTES - 1) + /* End-of-page; fall back to slow method for + * this entire ref. */ + return check_refname_format_bytewise(refname, flags); + + tmp = _mm_loadu_si128((__m128i *)cp); + tmp1 = _mm_loadu_si128((__m128i *)(cp + 1)); + + /* + * This range (note the lt) contains some + * permissible-but-rare characters (including all + * characters >= 128), which we handle later. It also + * includes \000. + */ + result = _mm_cmplt_epi8(tmp, star_ub); + + result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, question)); + result = _mm_or_si128(result, _mm_cmpeq_epi8(tmp, colon)); + + /* This range contains the permissible ] as bycatch */ + result = _mm_or_si128(result, _mm_and_si128 ( + _mm_cmpgt_epi8(tmp, bracket_lb), + _mm_cmplt_epi8(tmp, caret_ub))); + + result = _mm_or_si128(result, _mm_cmpgt_epi8(tmp, tilde_lb)); + + /* .. */ + result = _mm_or_si128(result, _mm_and_si128 ( + _mm_cmpeq_epi8(tmp, dot), + _mm_cmpeq_epi8(tmp1, dot))); + /* @{ */ + result = _mm_or_si128(result, _mm_and_si128 ( + _mm_cmpeq_epi8(tmp, at), + _mm_cmpeq_epi8(tmp1, curly))); + /* // */ + result = _mm_or_si128(result, _mm_and_si128 ( + _mm_cmpeq_epi8(tmp, slash), + _mm_cmpeq_epi8(tmp1, slash))); + /* trailing / */ + result = _mm_or_si128(result, _mm_and_si128 ( + _mm_cmpeq_epi8(tmp, slash), + _mm_cmpeq_epi8(tmp1, zero))); + /* + * Even though /. is not necessarily an error, we flag + * it anyway. If we find it, we'll check if it's valid + * and if so we'll advance just past it. + */ + result = _mm_or_si128(result, _mm_and_si128 ( + _mm_cmpeq_epi8(tmp, slash), + _mm_cmpeq_epi8(tmp1, dot))); + + mask = _mm_movemask_epi8(result); + if (mask) { + /* We've found either end-of-string, or some + probably-bad character or substring. */ + int i = __builtin_ctz(mask); + switch (refname_disposition[cp[i] & 255]) { + case 0: + /* + * bycatch: a good character that's in + * one of the ranges of mostly-forbidden + * characters + */ + cp += i + 1; + continue; + case 1: + cp += i; + goto success; + case 2: + /* + * Even if leading dots are allowed, don't + * allow "." as a component (".." is + * prevented by case 3 below). + */ + if (cp[i + 1] == '.') { + if (cp[i + 2] == '\0') + return -1; + if (flags & REFNAME_DOT_COMPONENT) { + /* skip to just after the /. */ + cp += i + 2; + continue; + } + return -1; + } else if (cp[i + 1] == '/' || cp[i + 1] == '\0') + return -1; + break; + case 3: + if (cp[i + 1] == '.' || cp[i + 1] == '\0') + return -1; + break; + case 4: + if (cp[i + 1] == '{') + return -1; + break; + case 5: + if (((cp == refname + i) || cp[i - 1] == '/') + && (cp[i + 1] == '/' || cp[i + 1] == 0)) + if (flags & REFNAME_REFSPEC_PATTERN) { + flags &= ~REFNAME_REFSPEC_PATTERN; + /* restart after the * */ + cp += i + 1; + continue; + } + /* fall-through */ + case 6: + return -1; + } + } else + cp += SSE_VECTOR_BYTES; + } +success: + if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5)) + return -1; /* Refname ends with ".lock". */ + return 0; +} + +#else + +int check_refname_format (const char *refname, int flags) +{ + return check_refname_format_bytewise(refname, flags); +} + +#endif + struct ref_entry; /* diff --git a/t/t5511-refspec.sh b/t/t5511-refspec.sh index de6db86..ef0f095 100755 --- a/t/t5511-refspec.sh +++ b/t/t5511-refspec.sh @@ -29,6 +29,10 @@ test_refspec push '+:' test_refspec fetch '' test_refspec fetch ':' test_refspec fetch '::' invalid +test_refspec fetch '.' invalid +test_refspec fetch './frotz' invalid +test_refspec fetch 'refs/.' invalid +test_refspec push '*//:refs/remotes/frotz/*' invalid test_refspec push 'refs/heads/*:refs/remotes/frotz/*' test_refspec push 'refs/heads/*:refs/remotes/frotz' invalid @@ -71,6 +75,8 @@ test_refspec fetch ':refs/remotes/frotz/HEAD-to-me' test_refspec push ':refs/remotes/frotz/delete me' invalid test_refspec fetch ':refs/remotes/frotz/HEAD to me' invalid +test_refspec fetch ':refs/remotes/frotz/something.lock' invalid + test_refspec fetch 'refs/heads/*/for-linus:refs/remotes/mine/*-blah' invalid test_refspec push 'refs/heads/*/for-linus:refs/remotes/mine/*-blah' invalid @@ -88,4 +94,17 @@ test_refspec fetch "refs/heads/${good}" bad=$(printf '\011tab') test_refspec fetch "refs/heads/${bad}" invalid +test_refspec fetch 'refs/heads/a-very-long-refname' +test_refspec fetch 'refs/heads/.a-very-long-refname' invalid +test_refspec fetch 'refs/heads/abcdefgh0123..' invalid +test_refspec fetch 'refs/heads/abcdefgh01234..' invalid +test_refspec fetch 'refs/heads/abcdefgh012345..' invalid +test_refspec fetch 'refs/heads/abcdefgh0123456..' invalid +test_refspec fetch 'refs/heads/abcdefgh01234567..' invalid +test_refspec fetch 'refs/heads/abcdefgh0123.a' +test_refspec fetch 'refs/heads/abcdefgh01234.a' +test_refspec fetch 'refs/heads/abcdefgh012345.a' +test_refspec fetch 'refs/heads/abcdefgh0123456.a' +test_refspec fetch 'refs/heads/abcdefgh01234567.a' + test_done -- 2.0.0.rc1.24.g0588c94.dirty -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html