David Turner <dturner@xxxxxxxxxxxxxxxx> writes: > diff --git a/git-compat-util.h b/git-compat-util.h > index f6d3a46..254487a 100644 > --- a/git-compat-util.h > +++ b/git-compat-util.h > @@ -668,6 +668,26 @@ void git_qsort(void *base, size_t nmemb, size_t size, > #endif > #endif > > +#ifndef NO_SSE42 > +#include <nmmintrin.h> > +/* Clang ships with a version of nmmintrin.h that's incomplete; if > + * necessary, we define the constants that we're going to use. */ As pointed out by Michael already, we format multiline comments with no text on the opening line: /* * Clang ships * ... to use. */ > +#ifndef _SIDD_UBYTE_OPS > +#define _SIDD_UBYTE_OPS 0x00 > +#define _SIDD_CMP_EQUAL_ANY 0x00 > +#define _SIDD_CMP_RANGES 0x04 > +#define _SIDD_CMP_EQUAL_ORDERED 0x0c > +#define _SIDD_NEGATIVE_POLARITY 0x10 > +#endif > + > +/* This is the system memory page size; it's used so that we can read > + * outside the bounds of an allocation without segfaulting. It is > + * assumed to be a power of 2. */ > +#ifndef PAGE_SIZE > +#define PAGE_SIZE 4096 > +#endif > +#endif > + > #ifdef UNRELIABLE_FSTAT > #define fstat_is_reliable() 0 > #else > diff --git a/refs.c b/refs.c > index dd28f2a..22a2dae 100644 > --- a/refs.c > +++ b/refs.c > @@ -24,6 +24,25 @@ static unsigned char refname_disposition[256] = { > 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, 4, 4 > }; > > +static int check_refname_component_trailer(const char *cp, const char *refname, int flags) > +{ > + if (cp == refname) > + return 0; /* Component has zero length. */ > + if (refname[0] == '.') { > + if (!(flags & REFNAME_DOT_COMPONENT)) > + return -1; /* Component starts with '.'. */ > + /* > + * Even if leading dots are allowed, don't allow "." > + * as a component (".." is prevented by a rule above). > + */ > + if (refname[1] == '\0') > + return -1; /* Component equals ".". */ > + } > + if (cp - refname >= 5 && !memcmp(cp - 5, ".lock", 5)) > + return -1; /* Refname ends with ".lock". 
*/ > + return cp - refname; > +} > + > /* > * Try to read one refname component from the front of refname. > * Return the length of the component found, or -1 if the component is > @@ -37,7 +56,7 @@ static unsigned char refname_disposition[256] = { > * - it ends with ".lock" > * - it contains a "\" (backslash) > */ > -static int check_refname_component(const char *refname, int flags) > +static int check_refname_component_1(const char *refname, int flags) > { > const char *cp; > char last = '\0'; > @@ -47,7 +66,7 @@ static int check_refname_component(const char *refname, int flags) > unsigned char disp = refname_disposition[ch]; > switch(disp) { > case 1: > - goto out; > + return check_refname_component_trailer(cp, refname, flags); > case 2: > if (last == '.') > return -1; /* Refname contains "..". */ > @@ -61,23 +80,86 @@ static int check_refname_component(const char *refname, int flags) > } > last = ch; > } > -out: > - if (cp == refname) > - return 0; /* Component has zero length. */ > - if (refname[0] == '.') { > - if (!(flags & REFNAME_DOT_COMPONENT)) > - return -1; /* Component starts with '.'. */ > - /* > - * Even if leading dots are allowed, don't allow "." > - * as a component (".." is prevented by a rule above). > - */ > - if (refname[1] == '\0') > - return -1; /* Component equals ".". */ > +} > + > +#ifdef NO_SSE42 > +#define check_refname_component check_refname_component_1 > +#else > +#define BLOCK_SIZE 16 Is this macro name safe? It feels a bit too generic/broad, and it is asking for a collision with some system-defined block-size constant coming from a random <*.h> header file, but maybe it's just me. 
> +/* Vectorized version of check_refname_component */ > +static int check_refname_component(const char *refname, int flags) > +{ > + const __m128i *refname_vec = (__m128i*) refname; > + > + /* Character ranges for characters forbidden in refs; see > + * above */ > + static const __v16qi bad = { > + 0x01, 0x20, 0x7e, 0x7f, 0x5e, 0x5e, 0x3a, 0x3a, > + 0x5b, 0x5c, 0x2a, 0x2a, 0x3f, 0x3f, 0x3f, 0x3f}; > + > + static const __v16qi nonslashes = { > + '\001', '/' -1, '/' + 1, 0xff, > + }; > + > + static const __v16qi dotdot = {'.','.',0}; > + static const __v16qi atcurly = {'@','{',0}; s/,/, /g; please. > + const __m128i *vp; > + const char *cp = (const char *)refname_vec; > + > + int dotdotpos = BLOCK_SIZE, atcurlypos = BLOCK_SIZE; > + for (vp = refname_vec; ; vp++) { > + __m128i tmp; > + int endpos; > + > + /* Handle case of forbidden substrings .. and @{ crossing > + * sixteen-byte boundaries */ > + if (dotdotpos == 15 && *cp == '.') > + return -1; > + > + if (atcurlypos == 15 && *cp == '{') > + return -1; > + > + if (((uintptr_t) vp & (PAGE_SIZE - 1)) > PAGE_SIZE - BLOCK_SIZE) > + /* End-of-page; fall back to slow method for > + * this entire component. */ > + return check_refname_component_1(refname, flags); It is somewhat sad that we have to redo the whole thing, but nobody higher in the callchain knows how long the refname will be before calling us, so this cannot be avoided. -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html