On Thu, Feb 04, 2016 at 12:06:57PM +0000, Derek Morton wrote: > Added extended wildcard support when specifying --run-subtest. > > Wildcard format is as specified in rfc3977 and the uwildmat() implementation > is taken from libinn. > See https://tools.ietf.org/html/rfc3977#section-4 for a description of > allowed wildcard expressions. > > v2: Use comma as list separator (Ville Syrjala) > support both ^ and ! as not operators (Dave Gordon) > > v3: Updated to use uwildmat() (Dave Gordon) > > Signed-off-by: Derek Morton <derek.j.morton@xxxxxxxxx> > --- > COPYING | 21 +++ > lib/Makefile.sources | 2 + > lib/igt_core.c | 17 +- > lib/uwildmat/uwildmat.c | 474 ++++++++++++++++++++++++++++++++++++++++++++++++ > lib/uwildmat/uwildmat.h | 24 +++ Not really a fan of copying other sources into ours. Don't we have something ready-made that's generally available, or can we at least pull it in as a build-dep? Thanks, Daniel > 5 files changed, 536 insertions(+), 2 deletions(-) > create mode 100644 lib/uwildmat/uwildmat.c > create mode 100644 lib/uwildmat/uwildmat.h > > diff --git a/COPYING b/COPYING > index b8f6753..16375f2 100644 > --- a/COPYING > +++ b/COPYING > @@ -106,3 +106,24 @@ THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS > IN THE SOFTWARE. > + > +Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, > + 2013, 2014 by Internet Systems Consortium, Inc. ("ISC") > +Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, > + 2002, 2003 by The Internet Software Consortium and Rich Salz > + > +This code is derived from software contributed to the Internet Software > +Consortium by Rich Salz. > + > +Permission to use, copy, modify, and distribute this software for any > +purpose with or without fee is hereby granted, provided that the above > +copyright notice and this permission notice appear in all copies. > + > +THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH > +REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF > +MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY > +SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES > +WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN > +ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF > +OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > + > diff --git a/lib/Makefile.sources b/lib/Makefile.sources > index 4999868..e33861e 100644 > --- a/lib/Makefile.sources > +++ b/lib/Makefile.sources > @@ -60,6 +60,8 @@ libintel_tools_la_SOURCES = \ > igt_core.h \ > igt_draw.c \ > igt_draw.h \ > + uwildmat/uwildmat.h \ > + uwildmat/uwildmat.c \ > $(NULL) > > .PHONY: version.h.tmp > diff --git a/lib/igt_core.c b/lib/igt_core.c > index 6b69bb7..8e0bd2e 100644 > --- a/lib/igt_core.c > +++ b/lib/igt_core.c > @@ -56,7 +56,7 @@ > #include <ctype.h> > #include <limits.h> > #include <locale.h> > -#include <fnmatch.h> > +#include <uwildmat/uwildmat.h> > > #include "drmtest.h" > #include "intel_chipset.h" > @@ -209,6 +209,19 @@ > * intel gpu to be present). Then individual subtests can be run with > * "--run-subtest". Usage help for tests with subtests can be obtained with the > * "--help" command line option. > + * > + * A wildcard expression can be given to --run-subtest to specify a subset of > + * subtests to run. See https://tools.ietf.org/html/rfc3977#section-4 for a > + * description of allowed wildcard expressions. > + * Some examples of allowed wildcard expressions are: > + * > + * - '*basic*' match any subtest containing basic > + * - 'basic-???' match any subtest named basic- with 3 characters after - > + * - 'basic-[0-9]' match any subtest named basic- with a single number after - > + * - 'basic-[^0-9]' match any subtest named basic- with a single non numerical character after - > + * - 'basic*,advanced*' match any subtest starting basic or advanced > + * - '*,!basic*' match any subtest not starting basic > + * - 'basic*,!basic-render*' match any subtest starting basic but not starting basic-render > */ > > static unsigned int exit_handler_count; > @@ -814,7 +827,7 @@ bool __igt_run_subtest(const char *subtest_name) > } > > if (run_single_subtest) { > - if (fnmatch(run_single_subtest, subtest_name, 0) != 0) > + if (uwildmat(subtest_name, run_single_subtest) == 0) > return false; > else > run_single_subtest_found = true; > diff --git a/lib/uwildmat/uwildmat.c b/lib/uwildmat/uwildmat.c > new file mode 100644 > index 0000000..2d34742 > --- /dev/null > +++ b/lib/uwildmat/uwildmat.c > @@ -0,0 +1,474 @@ > +/* uwildmat.c is reused from libinn - https://launchpad.net/ubuntu/+source/inn2/2.5.4-1 > + > +This provides wild card matching originally used in InterNetNews and is > +described in https://tools.ietf.org/html/rfc3977#section-4 > + > +INN licence: > +INN as a whole and all code contained in it not otherwise marked with > +different licenses and/or copyrights is covered by the following copyright > +and license: > + > + Copyright (c) 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, > + 2013, 2014 by Internet Systems Consortium, Inc. ("ISC") > + Copyright (c) 1991, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, > + 2002, 2003 by The Internet Software Consortium and Rich Salz > + > + This code is derived from software contributed to the Internet Software > + Consortium by Rich Salz. > + > + Permission to use, copy, modify, and distribute this software for any > + purpose with or without fee is hereby granted, provided that the above > + copyright notice and this permission notice appear in all copies. > + > + THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH > + REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF > + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY > + SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES > + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN > + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF > + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. > + > +*/ > + > +/* $Id: uwildmat.c 8918 2010-01-22 23:28:28Z iulius $ > +** > +** wildmat pattern matching with Unicode UTF-8 extensions. > +** > +** Do shell-style pattern matching for ?, \, [], and * characters. Might not > +** be robust in face of malformed patterns; e.g., "foo[a-" could cause a > +** segmentation violation. It is 8-bit clean. (Robustness hopefully fixed > +** July 2000; all malformed patterns should now just fail to match anything.) > +** > +** Original by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986. > +** Rich $alz is now <rsalz@xxxxxxx>. > +** > +** April, 1991: Replaced mutually-recursive calls with in-line code for the > +** star character. > +** > +** Special thanks to Lars Mathiesen <thorinn@xxxxxxx> for the ABORT code. > +** This can greatly speed up failing wildcard patterns. For example: > +** > +** pattern: -*-*-*-*-*-*-12-*-*-*-m-*-*-* > +** text 1: -adobe-courier-bold-o-normal--12-120-75-75-m-70-iso8859-1 > +** text 2: -adobe-courier-bold-o-normal--12-120-75-75-X-70-iso8859-1 > +** > +** Text 1 matches with 51 calls, while text 2 fails with 54 calls. Without > +** the ABORT code, it takes 22310 calls to fail. Ugh. The following > +** explanation is from Lars: > +** > +** The precondition that must be fulfilled is that DoMatch will consume at > +** least one character in text. This is true if *p is neither '*' nor '\0'.) > +** The last return has ABORT instead of false to avoid quadratic behaviour in > +** cases like pattern "*a*b*c*d" with text "abcxxxxx". With false, each > +** star-loop has to run to the end of the text; with ABORT only the last one > +** does. > +** > +** Once the control of one instance of DoMatch enters the star-loop, that > +** instance will return either true or ABORT, and any calling instance will > +** therefore return immediately after (without calling recursively again). > +** In effect, only one star-loop is ever active. It would be possible to > +** modify the code to maintain this context explicitly, eliminating all > +** recursive calls at the cost of some complication and loss of clarity (and > +** the ABORT stuff seems to be unclear enough by itself). I think it would > +** be unwise to try to get this into a released version unless you have a > +** good test data base to try it out on. > +** > +** June, 1991: Robert Elz <kre@xxxxxxxxxxxxx> added minus and close bracket > +** handling for character sets. > +** > +** July, 2000: Largely rewritten by Russ Allbery <rra@xxxxxxxxxxxx> to add > +** support for ',', '!', and optionally '@' to the core wildmat routine. > +** Broke the character class matching into a separate function for clarity > +** since it's infrequently used in practice, and added some simple lookahead > +** to significantly decrease the recursive calls in the '*' matching code. > +** Added support for UTF-8 as the default character set for any high-bit > +** characters. > +** > +** For more information on UTF-8, see RFC 3629. > +** > +** Please note that this file is intentionally written so that conditionally > +** executed expressions are on separate lines from the condition to > +** facilitate analysis of the coverage of the test suite using purecov. > +** Please preserve this. As of March 11, 2001, purecov reports that the > +** accompanying test suite achieves 100% coverage of this file. > +*/ > + > +#include <string.h> > +#include "uwildmat.h" > + > +#define ABORT -1 > + > +/* Whether or not an octet looks like the start of a UTF-8 character. */ > +#define ISUTF8(c) (((c) & 0xc0) == 0xc0) > + > + > +/* > +** Determine the length of a non-ASCII character in octets (for advancing > +** pointers when skipping over characters). Takes a pointer to the start of > +** the character and to the last octet of the string. If end is NULL, expect > +** the string pointed to by start to be nul-terminated. If the character is > +** malformed UTF-8, return 1 to treat it like an eight-bit local character. > +*/ > +static int > +utf8_length(const unsigned char *start, const unsigned char *end) > +{ > + unsigned char mask = 0x80; > + const unsigned char *p; > + int length = 0; > + int left; > + > + for (; mask > 0 && (*start & mask) == mask; mask >>= 1) > + length++; > + if (length < 2 || length > 6) > + return 1; > + if (end != NULL && (end - start + 1) < length) > + return 1; > + left = length - 1; > + for (p = start + 1; left > 0 && (*p & 0xc0) == 0x80; p++) > + left--; > + return (left == 0) ? length : 1; > +} > + > + > +/* > +** Check whether a string contains only valid UTF-8 characters. > +*/ > +bool > +is_valid_utf8(const char *text) > +{ > + unsigned char mask; > + const unsigned char *p; > + int length; > + int left; > + > + for (p = (const unsigned char *)text; *p != '\0';) { > + mask = 0x80; > + length = 0; > + > + /* Find out the expected length of the character. */ > + for (; mask > 0 && (*p & mask) == mask; mask >>= 1) > + length++; > + > + p++; > + > + /* Valid ASCII. */ > + if (length == 0) > + continue; > + > + /* Invalid length. */ > + if (length < 2 || length > 6) > + return false; > + > + /* Check that each byte looks like 10xxxxxx, except for the first. */ > + left = length - 1; > + for (; left > 0 && (*p & 0xc0) == 0x80; p++) > + left--; > + > + if (left > 0) > + return false; > + } > + > + return true; > +} > + > + > +/* > +** Convert a UTF-8 character to UCS-4. Takes a pointer to the start of the > +** character and to the last octet of the string, and to a uint32_t into > +** which to put the decoded UCS-4 value. If end is NULL, expect the string > +** pointed to by start to be nul-terminated. Returns the number of octets in > +** the UTF-8 encoding. If the UTF-8 character is malformed, set result to > +** the decimal value of the first octet; this is wrong, but it will generally > +** cause the rest of the wildmat matching to do the right thing for non-UTF-8 > +** input. > +*/ > +static int > +utf8_decode(const unsigned char *start, const unsigned char *end, > + uint32_t *result) > +{ > + uint32_t value = 0; > + int length, i; > + const unsigned char *p = start; > + unsigned char mask; > + > + length = utf8_length(start, end); > + if (length < 2) { > + *result = *start; > + return 1; > + } > + mask = (1 << (7 - length)) - 1; > + value = *p & mask; > + p++; > + for (i = length - 1; i > 0; i--) { > + value = (value << 6) | (*p & 0x3f); > + p++; > + } > + *result = value; > + return length; > +} > + > + > +/* > +** Match a character class against text, a UCS-4 character. start is a > +** pointer to the first character of the character class, end a pointer to > +** the last. Returns whether the class matches that character. > +*/ > +static bool > +match_class(uint32_t text, const unsigned char *start, > + const unsigned char *end) > +{ > + bool reversed, allowrange; > + const unsigned char *p = start; > + uint32_t first = 0; > + uint32_t last; > + > + /* Check for an inverted character class (starting with ^). If the > + character matches the character class, we return !reversed; that way, > + we return true if it's a regular character class and false if it's a > + reversed one. If the character doesn't match, we return reversed. */ > + reversed = (*p == '^'); > + if (reversed) > + p++; > + > + /* Walk through the character class until we reach the end or find a > + match, handling character ranges as we go. Only permit a range to > + start when allowrange is true; this allows - to be treated like a > + normal character as the first character of the class and catches > + malformed ranges like a-e-n. We treat the character at the beginning > + of a range as both a regular member of the class and the beginning of > + the range; this is harmless (although it means that malformed ranges > + like m-a will match m and nothing else). */ > + allowrange = false; > + while (p <= end) { > + if (allowrange && *p == '-' && p < end) { > + p++; > + p += utf8_decode(p, end, &last); > + if (text >= first && text <= last) > + return !reversed; > + allowrange = false; > + } else { > + p += utf8_decode(p, end, &first); > + if (text == first) > + return !reversed; > + allowrange = true; > + } > + } > + return reversed; > +} > + > + > +/* > +** Match the text against the pattern between start and end. This is a > +** single pattern; a leading ! or @ must already be taken care of, and > +** commas must be dealt with outside of this routine. > +*/ > +static int > +match_pattern(const unsigned char *text, const unsigned char *start, > + const unsigned char *end) > +{ > + const unsigned char *q, *endclass; > + const unsigned char *p = start; > + bool ismeta; > + int matched, width; > + uint32_t c; > + > + for (; p <= end; p++) { > + if (!*text && *p != '*') > + return ABORT; > + > + switch (*p) { > + case '\\': > + if (!*++p) > + return ABORT; > + /* Fall through. */ > + > + default: > + if (*text++ != *p) > + return false; > + break; > + > + case '?': > + text += ISUTF8(*text) ? utf8_length(text, NULL) : 1; > + break; > + > + case '*': > + /* Consecutive stars are equivalent to one. Advance pattern to > + the character after the star. */ > + for (++p; *p == '*'; p++) > + ; > + > + /* A trailing star will match anything. */ > + if (p > end) > + return true; > + > + /* Basic algorithm: Recurse at each point where the * could > + possibly match. If the match succeeds or aborts, return > + immediately; otherwise, try the next position. > + > + Optimization: If the character after the * in the pattern > + isn't a metacharacter (the common case), then the * has to > + consume characters at least up to the next occurrence of that > + character in the text. Scan forward for those points rather > + than recursing at every possible point to save the extra > + function call overhead. */ > + ismeta = (*p == '[' || *p == '?' || *p == '\\'); > + while (*text) { > + width = ISUTF8(*text) ? utf8_length(text, NULL) : 1; > + if (ismeta) { > + matched = match_pattern(text, p, end); > + text += width; > + } else { > + while (*text && *text != *p) { > + text += width; > + width = ISUTF8(*text) ? utf8_length(text, NULL) : 1; > + } > + if (!*text) > + return ABORT; > + matched = match_pattern(++text, p + 1, end); > + } > + if (matched != false) > + return matched; > + } > + return ABORT; > + > + case '[': > + /* Find the end of the character class, making sure not to pick > + up a close bracket at the beginning of the class. */ > + p++; > + q = p + (*p == '^') + 1; > + if (q > end) > + return ABORT; > + endclass = memchr(q, ']', (size_t) (end - q + 1)); > + if (!endclass) > + return ABORT; > + > + /* Do the heavy lifting in another function for clarity, since > + character classes are an uncommon case. */ > + text += utf8_decode(text, NULL, &c); > + if (!match_class(c, p, endclass - 1)) > + return false; > + p = endclass; > + break; > + } > + } > + > + return (*text == '\0'); > +} > + > + > +/* > +** Takes text and a wildmat expression; a wildmat expression is a > +** comma-separated list of wildmat patterns, optionally preceded by ! to > +** invert the sense of the expression. Returns UWILDMAT_MATCH if that > +** expression matches the text, UWILDMAT_FAIL otherwise. If allowpoison is > +** set, allow @ to introduce a poison expression (the same as !, but if it > +** triggers the failed match the routine returns UWILDMAT_POISON instead). > +*/ > +static enum uwildmat > +match_expression(const unsigned char *text, const unsigned char *start, > + bool allowpoison) > +{ > + const unsigned char *end, *split; > + const unsigned char *p = start; > + bool reverse, escaped; > + bool match = false; > + bool poison = false; > + bool poisoned = false; > + > + /* Handle the empty expression separately, since otherwise end will be > + set to an invalid pointer. */ > + if (!*p) > + return !*text ? UWILDMAT_MATCH : UWILDMAT_FAIL; > + end = start + strlen((const char *) start) - 1; > + > + /* Main match loop. Find each comma that separates patterns, and attempt > + to match the text with each pattern in order. The last matching > + pattern determines whether the whole expression matches. */ > + for (; p <= end + 1; p = split + 1) { > + if (allowpoison) > + poison = (*p == '@'); > + reverse = (*p == '!') || poison; > + if (reverse) > + p++; > + > + /* Find the first unescaped comma, if any. If there is none, split > + will be one greater than end and point at the nul at the end of > + the string. */ > + for (escaped = false, split = p; split <= end; split++) { > + if (*split == '[') { > + split++; > + if (*split == ']') > + split++; > + while (split <= end && *split != ']') > + split++; > + } > + if (*split == ',' && !escaped) > + break; > + escaped = (*split == '\\') ? !escaped : false; > + } > + > + /* Optimization: If match == !reverse and poison == poisoned, this > + pattern can't change the result, so don't do any work. */ > + if (match == !reverse && poison == poisoned) > + continue; > + if (match_pattern(text, p, split - 1) == true) { > + poisoned = poison; > + match = !reverse; > + } > + } > + if (poisoned) > + return UWILDMAT_POISON; > + return match ? UWILDMAT_MATCH : UWILDMAT_FAIL; > +} > + > + > +/* > +** User-level routine used for wildmats where @ should be treated as a > +** regular character. > +*/ > +bool > +uwildmat(const char *text, const char *pat) > +{ > + const unsigned char *utext = (const unsigned char *) text; > + const unsigned char *upat = (const unsigned char *) pat; > + > + if (upat[0] == '*' && upat[1] == '\0') > + return true; > + else > + return (match_expression(utext, upat, false) == UWILDMAT_MATCH); > +} > + > + > +/* > +** User-level routine used for wildmats that support poison matches. > +*/ > +enum uwildmat > +uwildmat_poison(const char *text, const char *pat) > +{ > + const unsigned char *utext = (const unsigned char *) text; > + const unsigned char *upat = (const unsigned char *) pat; > + > + if (upat[0] == '*' && upat[1] == '\0') > + return UWILDMAT_MATCH; > + else > + return match_expression(utext, upat, true); > +} > + > + > +/* > +** User-level routine for simple expressions (neither , nor ! are special). > +*/ > +bool > +uwildmat_simple(const char *text, const char *pat) > +{ > + const unsigned char *utext = (const unsigned char *) text; > + const unsigned char *upat = (const unsigned char *) pat; > + size_t length; > + > + if (upat[0] == '*' && upat[1] == '\0') > + return true; > + else { > + length = strlen(pat); > + return (match_pattern(utext, upat, upat + length - 1) == true); > + } > +} > diff --git a/lib/uwildmat/uwildmat.h b/lib/uwildmat/uwildmat.h > new file mode 100644 > index 0000000..2e47189 > --- /dev/null > +++ b/lib/uwildmat/uwildmat.h > @@ -0,0 +1,24 @@ > + > +#ifndef UWILDMAT_H > +#define UWILDMAT_H 1 > + > +#include <stdio.h> > +#include <sys/types.h> > +#include <stdbool.h> > + > +/* > +** WILDMAT MATCHING > +*/ > +enum uwildmat { > + UWILDMAT_FAIL = 0, > + UWILDMAT_MATCH = 1, > + UWILDMAT_POISON > +}; > + > +extern bool is_valid_utf8(const char *start); > +extern bool uwildmat(const char *text, const char *pat); > +extern bool uwildmat_simple(const char *text, const char *pat); > +extern enum uwildmat uwildmat_poison(const char *text, const char *pat); > + > + > +#endif /* UWILDMAT_H */ > -- > 1.9.1 > -- Daniel Vetter Software Engineer, Intel Corporation http://blog.ffwll.ch _______________________________________________ Intel-gfx mailing list Intel-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/intel-gfx