Re: [Outreachy][PATCH v4] Port helper/test-ctype.c to unit-tests/t-ctype.c

René Scharfe <l.s.r@xxxxxx> · Sun, 7 Jan 2024 13:45:59 +0100

Am 05.01.24 um 17:14 schrieb Achu Luma:
> In the recent codebase update (8bf6fbd00d (Merge branch
> 'js/doc-unit-tests', 2023-12-09)), a new unit testing framework was
> merged, providing a standardized approach for testing C code. Prior to
> this update, some unit tests relied on the test helper mechanism,
> lacking a dedicated unit testing framework. It's more natural to perform
> these unit tests using the new unit test framework.
>
> This commit migrates the unit tests for C character classification
> functions (isdigit(), isspace(), etc) from the legacy approach
> using the test-tool command `test-tool ctype` in t/helper/test-ctype.c
> to the new unit testing framework (t/unit-tests/test-lib.h).
>
> The migration involves refactoring the tests to utilize the testing
> macros provided by the framework (TEST() and check_*()).
>
> Mentored-by: Christian Couder <chriscool@xxxxxxxxxxxxx>
> Helped-by: René Scharfe <l.s.r@xxxxxx>
> Helped-by: Phillip Wood <phillip.wood123@xxxxxxxxx>
> Helped-by: Taylor Blau <me@xxxxxxxxxxxx>
> Signed-off-by: Achu Luma <ach.lumap@xxxxxxxxx>
> ---

[snip]

> diff --git a/t/helper/test-ctype.c b/t/helper/test-ctype.c
> deleted file mode 100644
> index e5659df40b..0000000000
> --- a/t/helper/test-ctype.c
> +++ /dev/null
> @@ -1,70 +0,0 @@
> -#include "test-tool.h"
> -
> -static int rc;
> -
> -static void report_error(const char *class, int ch)
> -{
> -	printf("%s classifies char %d (0x%02x) wrongly\n", class, ch, ch);
> -	rc = 1;
> -}
> -
> -static int is_in(const char *s, int ch)
> -{
> -	/*
> -	 * We can't find NUL using strchr. Accept it as the first
> -	 * character in the spec -- there are no empty classes.
> -	 */
> -	if (ch == '\0')
> -		return ch == *s;
> -	if (*s == '\0')
> -		s++;
> -	return !!strchr(s, ch);
> -}
> -
> -#define TEST_CLASS(t,s) {			\
> -	int i;					\
> -	for (i = 0; i < 256; i++) {		\
> -		if (is_in(s, i) != t(i))	\
> -			report_error(#t, i);	\
> -	}					\
> -	if (t(EOF))				\
> -		report_error(#t, EOF);		\
> -}
> -
> -#define DIGIT "0123456789"
> -#define LOWER "abcdefghijklmnopqrstuvwxyz"
> -#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> -#define PUNCT "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
> -#define ASCII \
> -	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
> -	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
> -	"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" \
> -	"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
> -	"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
> -	"\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" \
> -	"\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
> -	"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
> -#define CNTRL \
> -	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
> -	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
> -	"\x7f"
> -
> -int cmd__ctype(int argc UNUSED, const char **argv UNUSED)
> -{
> -	TEST_CLASS(isdigit, DIGIT);
> -	TEST_CLASS(isspace, " \n\r\t");
> -	TEST_CLASS(isalpha, LOWER UPPER);
> -	TEST_CLASS(isalnum, LOWER UPPER DIGIT);
> -	TEST_CLASS(is_glob_special, "*?[\\");
> -	TEST_CLASS(is_regex_special, "$()*+.?[\\^{|");
> -	TEST_CLASS(is_pathspec_magic, "!\"#%&',-/:;<=>@_`~");
> -	TEST_CLASS(isascii, ASCII);
> -	TEST_CLASS(islower, LOWER);
> -	TEST_CLASS(isupper, UPPER);
> -	TEST_CLASS(iscntrl, CNTRL);
> -	TEST_CLASS(ispunct, PUNCT);
> -	TEST_CLASS(isxdigit, DIGIT "abcdefABCDEF");
> -	TEST_CLASS(isprint, LOWER UPPER DIGIT PUNCT " ");
> -
> -	return rc;
> -}

[snip]

> diff --git a/t/unit-tests/t-ctype.c b/t/unit-tests/t-ctype.c
> new file mode 100644
> index 0000000000..3a338df541
> --- /dev/null
> +++ b/t/unit-tests/t-ctype.c
> @@ -0,0 +1,78 @@
> +#include "test-lib.h"
> +
> +static int is_in(const char *s, int ch)
> +{
> +	/*
> +	 * We can't find NUL using strchr. Accept it as the first
> +	 * character in the spec -- there are no empty classes.
> +	 */
> +	if (ch == '\0')
> +		return ch == *s;
> +	if (*s == '\0')
> +		s++;
> +	return !!strchr(s, ch);
> +}
> +
> +/* Macro to test a character type */
> +#define TEST_CTYPE_FUNC(func, string) \
> +static void test_ctype_##func(void) { \
> +	for (int i = 0; i < 256; i++) { \
> +		if (!check_int(func(i), ==, is_in(string, i))) \
> +			test_msg("       i: 0x%02x", i); \
> +	} \
> +}
> +
> +#define TEST_CHAR_CLASS(class) TEST(test_ctype_##class(), #class " works")
> +
> +#define DIGIT "0123456789"
> +#define LOWER "abcdefghijklmnopqrstuvwxyz"
> +#define UPPER "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
> +#define PUNCT "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~"
> +#define ASCII \
> +	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
> +	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
> +	"\x20\x21\x22\x23\x24\x25\x26\x27\x28\x29\x2a\x2b\x2c\x2d\x2e\x2f" \
> +	"\x30\x31\x32\x33\x34\x35\x36\x37\x38\x39\x3a\x3b\x3c\x3d\x3e\x3f" \
> +	"\x40\x41\x42\x43\x44\x45\x46\x47\x48\x49\x4a\x4b\x4c\x4d\x4e\x4f" \
> +	"\x50\x51\x52\x53\x54\x55\x56\x57\x58\x59\x5a\x5b\x5c\x5d\x5e\x5f" \
> +	"\x60\x61\x62\x63\x64\x65\x66\x67\x68\x69\x6a\x6b\x6c\x6d\x6e\x6f" \
> +	"\x70\x71\x72\x73\x74\x75\x76\x77\x78\x79\x7a\x7b\x7c\x7d\x7e\x7f"
> +#define CNTRL \
> +	"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" \
> +	"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" \
> +	"\x7f"
> +
> +TEST_CTYPE_FUNC(isdigit, DIGIT)
> +TEST_CTYPE_FUNC(isspace, " \n\r\t")
> +TEST_CTYPE_FUNC(isalpha, LOWER UPPER)
> +TEST_CTYPE_FUNC(isalnum, LOWER UPPER DIGIT)
> +TEST_CTYPE_FUNC(is_glob_special, "*?[\\")
> +TEST_CTYPE_FUNC(is_regex_special, "$()*+.?[\\^{|")
> +TEST_CTYPE_FUNC(is_pathspec_magic, "!\"#%&',-/:;<=>@_`~")
> +TEST_CTYPE_FUNC(isascii, ASCII)
> +TEST_CTYPE_FUNC(islower, LOWER)
> +TEST_CTYPE_FUNC(isupper, UPPER)
> +TEST_CTYPE_FUNC(iscntrl, CNTRL)
> +TEST_CTYPE_FUNC(ispunct, PUNCT)
> +TEST_CTYPE_FUNC(isxdigit, DIGIT "abcdefABCDEF")
> +TEST_CTYPE_FUNC(isprint, LOWER UPPER DIGIT PUNCT " ")
> +
> +int cmd_main(int argc, const char **argv) {
> +	/* Run all character type tests */
> +	TEST_CHAR_CLASS(isspace);
> +	TEST_CHAR_CLASS(isdigit);
> +	TEST_CHAR_CLASS(isalpha);
> +	TEST_CHAR_CLASS(isalnum);
> +	TEST_CHAR_CLASS(is_glob_special);
> +	TEST_CHAR_CLASS(is_regex_special);
> +	TEST_CHAR_CLASS(is_pathspec_magic);
> +	TEST_CHAR_CLASS(isascii);
> +	TEST_CHAR_CLASS(islower);
> +	TEST_CHAR_CLASS(isupper);
> +	TEST_CHAR_CLASS(iscntrl);
> +	TEST_CHAR_CLASS(ispunct);
> +	TEST_CHAR_CLASS(isxdigit);
> +	TEST_CHAR_CLASS(isprint);
> +
> +	return test_done();
> +}
> --
> 2.42.0.windows.2
>

Quite an improvement over v3!  Now you only need to repeat the class
names once.

Can we do any better?  We could simply have one test per character per
class like this:

#define TEST_CHAR_CLASS(class, expect) \
	for (int i = 0; i < 256; i++) \
		TEST(check_int(class(i), ==, is_in(expect, i)),	\
		     "%s(0x%02x) works", #class, i)

Which would be used like this:

	TEST_CHAR_CLASS(isspace, " \n\r\t");

With that there is no need to define any functions anymore.  We also
don't need any custom output, as the test name includes the character
code.  Downside: We'd have thousands of tests.  But is that actually
a downside or is that how the unit test framework is supposed to be
used?

If we need to aggregate the results by class for some reason, we
could use strings, like we already do for defining the expected class
members.  We need special handling for NUL, as that character
terminates C strings, but we can put all other characters into a
string and then use check_str:

#define TEST_CHAR_CLASS(class, expect) \
	do { \
		int expect_nul = expect[0] == '\0'; \
		char expect_rest[256] = {0}; \
		char actual_rest[256] = {0}; \
		for (int i = 1, j = 0; i < 256; i++) \
			if (strchr(&expect[expect_nul], i)) \
				expect_rest[j++] = i; \
		for (int i = 1, j = 0; i < 256; i++) \
			if (class(i) == 1) \
				actual_rest[j++] = i; \
		TEST(check_int(class(0), ==, expect_nul) && \
		     check_str(actual_rest, expect_rest), \
		     #class " works"); \
	} while (0)

check_str escapes non-printable characters when reporting a mismatch,
so this shouldn't mess up your terminal.

By the way: Like the original code these checks are stricter than
required by the C standard in requiring the result to be 1 instead of
just true (any non-zero value).  Perhaps they should be relaxed.  But
that's a tangent and independent of the convergence to a unit test.

René