It offers a limited subset of POSIX tr; in particular, there is no
character class support and no [n*m] operators. Only 8-bit characters
are handled. C escapes and character ranges are supported. Deletion and
squeezing should work, but -s does not match what GNU tr from coreutils
does (which, in turn, does not match SUSv2).

Signed-off-by: Alex Riesen <raa.lkml@xxxxxxxxx>
---
 Makefile              |    2 +-
 t/t0000-test-progs.sh |   72 +++++++++++++++
 test-tr.c             |  236 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 309 insertions(+), 1 deletions(-)
 create mode 100755 t/t0000-test-progs.sh
 create mode 100644 test-tr.c

diff --git a/Makefile b/Makefile
index 7c70b00..a458c30 100644
--- a/Makefile
+++ b/Makefile
@@ -1177,7 +1177,7 @@ endif
 
 ### Testing rules
 
-TEST_PROGRAMS = test-chmtime$X test-genrandom$X test-date$X test-delta$X test-sha1$X test-match-trees$X test-absolute-path$X test-parse-options$X
+TEST_PROGRAMS = test-chmtime$X test-genrandom$X test-date$X test-delta$X test-sha1$X test-match-trees$X test-absolute-path$X test-parse-options$X test-tr$X
 
 all:: $(TEST_PROGRAMS)
 
diff --git a/t/t0000-test-progs.sh b/t/t0000-test-progs.sh
new file mode 100755
index 0000000..d8d339a
--- /dev/null
+++ b/t/t0000-test-progs.sh
@@ -0,0 +1,72 @@
+#!/bin/sh
+
+test_description='Test the test support programs for sanity'
+
+. 
./test-lib.sh
+
+test_expect_success 'test-tr character range' '
+	# ranges, octal and C escapes, NUL bytes in set1 and in set2
+	echo def >expected &&
+	echo abc | test-tr a-c d-f >result &&
+	cmp expected result &&
+
+	printf def >expected &&
+	printf def | test-tr a-c d-f >result &&
+	cmp expected result &&
+
+	printf def >expected &&
+	printf abc | test-tr \\141-\\143 \\144-\\146 >result &&
+	cmp expected result &&
+
+	printf \\r\\n >expected &&
+	printf rn | test-tr rn \\r\\n >result &&
+	cmp expected result &&
+
+	printf \\000\\n >expected &&
+	printf 0n | test-tr 0n \\000\\n >result &&
+	cmp expected result &&
+
+	printf 0n >expected &&
+	printf \\000\\n | test-tr \\000\\n 0n >result &&
+	cmp expected result &&
+
+	echo nopqrstuvwxyzbcdefghklm >expected &&
+	echo abcdefghijklmopqrstuxyz | test-tr A-Za-z N-ZA-Mn-za-m >result &&
+	cmp expected result
+'
+
+test_expect_success 'test-tr delete characters' '
+	# -d takes a single set, given literally or as a range
+	echo ac >expected &&
+	echo abc | test-tr -d b >result &&
+	cmp expected result &&
+
+	echo abcghijk >expected &&
+	echo abcdefghijk | test-tr -d d-f >result &&
+	cmp expected result
+
+'
+
+test_expect_success 'test-tr squeeze repeating characters' '
+	# -s alone squeezes set1; with set2 it squeezes after translating
+	echo abc >expected &&
+	echo abbbbc | test-tr -s b >result &&
+	cmp expected result &&
+
+	echo def >expected &&
+	echo abbbbc | test-tr -s a-c d-f >result &&
+	cmp expected result
+
+'
+
+test_expect_success 'test-tr sanity' '
+	# invalid usage must exit with a non-zero status
+	test_must_fail test-tr -d a-z A-Z &&
+	test_must_fail test-tr \\477 a &&
+	test_must_fail test-tr z-a a &&
+	test_must_fail test-tr a-z "" &&
+	test_must_fail test-tr a-z
+
+'
+
+test_done
diff --git a/test-tr.c b/test-tr.c
new file mode 100644
index 0000000..477ae16
--- /dev/null
+++ b/test-tr.c
@@ -0,0 +1,236 @@
+/*
+ * Simplified tr
+ *
+ * Supports:
+ *
+ * CHAR1-CHAR2
+ * Escape sequences
+ * -d (delete)
+ * -s (squeeze)
+ *
+ * No unicode
+ * No character classes
+ * No -c (complement) support
+ * Behavior of "-s -d" not tested (never used in tests)
+ *
+ * Squeezing of repeating characters follows the behavior
+ * described in the
Single Unix Specification (as far as I
+ * understand it), not what GNU tr does: GNU tr squeezes every character
+ * found in set1 OR set2, whereas the SUS says:
+ *
+ * "When the -s option is specified, after any deletions or
+ * translations have taken place, repeated sequences of the same
+ * character will be replaced by one occurrence of the same
+ * character, if the character is found in the array specified by the
+ * last operand."
+ *
+ * So this tr squeezes only characters that matched set1.
+ */
+#include "cache.h"
+
+#define MAX_PATTERN 256
+#define ISOCT(c) ((c) >= '0' && (c) <= '7')
+
+static int squeeze, delete;
+
+/*
+ * Expand the C-style escapes in the NUL-terminated string s: octal
+ * \N, \NN and \NNN (at most \377), and \\ \a \b \f \n \r \t \v.  Any
+ * other escaped character keeps its backslash, as does a trailing
+ * backslash.  Returns a malloc'ed buffer that the callers never free,
+ * and stores the number of result bytes in *len.  The result may
+ * contain NUL bytes, so use *len rather than strlen() on it.
+ */
+static unsigned char *unquote(const char *s, unsigned *len)
+{
+	static const char names[] = "abfnrtv\\";
+	static const char values[] = "\a\b\f\n\r\t\v\\";
+	/* the result is never longer than s; the "+ 1" is for the final NUL */
+	unsigned char *result = malloc(strlen(s) + 1), *r = result;
+	if (!result) {
+		fprintf(stderr, "out of memory\n");
+		exit(1);
+	}
+	while (*s) {
+		const char *esc;
+		if (*s != '\\') {
+			*r++ = *s++;
+			continue;
+		}
+		++s;
+		if (ISOCT(*s)) {
+			const char *start = s;
+			unsigned c = 0;
+			while (s - start < 3 && ISOCT(*s))
+				c = (c << 3) | (*s++ - '0');
+			if (c > 255) {
+				fprintf(stderr, "invalid octal character specification: \\%.3s\n", start);
+				exit(1);
+			}
+			*r++ = c;
+			continue;
+		}
+		esc = *s ? strchr(names, *s) : NULL;
+		if (esc) {
+			*r++ = values[esc - names];
+			++s;
+			continue;
+		}
+		/* unknown escape or trailing backslash: keep the backslash */
+		*r++ = '\\';
+		if (*s)
+			*r++ = *s++;
+	}
+
+	*len = r - result;
+	*r = '\0';
+	return result;
+}
+
+/*
+ * Append ch to the set conv, which holds *len bytes and has room for
+ * MAX_PATTERN of them; dies instead of writing past the end of conv.
+ */
+static void put_ch(unsigned char *conv, unsigned char ch, unsigned *len)
+{
+	if (*len >= MAX_PATTERN) {
+		fprintf(stderr, "pattern too long\n");
+		exit(1);
+	}
+	conv[(*len)++] = ch;
+}
+
+/*
+ * Expand the unquoted rule (rule_len bytes, may contain NULs) into
+ * set, appending at *set_len.  "X-Y" adds every byte after X up to and
+ * including Y (X itself has been added as a plain byte already); a '-'
+ * that starts or ends the rule is literal.  Dies on a reversed range.
+ */
+static void parse(const unsigned char *rule, unsigned rule_len,
+		  unsigned char *set, unsigned *set_len)
+{
+	const unsigned char *p = rule, *end = rule + rule_len;
+	while (p < end) {
+		/* find the end by length: a NUL byte can be part of the rule */
+		if ('-' == *p && p > rule && p + 1 < end) {
+			unsigned c;
+			if (p[-1] > p[1]) {
+				fprintf(stderr, "%c%c%c: range is reversed\n",
+					p[-1], *p, p[1]);
+				exit(1);
+			}
+			/* c is wider than a byte so the loop also ends for p[1] == 255 */
+			for (c = p[-1] + 1u; c <= p[1]; ++c)
+				put_ch(set, c, set_len);
+			p += 2;
+			continue;
+		}
+		put_ch(set, *p++, set_len);
+	}
+}
+
+/*
+ * test-tr [-s] [-d] set1 [set2]
+ *
+ * Copies stdin to stdout.  A byte found in set1 is replaced with the
+ * byte at the same position in set2 (set2 is padded with its last byte
+ * to the length of set1), or dropped with -d.  With -s such a byte is
+ * also dropped when, after translation, it repeats the previous byte.
+ */
+int main(int argc, char *argv[])
+{
+	unsigned set1_len = 0, set2_len = 0;
+	unsigned char set1[MAX_PATTERN];
+	unsigned char set2[MAX_PATTERN];
+
+	ssize_t n;
+	unsigned char last = 0, have_last = 0;
+	unsigned char buf[BUFSIZ];
+
+	char *rule1 = NULL, *rule2 = NULL;
+	unsigned char *urule1, *urule2;
+	unsigned urule1_len, urule2_len;
+	int opt;
+
+	for (opt = 1; opt < argc; ++opt) {
+		if (!strcmp("-s", argv[opt]))
+			squeeze = 1;
+		else if (!strcmp("-d", argv[opt]))
+			delete = 1;
+		else if (!rule1)
+			rule1 = argv[opt];
+		else if (!rule2)
+			rule2 = argv[opt];
+	}
+	if (!rule1) {
+		fprintf(stderr, "no source set given\n"
+			"test-tr [-s] [-d] set1 [set2]\n"
+			"\"set\" supports only \\NNN, \\a-\\v and CHAR1-CHAR2 rules\n");
+		exit(1);
+	}
+	if (delete && rule2) {
+		fprintf(stderr, "extra operand %s when deleting\n", rule2);
+		exit(1);
+	}
+	urule1 = unquote(rule1, &urule1_len);
+	/* set2 may be left out only with -d or -s, and it may never be empty */
+	if (rule2 ? !*rule2 : (!delete && !squeeze)) {
+		fprintf(stderr, "set2 must be non-empty\n");
+		exit(1);
+	}
+
+	parse(urule1, urule1_len, set1, &set1_len);
+
+	if (rule2) {
+		unsigned char pad;
+		urule2 = unquote(rule2, &urule2_len);
+		parse(urule2, urule2_len, set2, &set2_len);
+		pad = set2[set2_len - 1];
+		while (set2_len < set1_len)
+			put_ch(set2, pad, &set2_len);
+	}
+
+	while ((n = read(STDIN_FILENO, buf, sizeof(buf)))) {
+		ssize_t done = 0;
+		if (n < 0) {
+			int err = errno;
+			if (EINTR == err || EAGAIN == err)
+				continue;
+			fprintf(stderr, "%s: %s\n", argv[0], strerror(err));
+			exit(1);
+		}
+		if (set1_len) {
+			unsigned i, o = 0;
+			for (i = 0; i < (unsigned)n; ++i) {
+				unsigned char *p, ch = buf[i];
+				p = memchr(set1, ch, set1_len);
+				if (p) {
+					if (delete)
+						continue;
+					if (set2_len)
+						ch = set2[p - set1];
+					if (!(squeeze &&
+					      have_last &&
+					      ch == last))
+						buf[o++] = ch;
+				} else
+					buf[o++] = ch;
+
+				have_last = 1;
+				last = ch;
+			}
+			n = o;
+		}
+		/* write() may be interrupted or write only a part of buf */
+		while (done < n) {
+			ssize_t w = write(STDOUT_FILENO, buf + done, n - done);
+			if (w < 0 && EINTR != errno && EAGAIN != errno) {
+				fprintf(stderr, "%s: %s\n", argv[0], strerror(errno));
+				exit(1);
+			}
+			if (w > 0)
+				done += w;
+		}
+	}
+	return 0;
+}
--
1.5.5.rc0.53.g97734

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html