[PATCH] Add test-tr: poor-man tr

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



It offers a limited set of POSIX tr, in particular: no character class
support and no [n*m] operators. Only 8bit. C-escapes supported, and
character ranges. Deletion and squeezing should work, but -s does not
match the GNU tr from coreutils (which, in turn, does not match POSIX).

Even if the program does not offers the full tr features, what's left
is guaranteed to work the same on every platform where you manage to
compile it (unless the platform does something really nasty at low
level). Originally written to overcome brokenness of ActiveState Perl
on Windows, it should also improve (assuming it will be used instead
of local "tr" and "perl -pe y///") portability of the test suite on
other platforms.

Signed-off-by: Alex Riesen <raa.lkml@xxxxxxxxx>
---
Johannes Schindelin, Thu, Jun 12, 2008 09:25:09 +0200:
> On Thu, 12 Jun 2008, Alf Clement wrote:
> 
> > > +static unsigned char *unquote(const char *s, unsigned *len) +{
> > > +       unsigned char *result = malloc(strlen(s)), *r = result;
> > 
> > are you sure that the buffer is big enough?? Nomally you do a
> > malloc(strlen(s)+1).
> 
> AFAIU a "tr" really only ever replaces single characters by single 
> characters.  So, not even looking at the code -- just like you -- I would 
> expect it to get the exact number of bytes read, and to write the same 
> number of bytes.
> 
> I certainly would not expect it to do something string based, introducing 
> a silly expectation of NUL-terminations (which tr(1) does not expect 
> either).

In any case, Alf is right. The unquote is called unconditionally with
its arguments, even if there is nothing to unquote. In which case the
size of result is off by one. Corrected test-tr.c (also with a
sensible header) below.

 Makefile  |    2 +-
 test-tr.c |  212 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 213 insertions(+), 1 deletions(-)
 create mode 100644 test-tr.c

diff --git a/Makefile b/Makefile
index 1937507..5f208d6 100644
--- a/Makefile
+++ b/Makefile
@@ -1229,7 +1229,7 @@ endif
 
 ### Testing rules
 
-TEST_PROGRAMS = test-chmtime$X test-genrandom$X test-date$X test-delta$X test-sha1$X test-match-trees$X test-absolute-path$X test-parse-options$X
+TEST_PROGRAMS = test-chmtime$X test-genrandom$X test-date$X test-delta$X test-sha1$X test-match-trees$X test-absolute-path$X test-parse-options$X test-tr$X
 
 all:: $(TEST_PROGRAMS)
 
diff --git a/test-tr.c b/test-tr.c
new file mode 100644
index 0000000..2248535
--- /dev/null
+++ b/test-tr.c
@@ -0,0 +1,212 @@
+/*
+ * test-tr  A simplified tr(1) implementation for testing purposes
+ *
+ * It offers a limited set of POSIX tr, in particular: no character
+ * class support and no [n*m] operators. Only 8bit. C-escapes
+ * supported, and character ranges. Deletion and squeezing should
+ * work, but -s does not match the GNU tr from coreutils (which, in
+ * turn, does not match POSIX).
+ */
+#include "cache.h"
+
+static int squeeze, delete;
+
+static unsigned char *unquote(const char *s, unsigned *len)
+{
+	unsigned char *result = malloc(strlen(s) + 1), *r = result;
+
+	while (*s) {
+		switch (*s) {
+		case '\\':
+			++s;
+#define ISOCT(c) (((c) >= '0' && (c) <= '7'))
+			if (ISOCT(*s)) {
+				unsigned int c;
+				char oct[4] = {0,0,0,0};
+				oct[0] = *s++;
+				c = (oct[0] - '0');
+				if (ISOCT(*s)) {
+					oct[1] = *s++;
+					c = (c << 3) |(oct[1] - '0');
+					if (ISOCT(*s)) {
+						oct[2] = *s++;
+						c = (c << 3) |(oct[2] - '0');
+					}
+				}
+				if (c > 255) {
+					fprintf(stderr, "invalid octal character specification: \\%s\n", oct);
+					exit(1);
+				}
+				*r++ = c & 0xff;
+			} else {
+				switch (*s) {
+				case '\0':
+					*r++ = '\\';
+					break;
+				case '\\':
+					*r++ = *s++;
+					break;
+				case 'a':
+					*r++ = '\a';
+					++s;
+					break;
+				case 'b':
+					*r++ = '\b';
+					++s;
+					break;
+				case 'f':
+					*r++ = '\f';
+					++s;
+					break;
+				case 'n':
+					*r++ = '\n';
+					++s;
+					break;
+				case 'r':
+					*r++ = '\r';
+					++s;
+					break;
+				case 't':
+					*r++ = '\t';
+					++s;
+					break;
+				case 'v':
+					*r++ = '\v';
+					++s;
+					break;
+				default:
+					*r++ = '\\';
+					*r++ = *s++;
+					break;
+				}
+			}
+			break;
+		default:
+			*r++ = *s++;
+		}
+	}
+
+	*len = r - result;
+	*r = '\0';
+	return result;
+}
+
+#define MAX_PATTERN 256
+static void put_op(unsigned char *conv, unsigned char ch, unsigned *len)
+{
+	unsigned i = (*len)++;
+	if (*len > MAX_PATTERN) {
+		fprintf(stderr, "pattern too long\n");
+		exit(1);
+	}
+	conv[i] = ch;
+}
+
+static void parse(const unsigned char *rule, unsigned rule_len,
+		  unsigned char *set, unsigned *set_len)
+{
+	const unsigned char *p = rule;
+	while (p < rule + rule_len) {
+		if ('-' == *p && p > rule && p[1]) {
+			unsigned c;
+			if (p[-1] > p[1]) {
+				fprintf(stderr, "%c%c%c: range is reversed\n",
+					p[-1], *p, p[1]);
+				exit(1);
+			}
+			c = p[-1] + 1u;
+			for (; c <= p[1]; ++c)
+				put_op(set, c, set_len);
+			++p;
+			++p;
+			continue;
+		}
+		put_op(set, *p, set_len);
+		++p;
+	}
+}
+
+int main(int argc, char *argv[])
+{
+	unsigned set1_len = 0, set2_len = 0;
+	unsigned char set1[MAX_PATTERN];
+	unsigned char set2[MAX_PATTERN];
+
+	ssize_t n;
+	unsigned char last = 0, have_last = 0;
+	unsigned char buf[BUFSIZ];
+
+	char *rule1 = NULL, *rule2 = NULL;
+	unsigned char *urule1, *urule2;
+	unsigned urule1_len, urule2_len;
+	int opt;
+
+	for (opt = 1; opt < argc; ++opt) {
+		if (!strcmp("-s", argv[opt]))
+			squeeze = 1;
+		else if (!strcmp("-d", argv[opt]))
+			delete = 1;
+		else if (!rule1) {
+			rule1 = argv[opt];
+		} else if (!rule2)
+			rule2 = argv[opt];
+	}
+	if (!rule1) {
+	    fprintf(stderr, "no source set given\n"
+		    "test-tr [-s] [-d] set1 [set2]\n"
+		    "\"set\" supports only \\NNN, \\a-\\v and CHAR1-CHAR2 rules\n");
+	    exit(1);
+	}
+	if (delete && rule2) {
+		fprintf(stderr, "extra operand %s when deleting\n", rule2);
+		exit(1);
+	}
+	urule1 = unquote(rule1, &urule1_len);
+	urule2 = NULL;
+	urule2_len = 0;
+	if ((!rule2 || !*rule2) && !delete && !squeeze) {
+		fprintf(stderr, "set2 must be non-empty\n");
+		exit(1);
+	}
+
+	parse(urule1, urule1_len, set1, &set1_len);
+
+	if (rule2) {
+		unsigned i;
+		urule2 = unquote(rule2, &urule2_len);
+		parse(urule2, urule2_len, set2, &set2_len);
+		i = set2[set2_len - 1];
+		while (set2_len < set1_len)
+			put_op(set2, i, &set2_len);
+	}
+
+	while ((n = read(STDIN_FILENO, buf, sizeof(buf)))) {
+		if (n < 0) {
+			int err = errno;
+			if (EINTR == err || EAGAIN == err)
+				continue;
+			fprintf(stderr, "%s: %s\n", argv[0], strerror(err));
+			exit(1);
+		}
+		if (set1_len) {
+			unsigned i, o = 0;
+			for (i = 0; i < (unsigned)n; ++i) {
+				unsigned char *p, ch = buf[i];
+				p = memchr(set1, ch, set1_len);
+				if (p) {
+					if (delete)
+						continue;
+					if (set2_len)
+						ch = set2[p - set1];
+				}
+				if (!(squeeze && have_last && ch == last))
+					buf[o++] = ch;
+				have_last = 1;
+				last = ch;
+			}
+			n = o;
+		}
+		write(STDOUT_FILENO, buf, n);
+	}
+	return 0;
+}
-- 
1.5.6.rc2.43.g77bfa

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]

  Powered by Linux