[PATCH 01/14] numparse: new module for parsing integral numbers

Michael Haggerty <mhagger@xxxxxxxxxxxx> · Tue, 17 Mar 2015 17:00:03 +0100

Implement wrappers for strtol() and strtoul() that are safer and more
convenient to use.

Signed-off-by: Michael Haggerty <mhagger@xxxxxxxxxxxx>
---
 Makefile   |   1 +
 numparse.c | 180 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 numparse.h | 207 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 388 insertions(+)
 create mode 100644 numparse.c
 create mode 100644 numparse.h

diff --git a/Makefile b/Makefile
index 44f1dd1..6c0cfcc 100644
--- a/Makefile
+++ b/Makefile
@@ -732,6 +732,7 @@ LIB_OBJS += notes.o
 LIB_OBJS += notes-cache.o
 LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
+LIB_OBJS += numparse.o
 LIB_OBJS += object.o
 LIB_OBJS += pack-bitmap.o
 LIB_OBJS += pack-bitmap-write.o
diff --git a/numparse.c b/numparse.c
new file mode 100644
index 0000000..90b44ce
--- /dev/null
+++ b/numparse.c
@@ -0,0 +1,180 @@
+#include "git-compat-util.h"
+#include "numparse.h"
+
+#define NUM_NEGATIVE (1 << 16)
+
+
+static int parse_precheck(const char *s, unsigned int *flags)
+{
+	const char *number;
+
+	if (isspace(*s)) {
+		if (!(*flags & NUM_LEADING_WHITESPACE))
+			return -NUM_LEADING_WHITESPACE;
+		do {
+			s++;
+		} while (isspace(*s));
+	}
+
+	if (*s == '+') {
+		if (!(*flags & NUM_PLUS))
+			return -NUM_PLUS;
+		number = s + 1;
+		*flags &= ~NUM_NEGATIVE;
+	} else if (*s == '-') {
+		if (!(*flags & NUM_MINUS))
+			return -NUM_MINUS;
+		number = s + 1;
+		*flags |= NUM_NEGATIVE;
+	} else {
+		number = s;
+		*flags &= ~NUM_NEGATIVE;
+	}
+
+	if (!(*flags & NUM_BASE_SPECIFIER)) {
+		int base = *flags & NUM_BASE_MASK;
+		if (base == 0) {
+			/* This is a pointless combination of options. */
+			die("BUG: base=0 specified without NUM_BASE_SPECIFIER");
+		} else if (base == 16 && starts_with(number, "0x")) {
+			/*
+			 * We want to treat this as zero terminated by
+			 * an 'x', whereas strtol()/strtoul() would
+			 * silently eat the "0x". We accomplish this
+			 * by treating it as a base 10 number:
+			 */
+			*flags = (*flags & ~NUM_BASE_MASK) | 10;
+		}
+	}
+	return 0;
+}
+
+int parse_l(const char *s, unsigned int flags, long *result, char **endptr)
+{
+	long l;
+	const char *end;
+	int err = 0;
+
+	err = parse_precheck(s, &flags);
+	if (err)
+		return err;
+
+	/*
+	 * Now let strtol() do the heavy lifting:
+	 */
+	errno = 0;
+	l = strtol(s, (char **)&end, flags & NUM_BASE_MASK);
+	if (errno) {
+		if (errno == ERANGE) {
+			if (!(flags & NUM_SATURATE))
+				return -NUM_SATURATE;
+		} else {
+			return -NUM_OTHER_ERROR;
+		}
+	}
+	if (end == s)
+		return -NUM_NO_DIGITS;
+
+	if (*end && !(flags & NUM_TRAILING))
+		return -NUM_TRAILING;
+
+	/* Everything was OK */
+	*result = l;
+	if (endptr)
+		*endptr = (char *)end;
+	return 0;
+}
+
+int parse_ul(const char *s, unsigned int flags,
+	     unsigned long *result, char **endptr)
+{
+	unsigned long ul;
+	const char *end;
+	int err = 0;
+
+	err = parse_precheck(s, &flags);
+	if (err)
+		return err;
+
+	/*
+	 * Now let strtoul() do the heavy lifting:
+	 */
+	errno = 0;
+	ul = strtoul(s, (char **)&end, flags & NUM_BASE_MASK);
+	if (errno) {
+		if (errno == ERANGE) {
+			if (!(flags & NUM_SATURATE))
+				return -NUM_SATURATE;
+		} else {
+			return -NUM_OTHER_ERROR;
+		}
+	}
+	if (end == s)
+		return -NUM_NO_DIGITS;
+
+	/*
+	 * strtoul(), perversely, accepts negative numbers, converting
+	 * them to the positive number with the same bit pattern. We
+	 * don't ever want that.
+	 */
+	if ((flags & NUM_NEGATIVE) && ul) {
+		if (!(flags & NUM_SATURATE))
+			return -NUM_SATURATE;
+		ul = 0;
+	}
+
+	if (*end && !(flags & NUM_TRAILING))
+		return -NUM_TRAILING;
+
+	/* Everything was OK */
+	*result = ul;
+	if (endptr)
+		*endptr = (char *)end;
+	return 0;
+}
+
+int parse_i(const char *s, unsigned int flags, int *result, char **endptr)
+{
+	long l;
+	int err;
+	char *end;
+
+	err = parse_l(s, flags, &l, &end);
+	if (err)
+		return err;
+
+	if ((int)l == l)
+		*result = l;
+	else if (!(flags & NUM_SATURATE))
+		return -NUM_SATURATE;
+	else
+		*result = (l <= 0) ? INT_MIN : INT_MAX;
+
+	if (endptr)
+		*endptr = end;
+
+	return 0;
+}
+
+int parse_ui(const char *s, unsigned int flags, unsigned int *result, char **endptr)
+{
+	unsigned long ul;
+	int err;
+	char *end;
+
+	err = parse_ul(s, flags, &ul, &end);
+	if (err)
+		return err;
+
+	if ((unsigned int)ul == ul)
+		*result = ul;
+	else if (!(flags & NUM_SATURATE))
+		return -NUM_SATURATE;
+	else
+		*result = UINT_MAX;
+
+	if (endptr)
+		*endptr = end;
+
+	return 0;
+}
diff --git a/numparse.h b/numparse.h
new file mode 100644
index 0000000..4de5e10
--- /dev/null
+++ b/numparse.h
@@ -0,0 +1,207 @@
+#ifndef NUMPARSE_H
+#define NUMPARSE_H
+
+/*
+ * Functions for parsing integral numbers.
+ *
+ * strtol() and strtoul() are very flexible, in fact too flexible for
+ * many purposes. These functions wrap them to make them easier to use
+ * in a stricter way.
+ *
+ * There are two classes of function, parse_*() and convert_*(). The
+ * former try to read a number from the front of a string and report a
+ * pointer to the character following the number. The latter don't
+ * report the end of the number, and are meant to be used when the
+ * input string should contain only a single number, with no trailing
+ * characters.
+ *
+ * Each class of functions has four variants:
+ *
+ * - parse_l(), convert_l() -- parse long ints
+ * - parse_ul(), convert_ul() -- parse unsigned long ints
+ * - parse_i(), convert_i() -- parse ints
+ * - parse_ui(), convert_ui() -- parse unsigned ints
+ *
+ * The style of parsing is controlled by a flags argument which
+ * encodes both the base of the number and many other options. The
+ * base is encoded by its numerical value (2 <= base <= 36), or zero
+ * if it should be determined automatically based on whether the
+ * number has a "0x" or "0" prefix.
+ *
+ * The functions all return zero on success. On error, they return a
+ * negative integer indicating the first error that was detected. For
+ * example, if no sign characters were allowed but the string
+ * contained a '-', the function will return -NUM_MINUS. If there is
+ * any kind of error, *result and *endptr are unchanged.
+ *
+ * Examples:
+ *
+ * - Convert hexadecimal string s into an unsigned int. Die if there
+ *   are any characters in s besides hexadecimal digits, or if the
+ *   result exceeds the range of an unsigned int:
+ *
+ *     if (convert_ui(s, 16, &result))
+ *             die("...");
+ *
+ * - Read a base-ten long number from the front of a string, allowing
+ *   sign characters and setting endptr to point at any trailing
+ *   characters:
+ *
+ *     if (parse_l(s, 10 | NUM_SIGN | NUM_TRAILING, &result, &endptr))
+ *             die("...");
+ *
+ * - Convert decimal string s into a signed int, but not allowing the
+ *   string to contain a '+' or '-' prefix (and thereby indirectly
+ *   ensuring that the result will be non-negative):
+ *
+ *     if (convert_i(s, 10, &result))
+ *             die("...");
+ *
+ * - Convert s into a signed int, interpreting prefix "0x" to mean
+ *   hexadecimal and "0" to mean octal. If the value doesn't fit in an
+ *   unsigned int, set result to INT_MIN or INT_MAX.
+ *
+ *     if (convert_i(s, NUM_SLOPPY, &result))
+ *             die("...");
+ */
+
+
+/*
+ * Constants for parsing numbers.
+ *
+ * These can be passed in flags to allow the specified features. Also,
+ * if there is an error parsing a number, the parsing functions return
+ * the negated value of one of these constants (or NUM_NO_DIGITS or
+ * NUM_OTHER_ERROR) to indicate the first error detected.
+ */
+
+/*
+ * The lowest 6 bits of flags hold the numerical base that should be
+ * used to parse the number, 2 <= base <= 36. If base is set to 0,
+ * then NUM_BASE_SPECIFIER must be set too; in this case, the base is
+ * detected automatically from the string's prefix.
+ */
+#define NUM_BASE_MASK 0x3f
+
+/* Skip any whitespace before the number. */
+#define NUM_LEADING_WHITESPACE (1 << 8)
+
+/* Allow a leading '+'. */
+#define NUM_PLUS               (1 << 9)
+
+/* Allow a leading '-'. */
+#define NUM_MINUS              (1 << 10)
+
+/*
+ * Allow a leading base specifier:
+ * - If base is 0: a leading "0x" indicates base 16; a leading "0"
+ *   indicates base 8; otherwise, assume base 10.
+ * - If base is 16: a leading "0x" is allowed and skipped over.
+ */
+#define NUM_BASE_SPECIFIER     (1 << 11)
+
+/*
+ * If the number is not in the allowed range, return the smallest or
+ * largest representable value instead.
+ */
+#define NUM_SATURATE           (1 << 12)
+
+/*
+ * Just parse until the end of the number, ignoring any subsequent
+ * characters. If this option is not specified, then it is an error if
+ * the whole string cannot be parsed.
+ */
+#define NUM_TRAILING           (1 << 13)
+
+
+/* Additional errors that can come from parsing numbers: */
+
+/* There were no valid digits */
+#define NUM_NO_DIGITS          (1 << 14)
+/* There was some other error reported by strtol()/strtoul(): */
+#define NUM_OTHER_ERROR        (1 << 15)
+
+/*
+ * Please note that there is also a NUM_NEGATIVE, which is used
+ * internally.
+ */
+
+/*
+ * Now define some useful combinations of parsing options:
+ */
+
+/* A bunch of digits with an optional sign. */
+#define NUM_SIGN (NUM_PLUS | NUM_MINUS)
+
+/*
+ * Be as liberal as possible with the form of the number itself
+ * (though if you also want to allow leading whitespace and/or
+ * trailing characters, you should combine this with
+ * NUM_LEADING_WHITESPACE and/or NUM_TRAILING).
+ */
+#define NUM_SLOPPY (NUM_SIGN | NUM_SATURATE | NUM_BASE_SPECIFIER)
+
+
+/*
+ * Number parsing functions:
+ *
+ * The following functions parse a number (long, unsigned long, int,
+ * or unsigned int respectively) from the front of s, storing the
+ * value to *result and storing a pointer to the first character after
+ * the number to *endptr. flags specifies how the number should be
+ * parsed, including which base should be used. flags is a combination
+ * of the numerical base (2-36) and the NUM_* constants above (see).
+ * Return 0 on success or a negative value if there was an error. On
+ * failure, *result and *entptr are left unchanged.
+ *
+ * Please note that if NUM_TRAILING is not set, then it is
+ * nevertheless an error if there are any characters between the end
+ * of the number and the end of the string.
+ */
+
+int parse_l(const char *s, unsigned int flags,
+	    long *result, char **endptr);
+
+int parse_ul(const char *s, unsigned int flags,
+	     unsigned long *result, char **endptr);
+
+int parse_i(const char *s, unsigned int flags,
+	    int *result, char **endptr);
+
+int parse_ui(const char *s, unsigned int flags,
+	     unsigned int *result, char **endptr);
+
+
+/*
+ * Number conversion functions:
+ *
+ * The following functions parse a string into a number. They are
+ * identical to the parse_*() functions above, except that the endptr
+ * is not returned. These are most useful when parsing a whole string
+ * into a number; i.e., when (flags & NUM_TRAILING) is unset.
+ */
+static inline int convert_l(const char *s, unsigned int flags,
+			    long *result)
+{
+	return parse_l(s, flags, result, NULL);
+}
+
+static inline int convert_ul(const char *s, unsigned int flags,
+			     unsigned long *result)
+{
+	return parse_ul(s, flags, result, NULL);
+}
+
+static inline int convert_i(const char *s, unsigned int flags,
+			    int *result)
+{
+	return parse_i(s, flags, result, NULL);
+}
+
+static inline int convert_ui(const char *s, unsigned int flags,
+			     unsigned int *result)
+{
+	return parse_ui(s, flags, result, NULL);
+}
+
+#endif /* NUMPARSE_H */
-- 
2.1.4

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html