Faster str to int conversion (was Table with large number of int columns, very slow COPY FROM)

Andres Freund <andres@xxxxxxxxxxx> · Fri, 8 Dec 2017 13:44:37 -0800

Hi,


On 2017-12-08 10:17:34 -0800, Andres Freund wrote:
> the strtoll is libc functionality triggered by pg_atoi(), something I've
> seen show up in numerous profiles. I think it's probably time to have
> our own optimized version of it rather than relying on libcs.

Attached is a hand-rolled version. After quickly hacking up one from
scratch, I noticed we already kind of have one for int64 (scanint8), so
I changed the structure of this one to be relatively similar.

It's currently using the overflow logic from [1], but that's not
fundamentally required, we could rely on -fwrapv for this one too.

This one improves performance of the submitted workload from 1223.950ms
to 1020.640ms (best of three). The profile's shape changes quite
noticeably:

master:
+   15.38%  postgres  libc-2.25.so      [.] __GI_____strtoll_l_internal
+   11.79%  postgres  postgres          [.] heap_fill_tuple
+    8.00%  postgres  postgres          [.] CopyFrom
+    7.40%  postgres  postgres          [.] CopyReadLine
+    6.79%  postgres  postgres          [.] ExecConstraints
+    6.68%  postgres  postgres          [.] NextCopyFromRawFields
+    6.36%  postgres  postgres          [.] heap_compute_data_size
+    6.02%  postgres  postgres          [.] pg_atoi
patch:
+   13.70%  postgres  postgres          [.] heap_fill_tuple
+   10.46%  postgres  postgres          [.] CopyFrom
+    9.31%  postgres  postgres          [.] pg_strto32
+    8.39%  postgres  postgres          [.] CopyReadLine
+    7.88%  postgres  postgres          [.] ExecConstraints
+    7.63%  postgres  postgres          [.] InputFunctionCall
+    7.41%  postgres  postgres          [.] heap_compute_data_size
+    7.21%  postgres  postgres          [.] pg_verify_mbstr
+    5.49%  postgres  postgres          [.] NextCopyFromRawFields


This probably isn't going to resolve Alex's performance concerns
meaningfully, but seems quite worthwhile to do anyway.

We probably should have int8/16/64 versions coded just like the 32bit
version, but I decided to leave that out for now. Primarily interested
in comments.  Wonder a bit whether it's worth providing an 'errorOk'
mode like scanint8 does, but surveying its callers suggests we should
rather change them to not need it...

Greetings,

Andres Freund

[1] http://archives.postgresql.org/message-id/20171030112751.mukkriz2rur2qkxc%40alap3.anarazel.de
>From 98fbe53be0a3046f8ace687f846f91a0043deee8 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@xxxxxxxxxxx>
Date: Sun, 29 Oct 2017 22:13:54 -0700
Subject: [PATCH 1/3] Provide overflow safe integer math inline functions.

Author: Andres Freund, with some code stolen from Greg Stark
Reviewed-By:
Discussion: https://postgr.es/m/
Backpatch:
---
 config/c-compiler.m4          |  22 ++++
 configure                     |  33 ++++++
 configure.in                  |   4 +
 src/include/common/int.h      | 229 ++++++++++++++++++++++++++++++++++++++++++
 src/include/pg_config.h.in    |   3 +
 src/include/pg_config.h.win32 |   3 +
 6 files changed, 294 insertions(+)
 create mode 100644 src/include/common/int.h

diff --git a/config/c-compiler.m4 b/config/c-compiler.m4
index 6dcc7906491..0d91e52a28f 100644
--- a/config/c-compiler.m4
+++ b/config/c-compiler.m4
@@ -296,6 +296,28 @@ fi])# PGAC_C_BUILTIN_CONSTANT_P
 
 
 
+# PGAC_C_BUILTIN_OP_OVERFLOW
+# -------------------------
+# Check if the C compiler understands __builtin_$op_overflow(),
+# and define HAVE__BUILTIN_OP_OVERFLOW if so.
+#
+# Check for the most complicated case, 64 bit multiplication, as a
+# proxy for all of the operations.
+AC_DEFUN([PGAC_C_BUILTIN_OP_OVERFLOW],
+[AC_CACHE_CHECK(for __builtin_mul_overflow, pgac_cv__builtin_op_overflow,
+[AC_COMPILE_IFELSE([AC_LANG_PROGRAM([],
+[PG_INT64_TYPE result;
+__builtin_mul_overflow((PG_INT64_TYPE) 1, (PG_INT64_TYPE) 2, &result);]
+)],
+[pgac_cv__builtin_op_overflow=yes],
+[pgac_cv__builtin_op_overflow=no])])
+if test x"$pgac_cv__builtin_op_overflow" = xyes ; then
+AC_DEFINE(HAVE__BUILTIN_OP_OVERFLOW, 1,
+          [Define to 1 if your compiler understands __builtin_$op_overflow.])
+fi])# PGAC_C_BUILTIN_OP_OVERFLOW
+
+
+
 # PGAC_C_BUILTIN_UNREACHABLE
 # --------------------------
 # Check if the C compiler understands __builtin_unreachable(),
diff --git a/configure b/configure
index 4ecd2e19224..f66899488cc 100755
--- a/configure
+++ b/configure
@@ -14467,6 +14467,39 @@ esac
 
 fi
 
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for __builtin_mul_overflow" >&5
+$as_echo_n "checking for __builtin_mul_overflow... " >&6; }
+if ${pgac_cv__builtin_op_overflow+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+int
+main ()
+{
+PG_INT64_TYPE result;
+__builtin_mul_overflow((PG_INT64_TYPE) 1, (PG_INT64_TYPE) 2, &result);
+
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_compile "$LINENO"; then :
+  pgac_cv__builtin_op_overflow=yes
+else
+  pgac_cv__builtin_op_overflow=no
+fi
+rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $pgac_cv__builtin_op_overflow" >&5
+$as_echo "$pgac_cv__builtin_op_overflow" >&6; }
+if test x"$pgac_cv__builtin_op_overflow" = xyes ; then
+
+$as_echo "#define HAVE__BUILTIN_OP_OVERFLOW 1" >>confdefs.h
+
+fi
+
 # Check size of void *, size_t (enables tweaks for > 32bit address space)
 # The cast to long int works around a bug in the HP C Compiler
 # version HP92453-01 B.11.11.23709.GP, which incorrectly rejects
diff --git a/configure.in b/configure.in
index cea7fd07553..edf1dd2e7b8 100644
--- a/configure.in
+++ b/configure.in
@@ -1764,6 +1764,10 @@ if test $pgac_need_repl_snprintf = yes; then
   AC_LIBOBJ(snprintf)
 fi
 
+# has to be down here, rather than with the other builtins, because
+# the test uses PG_INT64_TYPE.
+PGAC_C_BUILTIN_OP_OVERFLOW
+
 # Check size of void *, size_t (enables tweaks for > 32bit address space)
 AC_CHECK_SIZEOF([void *])
 AC_CHECK_SIZEOF([size_t])
diff --git a/src/include/common/int.h b/src/include/common/int.h
new file mode 100644
index 00000000000..648cbd49f14
--- /dev/null
+++ b/src/include/common/int.h
@@ -0,0 +1,229 @@
+/*-------------------------------------------------------------------------
+ *
+ * int.h
+ *	  Routines to perform integer math, while checking for overflows.
+ *
+ * The routines in this file are intended to be well defined C, without
+ * relying on compiler flags like -fwrapv.
+ *
+ * To reduce the overhead of these routines try to use compiler intrinsics
+ * where available. That's not that important for the 16, 32 bit cases, but
+ * the 64 bit cases can be considerably faster with intrinsics. In case no
+ * intrinsics are available 128 bit math is used where available.
+ *
+ * Copyright (c) 2017, PostgreSQL Global Development Group
+ *
+ * src/include/common/int.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef COMMON_INT_H
+#define COMMON_INT_H
+
+/*
+ * If a + b overflows, return true, otherwise store the result of a + b into
+ * *result. The content of *result is implementation defined in case of
+ * overflow.
+ */
+static inline bool
+pg_add16_overflow(int16 a, int16 b, int16 *result)
+{
+#if defined(HAVE__BUILTIN_OP_OVERFLOW)
+	return __builtin_add_overflow(a, b, result);
+#else
+	int32 res = (int32) a + (int32) b;
+	if (res > PG_INT16_MAX || res < PG_INT16_MIN)
+		return true;
+	*result = (int16) res;
+	return false;
+#endif
+}
+
+/*
+ * If a - b overflows, return true, otherwise store the result of a - b into
+ * *result. The content of *result is implementation defined in case of
+ * overflow.
+ */
+static inline bool
+pg_sub16_overflow(int16 a, int16 b, int16 *result)
+{
+#if defined(HAVE__BUILTIN_OP_OVERFLOW)
+	return __builtin_sub_overflow(a, b, result);
+#else
+	int32 res = (int32) a - (int32) b;
+	if (res > PG_INT16_MAX || res < PG_INT16_MIN)
+		return true;
+	*result = (int16) res;
+	return false;
+#endif
+}
+
+/*
+ * If a * b overflows, return true, otherwise store the result of a * b into
+ * *result. The content of *result is implementation defined in case of
+ * overflow.
+ */
+static inline bool
+pg_mul16_overflow(int16 a, int16 b, int16 *result)
+{
+#if defined(HAVE__BUILTIN_OP_OVERFLOW)
+	return __builtin_mul_overflow(a, b, result);
+#else
+	int32 res = (int32) a * (int32) b;
+	if (res > PG_INT16_MAX || res < PG_INT16_MIN)
+		return true;
+	*result = (int16) res;
+	return false;
+#endif
+}
+
+/*
+ * If a + b overflows, return true, otherwise store the result of a + b into
+ * *result. The content of *result is implementation defined in case of
+ * overflow.
+ */
+static inline bool
+pg_add32_overflow(int32 a, int32 b, int32 *result)
+{
+#if defined(HAVE__BUILTIN_OP_OVERFLOW)
+	return __builtin_add_overflow(a, b, result);
+#else
+	int64 res = (int64) a + (int64) b;
+	if (res > PG_INT32_MAX || res < PG_INT32_MIN)
+		return true;
+	*result = (int32) res;
+	return false;
+#endif
+}
+
+/*
+ * If a - b overflows, return true, otherwise store the result of a - b into
+ * *result. The content of *result is implementation defined in case of
+ * overflow.
+ */
+static inline bool
+pg_sub32_overflow(int32 a, int32 b, int32 *result)
+{
+#if defined(HAVE__BUILTIN_OP_OVERFLOW)
+	return __builtin_sub_overflow(a, b, result);
+#else
+	int64 res = (int64) a - (int64) b;
+	if (res > PG_INT32_MAX || res < PG_INT32_MIN)
+		return true;
+	*result = (int32) res;
+	return false;
+#endif
+}
+
+/*
+ * If a * b overflows, return true, otherwise store the result of a * b into
+ * *result. The content of *result is implementation defined in case of
+ * overflow.
+ */
+static inline bool
+pg_mul32_overflow(int32 a, int32 b, int32 *result)
+{
+#if defined(HAVE__BUILTIN_OP_OVERFLOW)
+	return __builtin_mul_overflow(a, b, result);
+#else
+	int64 res = (int64) a * (int64) b;
+	if (res > PG_INT32_MAX || res < PG_INT32_MIN)
+		return true;
+	*result = (int32) res;
+	return false;
+#endif
+}
+
+/*
+ * If a + b overflows, return true, otherwise store the result of a + b into
+ * *result. The content of *result is implementation defined in case of
+ * overflow.
+ */
+static inline bool
+pg_add64_overflow(int64 a, int64 b, int64 *result)
+{
+#if defined(HAVE__BUILTIN_OP_OVERFLOW)
+	return __builtin_add_overflow(a, b, result);
+#elif defined(HAVE_INT128)
+	int128 res = (int128) a + (int128) b;
+	if (res > PG_INT64_MAX || res < PG_INT64_MIN)
+		return true;
+	*result = (int64) res;
+	return false;
+#else
+	if ((a > 0 && b > 0 && a > PG_INT64_MAX - b) ||
+		(a < 0 && b < 0 && a < PG_INT64_MIN - b))
+		return true;
+	*result = a + b;
+	return false;
+#endif
+}
+
+/*
+ * If a - b overflows, return true, otherwise store the result of a - b into
+ * *result. The content of *result is implementation defined in case of
+ * overflow.
+ */
+static inline bool
+pg_sub64_overflow(int64 a, int64 b, int64 *result)
+{
+#if defined(HAVE__BUILTIN_OP_OVERFLOW)
+	return __builtin_sub_overflow(a, b, result);
+#elif defined(HAVE_INT128)
+	int128 res = (int128) a - (int128) b;
+	if (res > PG_INT64_MAX || res < PG_INT64_MIN)
+		return true;
+	*result = (int64) res;
+	return false;
+#else
+	if ((a < 0 && b > 0 && a < PG_INT64_MIN + b) ||
+		(a > 0 && b < 0 && a > PG_INT64_MAX + b))
+		return true;
+	*result = a - b;
+	return false;
+#endif
+}
+
+/*
+ * If a * b overflows, return true, otherwise store the result of a * b into
+ * *result. The content of *result is implementation defined in case of
+ * overflow.
+ */
+static inline bool
+pg_mul64_overflow(int64 a, int64 b, int64 *result)
+{
+#if defined(HAVE__BUILTIN_OP_OVERFLOW)
+	return __builtin_mul_overflow(a, b, result);
+#elif defined(HAVE_INT128)
+	int128 res = (int128) a * (int128) b;
+	if (res > PG_INT64_MAX || res < PG_INT64_MIN)
+		return true;
+	*result = (int64) res;
+	return false;
+#else
+	/* Overflow can only happen if at least one value is outside the range
+	 * sqrt(min)..sqrt(max) so check that first as the division can be quite a bit
+	 * more expensive than the multiplication.
+	 *
+	 * Multiplying by 0 or 1 can't overflow of course and checking for 0
+	 * separately avoids any risk of dividing by 0.  Be careful about dividing
+	 * INT_MIN by -1 also, note reversing the a and b to ensure we're always
+	 * dividing it by a positive value.
+	 *
+	 */
+	if ((a > PG_INT32_MAX || a < PG_INT32_MIN  ||
+		 b > PG_INT32_MAX || b < PG_INT32_MIN) &&
+		a != 0 && a != 1 && b != 0 && b != 1 &&
+		((a > 0 && b > 0 && a > PG_INT64_MAX / b) ||
+		 (a > 0 && b < 0 && b < PG_INT64_MIN / a) ||
+		 (a < 0 && b > 0 && a < PG_INT64_MIN / b) ||
+		 (a < 0 && b < 0 && a < PG_INT64_MAX / b)))
+	{
+		return true;
+	}
+	*result = a * b;
+	return false;
+#endif
+}
+
+#endif /* COMMON_INT_H */
diff --git a/src/include/pg_config.h.in b/src/include/pg_config.h.in
index cfdcc5ac62f..dab6d41f5e0 100644
--- a/src/include/pg_config.h.in
+++ b/src/include/pg_config.h.in
@@ -687,6 +687,9 @@
 /* Define to 1 if your compiler understands __builtin_constant_p. */
 #undef HAVE__BUILTIN_CONSTANT_P
 
+/* Define to 1 if your compiler understands __builtin_$op_overflow. */
+#undef HAVE__BUILTIN_OP_OVERFLOW
+
 /* Define to 1 if your compiler understands __builtin_types_compatible_p. */
 #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P
 
diff --git a/src/include/pg_config.h.win32 b/src/include/pg_config.h.win32
index ab9b941e89d..7dbf67ddf69 100644
--- a/src/include/pg_config.h.win32
+++ b/src/include/pg_config.h.win32
@@ -512,6 +512,9 @@
 /* Define to 1 if your compiler understands __builtin_constant_p. */
 /* #undef HAVE__BUILTIN_CONSTANT_P */
 
+/* Define to 1 if your compiler understands __builtin_$op_overflow. */
+/* #undef HAVE__BUILTIN_OP_OVERFLOW */
+
 /* Define to 1 if your compiler understands __builtin_types_compatible_p. */
 /* #undef HAVE__BUILTIN_TYPES_COMPATIBLE_P */
 
-- 
2.14.1.536.g6867272d5b.dirty

>From 64e24e2b8304619a305c8000b12d825e3b80aae5 Mon Sep 17 00:00:00 2001
From: Andres Freund <andres@xxxxxxxxxxx>
Date: Fri, 8 Dec 2017 13:31:15 -0800
Subject: [PATCH] Hand code string to int32 conversion for performance.

---
 src/backend/utils/adt/int.c      |  2 +-
 src/backend/utils/adt/numutils.c | 90 ++++++++++++++++++++++++++++++++++++++++
 src/include/utils/builtins.h     |  1 +
 3 files changed, 92 insertions(+), 1 deletion(-)

diff --git a/src/backend/utils/adt/int.c b/src/backend/utils/adt/int.c
index 4cd8960b3fc..8af4f8f3f7a 100644
--- a/src/backend/utils/adt/int.c
+++ b/src/backend/utils/adt/int.c
@@ -267,7 +267,7 @@ int4in(PG_FUNCTION_ARGS)
 {
 	char	   *num = PG_GETARG_CSTRING(0);
 
-	PG_RETURN_INT32(pg_atoi(num, sizeof(int32), '\0'));
+	PG_RETURN_INT32(pg_strto32(num));
 }
 
 /*
diff --git a/src/backend/utils/adt/numutils.c b/src/backend/utils/adt/numutils.c
index 244904ea940..f2281f86dae 100644
--- a/src/backend/utils/adt/numutils.c
+++ b/src/backend/utils/adt/numutils.c
@@ -18,6 +18,7 @@
 #include <limits.h>
 #include <ctype.h>
 
+#include "common/int.h"
 #include "utils/builtins.h"
 
 /*
@@ -108,6 +109,95 @@ pg_atoi(const char *s, int size, int c)
 	return (int32) l;
 }
 
+
+/*
+ * Convert input string to a 32 bit integer.
+ *
+ * Accepts an optional sign and any amount of leading or trailing whitespace.
+ * Throws ereport(ERROR) upon bad input format or overflow.
+ *
+ * NB: Accumulate input as a negative number, to deal with two's complement
+ * representation of the most negative number, which can't be represented as a
+ * positive number.
+ */
+int32
+pg_strto32(const char *s)
+{
+	const char *in = s;
+	int32		tmp = 0;
+	bool		neg = false;
+
+	/* skip leading spaces */
+	while (likely(*in) && isspace((unsigned char) *in))
+		in++;
+
+	/* handle sign */
+	if (*in == '-')
+	{
+		in++;
+		neg = true;
+	}
+	else if (*in == '+')
+		in++;
+
+	/* require at least one digit */
+	if (unlikely(!isdigit((unsigned char) *in)))
+		goto err;
+
+	/* process digits */
+	while (true)
+	{
+		if (!*in)
+			goto out;
+		if (!isdigit((unsigned char) *in))
+			goto checkspace;
+
+		/* accumulate input */
+		if (unlikely(pg_mul32_overflow(tmp, 10, &tmp)) ||
+			unlikely(pg_sub32_overflow(tmp, *in - '0', &tmp)))
+			goto overflow;
+		in++;
+	}
+
+checkspace:
+	/* allow trailing whitespace, but not other trailing chars */
+	while (*in != '\0' && isspace((unsigned char) *in))
+		in++;
+
+	if (unlikely(*in != '\0'))
+		goto err;
+
+out:
+	/*
+	 * Accumulated input as a negative number, so adjust if that's not what's
+	 * needed.
+	 */
+	if (!neg)
+	{
+		/* could fail if input is most negative number */
+		if (unlikely(tmp == PG_INT32_MIN))
+			goto overflow;
+
+		return -tmp;
+	}
+
+	return tmp;
+
+overflow:
+	ereport(ERROR,
+			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+			 errmsg("value \"%s\" is out of range for type %s",
+					s, "integer")));
+
+err:
+	ereport(ERROR,
+			(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+			 errmsg("invalid input syntax for integer: \"%s\"",
+					s)));
+	return 0;					/* keep compiler quiet */
+}
+
+
 /*
  * pg_itoa: converts a signed 16-bit integer to its string representation
  *
diff --git a/src/include/utils/builtins.h b/src/include/utils/builtins.h
index 762532f6369..fa45c84b752 100644
--- a/src/include/utils/builtins.h
+++ b/src/include/utils/builtins.h
@@ -43,6 +43,7 @@ extern int	namestrcmp(Name name, const char *str);
 
 /* numutils.c */
 extern int32 pg_atoi(const char *s, int size, int c);
+extern int32 pg_strto32(const char *s);
 extern void pg_itoa(int16 i, char *a);
 extern void pg_ltoa(int32 l, char *a);
 extern void pg_lltoa(int64 ll, char *a);
-- 
2.14.1.536.g6867272d5b.dirty