Re: Unexpected interval comparison

Kyotaro HORIGUCHI <horiguchi.kyotaro@xxxxxxxxxxxxx> · Mon, 03 Apr 2017 20:51:18 +0900 (Tokyo Standard Time)

Hmm. It took a bit longer time than expected.

At Fri, 31 Mar 2017 13:29:24 -0400, Tom Lane <tgl@xxxxxxxxxxxxx> wrote in <10353.1490981364@xxxxxxxxxxxxx>
> Kyotaro HORIGUCHI <horiguchi.kyotaro@xxxxxxxxxxxxx> writes:
> > int128 is seen in numeric.c. It is doable in the same manner. In
> > that case it will be a bit slower on the platforms without
> > int128.
> 
> > By the way is it right that we don't assume this as a bug-fix
> > which should be done in the Pg10 dev cycle, but an improvement
> > for 11?
> 
> Well, it seems like a bug to me.  We might conclude that the fix
> is too risky to back-patch, but it's hard to make that decision
> before having a patch in hand to evaluate.

Ok, the attached patch changes the result type of
interval_cmp_value from TimeOffset(=int64) to new 128 bit
LinearInterval. The value is hidden under the functions
interval_eq/ge.../cmp and all other stuff seems to use the
functions.

For platforms without 128 bit support, int64 * 2 version of
interval_cmp_value is used.

I added separate test for the near-overflow values since just
adding such values into INTERVAL_TABLE resuted in a mess. (I ran
64-bit version by commenting-out the definition of PG_INT128_TYPE
in pg_config.h).

The attached patch is that.

regards,

-- 
Kyotaro Horiguchi
NTT Open Source Software Center
>From e8866f35b95dc09cfc570923ee9be3e3a8e93f8a Mon Sep 17 00:00:00 2001
From: Kyotaro Horiguchi <horiguchi.kyotaro@xxxxxxxxxxxxx>
Date: Mon, 3 Apr 2017 19:41:09 +0900
Subject: [PATCH] Fix overflow during interval comparison.

The values in interval are compared by TimeOffset results of
interval_cmp_value but it is so narrow that overflows quite easily.
This patch widen the output of the function to 128 bit. For platforms
without 128 bit arithmetic, a pair of 64 bit intergers is used
instead.
---
 src/backend/utils/adt/timestamp.c      | 121 ++++++++++++++++++++++++++++++---
 src/include/datatype/timestamp.h       |  13 ++++
 src/test/regress/expected/interval.out |  36 ++++++++++
 src/test/regress/sql/interval.sql      |  17 +++++
 4 files changed, 178 insertions(+), 9 deletions(-)

diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c
index 4be1999..c9e811d 100644
--- a/src/backend/utils/adt/timestamp.c
+++ b/src/backend/utils/adt/timestamp.c
@@ -2289,25 +2289,116 @@ timestamptz_cmp_timestamp(PG_FUNCTION_ARGS)
 /*
  *		interval_relop	- is interval1 relop interval2
  */
-static inline TimeOffset
+#ifdef HAVE_INT128
+static inline LinearInterval
 interval_cmp_value(const Interval *interval)
 {
-	TimeOffset	span;
+	LinearInterval	span;
 
-	span = interval->time;
-	span += interval->month * INT64CONST(30) * USECS_PER_DAY;
-	span += interval->day * INT64CONST(24) * USECS_PER_HOUR;
+	span = (int128)interval->time;
+	span += (int128)interval->month * INT64CONST(30) * USECS_PER_DAY;
+	span += (int128)interval->day * INT64CONST(24) * USECS_PER_HOUR;
 
 	return span;
 }
+#else
+/*
+ * arithmetic$ 32 bit extraction from int64
+ *
+ * INT64_AU32 extracts significant 32 bit of int64 as a int64, and INT64_AL32
+ * extracts non-siginificant 32 bit as a int64. Both macros extends sign bits
+ * according to the given value.
+ */
+
+#define INT64_AU32(i64) (((i64) < 0 ? (((i64) - 1) >> 32) + 1: ((i64) >> 32)))
+#define INT64_AL32(i64) (((i64) & 0xffffffff) | ((i64) < 0 ? 0xffffffff00000000 : 0))
+
+/* Adds signed int64 value into LinearInterval variable  */
+#define LINEARINTERVAL_ADD_INT64(li, v) \
+{ \
+	int64 t = (int64)(v); \
+	int64 p = (int64)(li).lo; \
+	(li).lo += t; \
+	if (t > 0 && p > 0 && (int64)(li).lo < 0) \
+		(li).hi += 1; \
+	else if ((t < 0 && p == 0) || \
+			 (t < 0 && p < 0 && (int64)(li).lo > 0))	\
+		(li).hi -= 1; \
+}
+
+static inline LinearInterval
+interval_cmp_value(const Interval *interval)
+{
+	LinearInterval	span = {0, 0};
+	int64	dayfraction;
+	int64	days, tmp;
+
+	/* days cannot overflow here */
+	dayfraction = interval->time % USECS_PER_DAY;
+	days = interval->time / USECS_PER_DAY;
+	days += interval->month * INT64CONST(30);
+	days += interval->day;
+
+	/* we assume arithmetic shift here */
+	Assert(-1 >> 1 == -1);
+
+	/*
+	 * perform 128 bit multiplication using 64 bit variables.
+	 *
+	 *   x * y = ((x.hi << 32) + x.lo) * (((y.hi << 32) + y.lo)
+	 *         = (x.hi * y.hi) << 64 +
+	 *           ((x.hi * y.lo) + (x.lo * y.hi)) << 32 +
+	 *           x.lo * y.lo
+	 */
+
+	/* We don't bother calculation results in zero */
+	if (days != 0)
+	{
+		/*
+		 * Here, the possible maximum number of days is 0xf_865ce7d8 and the
+		 * constant USECS_PER_DAY is 0x14_1dd76000. So the first and second
+		 * term can be calculated safely in 64 bit arithmetic.
+		 */
+		int64 days_u32 = INT64_AU32(days);
+		int64 days_l32 = INT64_AL32(days);
+
+		/* the first term */
+		span.hi = days_u32 * (USECS_PER_DAY >> 32);
+
+		/* the second term */
+		tmp = days_l32 * (USECS_PER_DAY >> 32)
+			+ days_u32 * (USECS_PER_DAY & 0xffffffff);
+		span.hi += INT64_AU32(tmp);
+		LINEARINTERVAL_ADD_INT64(span, INT64_AL32(tmp) << 32);
+
+		/* the third term */
+		LINEARINTERVAL_ADD_INT64(span,
+								 days_l32 * (USECS_PER_DAY & 0xffffffff));
+	}
+
+	/* add dayfraction */
+	LINEARINTERVAL_ADD_INT64(span, dayfraction);
+
+	return span;
+}
+#endif
 
 static int
 interval_cmp_internal(Interval *interval1, Interval *interval2)
 {
-	TimeOffset	span1 = interval_cmp_value(interval1);
-	TimeOffset	span2 = interval_cmp_value(interval2);
+	LinearInterval	span1 = interval_cmp_value(interval1);
+	LinearInterval	span2 = interval_cmp_value(interval2);
 
+#ifdef HAVE_INT128
 	return ((span1 < span2) ? -1 : (span1 > span2) ? 1 : 0);
+#else
+	if (span1.hi < span2.hi)
+		return -1;
+	if (span1.hi > span2.hi)
+		return 1;
+	return (span1.lo < span2.lo ? -1 :
+			(span1.lo > span2.lo ? 1 : 0));
+#endif
 }
 
 Datum
@@ -2384,9 +2475,21 @@ Datum
 interval_hash(PG_FUNCTION_ARGS)
 {
 	Interval   *interval = PG_GETARG_INTERVAL_P(0);
-	TimeOffset	span = interval_cmp_value(interval);
+	LinearInterval	span = interval_cmp_value(interval);
+	int64 span64;
+
+	/*
+	 * taking non-siginificant 64 bits for hashing. it is not necessary to
+	 * fold the significant half of the bits into the rest for this
+	 * usage. Anyway it is zero for most cases.
+	 */
+#ifdef HAVE_INT128
+	span64 = span & 0xffffffffffffffff;
+#else
+	span64 = span.lo;
+#endif
 
-	return DirectFunctionCall1(hashint8, Int64GetDatumFast(span));
+	return DirectFunctionCall1(hashint8, Int64GetDatumFast(span64));
 }
 
 /* overlaps_timestamp() --- implements the SQL OVERLAPS operator.
diff --git a/src/include/datatype/timestamp.h b/src/include/datatype/timestamp.h
index 16e1b4c..fd24987 100644
--- a/src/include/datatype/timestamp.h
+++ b/src/include/datatype/timestamp.h
@@ -48,6 +48,19 @@ typedef struct
 	int32		month;			/* months and years, after time for alignment */
 } Interval;
 
+#ifdef HAVE_INT128
+typedef int128	LinearInterval;
+#else
+/*
+ * LinearInterval's alternative defeinition for the environments without
+ * int128 arithmetics. See interval_cmp_value for datails.
+ */
+typedef struct
+{
+	int64	hi;	/* holds significant 64 bits including a sign bit */
+	uint64	lo; /* holds the lower 64 bits without sign */
+} LinearInterval;
+#endif
 
 /* Limits on the "precision" option (typmod) for these data types */
 #define MAX_TIMESTAMP_PRECISION 6
diff --git a/src/test/regress/expected/interval.out b/src/test/regress/expected/interval.out
index 946c97a..d4e6a00 100644
--- a/src/test/regress/expected/interval.out
+++ b/src/test/regress/expected/interval.out
@@ -207,6 +207,42 @@ SELECT '' AS fortyfive, r1.*, r2.*
            | 34 years        | 6 years
 (45 rows)
 
+-- intervals out of 64bit linear value in microseconds
+CREATE TABLE INTERVAL_TBL_OF (f1 interval);
+INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('2147483647 days 2147483647 months');
+INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('1 year');
+INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('-2147483648 days -2147483648 months');
+SELECT '' AS "64bits", r1.*, r2.*
+   FROM INTERVAL_TBL_OF r1, INTERVAL_TBL_OF r2
+   WHERE r1.f1 > r2.f1
+   ORDER BY r1.f1, r2.f1;
+ 64bits |                   f1                   |                    f1                     
+--------+----------------------------------------+-------------------------------------------
+        | 1 year                                 | -178956970 years -8 mons -2147483648 days
+        | 178956970 years 7 mons 2147483647 days | -178956970 years -8 mons -2147483648 days
+        | 178956970 years 7 mons 2147483647 days | 1 year
+(3 rows)
+
+CREATE INDEX ON INTERVAL_TBL_OF USING btree (f1);
+SET enable_seqscan to false;
+EXPLAIN SELECT '' AS "64bits_2", f1
+  FROM INTERVAL_TBL_OF r1 ORDER BY f1;
+                                               QUERY PLAN                                               
+--------------------------------------------------------------------------------------------------------
+ Index Only Scan using interval_tbl_of_f1_idx on interval_tbl_of r1  (cost=0.13..12.18 rows=3 width=48)
+(1 row)
+
+SELECT '' AS "64bits_2", f1
+  FROM INTERVAL_TBL_OF r1 ORDER BY f1;
+ 64bits_2 |                    f1                     
+----------+-------------------------------------------
+          | -178956970 years -8 mons -2147483648 days
+          | 1 year
+          | 178956970 years 7 mons 2147483647 days
+(3 rows)
+
+SET enable_seqscan to default;
+DROP TABLE INTERVAL_TBL_OF;
 -- Test multiplication and division with intervals.
 -- Floating point arithmetic rounding errors can lead to unexpected results,
 -- though the code attempts to do the right thing and round up to days and
diff --git a/src/test/regress/sql/interval.sql b/src/test/regress/sql/interval.sql
index cff9ada..aa21673 100644
--- a/src/test/regress/sql/interval.sql
+++ b/src/test/regress/sql/interval.sql
@@ -59,6 +59,23 @@ SELECT '' AS fortyfive, r1.*, r2.*
    WHERE r1.f1 > r2.f1
    ORDER BY r1.f1, r2.f1;
 
+-- intervals out of 64bit linear value in microseconds
+CREATE TABLE INTERVAL_TBL_OF (f1 interval);
+INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('2147483647 days 2147483647 months');
+INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('1 year');
+INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('-2147483648 days -2147483648 months');
+SELECT '' AS "64bits", r1.*, r2.*
+   FROM INTERVAL_TBL_OF r1, INTERVAL_TBL_OF r2
+   WHERE r1.f1 > r2.f1
+   ORDER BY r1.f1, r2.f1;
+CREATE INDEX ON INTERVAL_TBL_OF USING btree (f1);
+SET enable_seqscan to false;
+EXPLAIN SELECT '' AS "64bits_2", f1
+  FROM INTERVAL_TBL_OF r1 ORDER BY f1;
+SELECT '' AS "64bits_2", f1
+  FROM INTERVAL_TBL_OF r1 ORDER BY f1;
+SET enable_seqscan to default;
+DROP TABLE INTERVAL_TBL_OF;
 
 -- Test multiplication and division with intervals.
 -- Floating point arithmetic rounding errors can lead to unexpected results,
-- 
2.9.2

-- 
Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general