Hmm. It took a bit longer time than expected. At Fri, 31 Mar 2017 13:29:24 -0400, Tom Lane <tgl@xxxxxxxxxxxxx> wrote in <10353.1490981364@xxxxxxxxxxxxx> > Kyotaro HORIGUCHI <horiguchi.kyotaro@xxxxxxxxxxxxx> writes: > > int128 is seen in numeric.c. It is doable in the same manner. In > > that case it will be a bit slower on the platforms without > > int128. > > > By the way is it right that we don't assume this as a bug-fix > > which should be done in the Pg10 dev cycle, but an improvement > > for 11? > > Well, it seems like a bug to me. We might conclude that the fix > is too risky to back-patch, but it's hard to make that decision > before having a patch in hand to evaluate. Ok, the attached patch changes the result type of interval_cmp_value from TimeOffset(=int64) to new 128 bit LinearInterval. The value is hidden under the functions interval_eq/ge.../cmp and all other stuff seems to use the functions. For platforms without 128 bit support, int64 * 2 version of interval_cmp_value is used. I added separate test for the near-overflow values since just adding such values into INTERVAL_TABLE resuted in a mess. (I ran 64-bit version by commenting-out the definition of PG_INT128_TYPE in pg_config.h). The attached patch is that. regards, -- Kyotaro Horiguchi NTT Open Source Software Center
>From e8866f35b95dc09cfc570923ee9be3e3a8e93f8a Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyotaro@xxxxxxxxxxxxx> Date: Mon, 3 Apr 2017 19:41:09 +0900 Subject: [PATCH] Fix overflow during interval comparison. The values in interval are compared by TimeOffset results of interval_cmp_value but it is so narrow that overflows quite easily. This patch widen the output of the function to 128 bit. For platforms without 128 bit arithmetic, a pair of 64 bit intergers is used instead. --- src/backend/utils/adt/timestamp.c | 121 ++++++++++++++++++++++++++++++--- src/include/datatype/timestamp.h | 13 ++++ src/test/regress/expected/interval.out | 36 ++++++++++ src/test/regress/sql/interval.sql | 17 +++++ 4 files changed, 178 insertions(+), 9 deletions(-) diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 4be1999..c9e811d 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -2289,25 +2289,116 @@ timestamptz_cmp_timestamp(PG_FUNCTION_ARGS) /* * interval_relop - is interval1 relop interval2 */ -static inline TimeOffset +#ifdef HAVE_INT128 +static inline LinearInterval interval_cmp_value(const Interval *interval) { - TimeOffset span; + LinearInterval span; - span = interval->time; - span += interval->month * INT64CONST(30) * USECS_PER_DAY; - span += interval->day * INT64CONST(24) * USECS_PER_HOUR; + span = (int128)interval->time; + span += (int128)interval->month * INT64CONST(30) * USECS_PER_DAY; + span += (int128)interval->day * INT64CONST(24) * USECS_PER_HOUR; return span; } +#else +/* + * arithmetic$ 32 bit extraction from int64 + * + * INT64_AU32 extracts significant 32 bit of int64 as a int64, and INT64_AL32 + * extracts non-siginificant 32 bit as a int64. Both macros extends sign bits + * according to the given value. + */ + +#define INT64_AU32(i64) (((i64) < 0 ? (((i64) - 1) >> 32) + 1: ((i64) >> 32))) +#define INT64_AL32(i64) (((i64) & 0xffffffff) | ((i64) < 0 ? 0xffffffff00000000 : 0)) + +/* Adds signed int64 value into LinearInterval variable */ +#define LINEARINTERVAL_ADD_INT64(li, v) \ +{ \ + int64 t = (int64)(v); \ + int64 p = (int64)(li).lo; \ + (li).lo += t; \ + if (t > 0 && p > 0 && (int64)(li).lo < 0) \ + (li).hi += 1; \ + else if ((t < 0 && p == 0) || \ + (t < 0 && p < 0 && (int64)(li).lo > 0)) \ + (li).hi -= 1; \ +} + +static inline LinearInterval +interval_cmp_value(const Interval *interval) +{ + LinearInterval span = {0, 0}; + int64 dayfraction; + int64 days, tmp; + + /* days cannot overflow here */ + dayfraction = interval->time % USECS_PER_DAY; + days = interval->time / USECS_PER_DAY; + days += interval->month * INT64CONST(30); + days += interval->day; + + /* we assume arithmetic shift here */ + Assert(-1 >> 1 == -1); + + /* + * perform 128 bit multiplication using 64 bit variables. + * + * x * y = ((x.hi << 32) + x.lo) * (((y.hi << 32) + y.lo) + * = (x.hi * y.hi) << 64 + + * ((x.hi * y.lo) + (x.lo * y.hi)) << 32 + + * x.lo * y.lo + */ + + /* We don't bother calculation results in zero */ + if (days != 0) + { + /* + * Here, the possible maximum number of days is 0xf_865ce7d8 and the + * constant USECS_PER_DAY is 0x14_1dd76000. So the first and second + * term can be calculated safely in 64 bit arithmetic. + */ + int64 days_u32 = INT64_AU32(days); + int64 days_l32 = INT64_AL32(days); + + /* the first term */ + span.hi = days_u32 * (USECS_PER_DAY >> 32); + + /* the second term */ + tmp = days_l32 * (USECS_PER_DAY >> 32) + + days_u32 * (USECS_PER_DAY & 0xffffffff); + span.hi += INT64_AU32(tmp); + LINEARINTERVAL_ADD_INT64(span, INT64_AL32(tmp) << 32); + + /* the third term */ + LINEARINTERVAL_ADD_INT64(span, + days_l32 * (USECS_PER_DAY & 0xffffffff)); + } + + /* add dayfraction */ + LINEARINTERVAL_ADD_INT64(span, dayfraction); + + return span; +} +#endif static int interval_cmp_internal(Interval *interval1, Interval *interval2) { - TimeOffset span1 = interval_cmp_value(interval1); - TimeOffset span2 = interval_cmp_value(interval2); + LinearInterval span1 = interval_cmp_value(interval1); + LinearInterval span2 = interval_cmp_value(interval2); +#ifdef HAVE_INT128 return ((span1 < span2) ? -1 : (span1 > span2) ? 1 : 0); +#else + if (span1.hi < span2.hi) + return -1; + if (span1.hi > span2.hi) + return 1; + return (span1.lo < span2.lo ? -1 : + (span1.lo > span2.lo ? 1 : 0)); +#endif } Datum @@ -2384,9 +2475,21 @@ Datum interval_hash(PG_FUNCTION_ARGS) { Interval *interval = PG_GETARG_INTERVAL_P(0); - TimeOffset span = interval_cmp_value(interval); + LinearInterval span = interval_cmp_value(interval); + int64 span64; + + /* + * taking non-siginificant 64 bits for hashing. it is not necessary to + * fold the significant half of the bits into the rest for this + * usage. Anyway it is zero for most cases. + */ +#ifdef HAVE_INT128 + span64 = span & 0xffffffffffffffff; +#else + span64 = span.lo; +#endif - return DirectFunctionCall1(hashint8, Int64GetDatumFast(span)); + return DirectFunctionCall1(hashint8, Int64GetDatumFast(span64)); } /* overlaps_timestamp() --- implements the SQL OVERLAPS operator. diff --git a/src/include/datatype/timestamp.h b/src/include/datatype/timestamp.h index 16e1b4c..fd24987 100644 --- a/src/include/datatype/timestamp.h +++ b/src/include/datatype/timestamp.h @@ -48,6 +48,19 @@ typedef struct int32 month; /* months and years, after time for alignment */ } Interval; +#ifdef HAVE_INT128 +typedef int128 LinearInterval; +#else +/* + * LinearInterval's alternative defeinition for the environments without + * int128 arithmetics. See interval_cmp_value for datails. + */ +typedef struct +{ + int64 hi; /* holds significant 64 bits including a sign bit */ + uint64 lo; /* holds the lower 64 bits without sign */ +} LinearInterval; +#endif /* Limits on the "precision" option (typmod) for these data types */ #define MAX_TIMESTAMP_PRECISION 6 diff --git a/src/test/regress/expected/interval.out b/src/test/regress/expected/interval.out index 946c97a..d4e6a00 100644 --- a/src/test/regress/expected/interval.out +++ b/src/test/regress/expected/interval.out @@ -207,6 +207,42 @@ SELECT '' AS fortyfive, r1.*, r2.* | 34 years | 6 years (45 rows) +-- intervals out of 64bit linear value in microseconds +CREATE TABLE INTERVAL_TBL_OF (f1 interval); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('2147483647 days 2147483647 months'); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('1 year'); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('-2147483648 days -2147483648 months'); +SELECT '' AS "64bits", r1.*, r2.* + FROM INTERVAL_TBL_OF r1, INTERVAL_TBL_OF r2 + WHERE r1.f1 > r2.f1 + ORDER BY r1.f1, r2.f1; + 64bits | f1 | f1 +--------+----------------------------------------+------------------------------------------- + | 1 year | -178956970 years -8 mons -2147483648 days + | 178956970 years 7 mons 2147483647 days | -178956970 years -8 mons -2147483648 days + | 178956970 years 7 mons 2147483647 days | 1 year +(3 rows) + +CREATE INDEX ON INTERVAL_TBL_OF USING btree (f1); +SET enable_seqscan to false; +EXPLAIN SELECT '' AS "64bits_2", f1 + FROM INTERVAL_TBL_OF r1 ORDER BY f1; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Index Only Scan using interval_tbl_of_f1_idx on interval_tbl_of r1 (cost=0.13..12.18 rows=3 width=48) +(1 row) + +SELECT '' AS "64bits_2", f1 + FROM INTERVAL_TBL_OF r1 ORDER BY f1; + 64bits_2 | f1 +----------+------------------------------------------- + | -178956970 years -8 mons -2147483648 days + | 1 year + | 178956970 years 7 mons 2147483647 days +(3 rows) + +SET enable_seqscan to default; +DROP TABLE INTERVAL_TBL_OF; -- Test multiplication and division with intervals. -- Floating point arithmetic rounding errors can lead to unexpected results, -- though the code attempts to do the right thing and round up to days and diff --git a/src/test/regress/sql/interval.sql b/src/test/regress/sql/interval.sql index cff9ada..aa21673 100644 --- a/src/test/regress/sql/interval.sql +++ b/src/test/regress/sql/interval.sql @@ -59,6 +59,23 @@ SELECT '' AS fortyfive, r1.*, r2.* WHERE r1.f1 > r2.f1 ORDER BY r1.f1, r2.f1; +-- intervals out of 64bit linear value in microseconds +CREATE TABLE INTERVAL_TBL_OF (f1 interval); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('2147483647 days 2147483647 months'); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('1 year'); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('-2147483648 days -2147483648 months'); +SELECT '' AS "64bits", r1.*, r2.* + FROM INTERVAL_TBL_OF r1, INTERVAL_TBL_OF r2 + WHERE r1.f1 > r2.f1 + ORDER BY r1.f1, r2.f1; +CREATE INDEX ON INTERVAL_TBL_OF USING btree (f1); +SET enable_seqscan to false; +EXPLAIN SELECT '' AS "64bits_2", f1 + FROM INTERVAL_TBL_OF r1 ORDER BY f1; +SELECT '' AS "64bits_2", f1 + FROM INTERVAL_TBL_OF r1 ORDER BY f1; +SET enable_seqscan to default; +DROP TABLE INTERVAL_TBL_OF; -- Test multiplication and division with intervals. -- Floating point arithmetic rounding errors can lead to unexpected results, -- 2.9.2
-- Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-general