Thank you for the comment. At Mon, 03 Apr 2017 11:35:25 -0400, Tom Lane <tgl@xxxxxxxxxxxxx> wrote in <23053.1491233725@xxxxxxxxxxxxx> > Kyotaro HORIGUCHI <horiguchi.kyotaro@xxxxxxxxxxxxx> writes: > > Ok, the attached patch changes the result type of > > interval_cmp_value from TimeOffset(=int64) to new 128 bit > > LinearInterval. The value is hidden under the functions > > interval_eq/ge.../cmp and all other stuff seems to use the > > functions. > > Looking at this now ... why isn't the INT64_AU32 macro just > > #define INT64_AU32(i64) ((i64) >> 32) > > ? The business with subtracting and re-adding 1 seems unnecessary, and it > also creates a risk of overflow with the minimum possible int64 value. It is equivalent to "i64 / (1<<32)" except for -INT64_MAX. INT64_AU32 gives the value for the first term in the following polynomial. (int64)INT64_AU32(i64) * (2^32) + (int64)INT64_AL32(i64) = i64 The previous expression intended to avoid decimal arithmetic, but gcc optimizes the simple division better (using cmovns-add-sar) than the current INT64_AU32 (jmp-sar) so I changed it. This doesn't suffer overflow. -#define INT64_AU32(i64) (((i64) < 0 ? (((i64) - 1) >> 32) + 1: ((i64) >> 32))) +#define INT64_AU32(i64) ((i64) / (1LL<<32)) In summation of terms in 128bit multiplication expression, I noticed that the value of the second term's lower 32bit loses MSB for certain cases. I changed LINEARINTERVAL_ADD_INT64 to accept the MSB (as the 65th bit) separately. The first attached is the revised patch and the second is temporary sanity check code for non-128bit environment code. (but works only on 128 bit environment) regards, -- Kyotaro Horiguchi NTT Open Source Software Center
>From d79fb456ed551fcaedea3eb3420fd4d8e56bd48c Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyotaro@xxxxxxxxxxxxx> Date: Mon, 3 Apr 2017 19:41:09 +0900 Subject: [PATCH 1/2] Fix overflow during interval comparison. The values in interval are compared by TimeOffset results of interval_cmp_value but it is so narrow that overflows quite easily. This patch widen the output of the function to 128 bit. For platforms without 128 bit arithmetic, a pair of 64 bit intergers is used instead. --- src/backend/utils/adt/timestamp.c | 127 ++++++++++++++++++++++++++++++--- src/include/datatype/timestamp.h | 13 ++++ src/test/regress/expected/interval.out | 36 ++++++++++ src/test/regress/sql/interval.sql | 17 +++++ 4 files changed, 184 insertions(+), 9 deletions(-) diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 4be1999..2411bd7 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -2289,25 +2289,122 @@ timestamptz_cmp_timestamp(PG_FUNCTION_ARGS) /* * interval_relop - is interval1 relop interval2 */ -static inline TimeOffset +#ifdef HAVE_INT128 +static inline LinearInterval interval_cmp_value(const Interval *interval) { - TimeOffset span; + LinearInterval span; - span = interval->time; - span += interval->month * INT64CONST(30) * USECS_PER_DAY; - span += interval->day * INT64CONST(24) * USECS_PER_HOUR; + span = (int128)interval->time; + span += (int128)interval->month * INT64CONST(30) * USECS_PER_DAY; + span += (int128)interval->day * INT64CONST(24) * USECS_PER_HOUR; return span; } +#else +/* + * arithmetic 32 bit extraction from int64 + * + * INT64_AU32 extracts significant 32 bit of int64 as a int64, and INT64_AL32 + * extracts non-siginificant 32 bit as a int64. Both macros extends sign bits + * according to the given value. The values of these macros and the parameter + * value are in the following relationship. + * + * i64 = (int64)INT64_AU32(i64) * (2^32) + (int64)INT64_AL32(i64) + */ +#define INT64_AU32(i64) ((i64) / (1LL<<32)) +#define INT64_AL32(i64) (((i64) & 0xffffffff) | ((i64) < 0 ? 0xffffffff00000000 : 0)) + +/* + * Adds signed 65 bit integer into LinearInterval variable. If s is not zero, + * its sign is used as v's sign. + */ +#define LINEARINTERVAL_ADD_INT65(li, v, s) \ +{ \ + uint64 t = (uint64)(v); \ + uint64 p = (li).lo; \ + (li).lo += t; \ + if (s < 0 || (s == 0 && v < 0)) \ + (li).hi --; \ + if ((li).lo < p) \ + (li).hi ++; \ +} + +static inline LinearInterval +interval_cmp_value(const Interval *interval) +{ + LinearInterval span = {0, 0}; + int64 dayfraction; + int64 days, tmp; + + /* days cannot overflow here */ + dayfraction = interval->time % USECS_PER_DAY; + days = interval->time / USECS_PER_DAY; + days += interval->month * INT64CONST(30); + days += interval->day; + + /* we assume arithmetic shift here */ + Assert(-1 >> 1 == -1); + + /* + * perform 128 bit multiplication using 64 bit variables. + * + * x * y = ((x.hi << 32) + x.lo) * (((y.hi << 32) + y.lo) + * = (x.hi * y.hi) << 64 + + * ((x.hi * y.lo) + (x.lo * y.hi)) << 32 + + * x.lo * y.lo + */ + + /* We don't bother calculation results in zero */ + if (days != 0) + { + /* + * Here, the possible maximum number of days is 0xf_865ce7d8 and the + * constant USECS_PER_DAY is 0x14_1dd76000. So the all term in the + * formula can be calculated safely in 64 bit arithmetic. + */ + int64 days_u32 = INT64_AU32(days); + int64 days_l32 = INT64_AL32(days); + + /* the first term */ + span.hi = days_u32 * (USECS_PER_DAY >> 32); + + /* the second term */ + tmp = days_l32 * (USECS_PER_DAY >> 32) + + days_u32 * (USECS_PER_DAY & 0xffffffff); + span.hi += INT64_AU32(tmp); + + /* this shift may push out MSB. supply it explicitly */ + LINEARINTERVAL_ADD_INT65(span, INT64_AL32(tmp) << 32, tmp); + + /* the third term */ + tmp = days_l32 * (USECS_PER_DAY & 0xffffffff); + LINEARINTERVAL_ADD_INT65(span, tmp, 0); + } + + /* add dayfraction */ + LINEARINTERVAL_ADD_INT65(span, dayfraction, 0); + + return span; +} +#endif static int interval_cmp_internal(Interval *interval1, Interval *interval2) { - TimeOffset span1 = interval_cmp_value(interval1); - TimeOffset span2 = interval_cmp_value(interval2); + LinearInterval span1 = interval_cmp_value(interval1); + LinearInterval span2 = interval_cmp_value(interval2); +#ifdef HAVE_INT128 return ((span1 < span2) ? -1 : (span1 > span2) ? 1 : 0); +#else + if (span1.hi < span2.hi) + return -1; + if (span1.hi > span2.hi) + return 1; + return (span1.lo < span2.lo ? -1 : + (span1.lo > span2.lo ? 1 : 0)); +#endif } Datum @@ -2384,9 +2481,21 @@ Datum interval_hash(PG_FUNCTION_ARGS) { Interval *interval = PG_GETARG_INTERVAL_P(0); - TimeOffset span = interval_cmp_value(interval); + LinearInterval span = interval_cmp_value(interval); + int64 span64; + + /* + * taking non-siginificant 64 bits for hashing. it is not necessary to + * fold the significant half of the bits into the rest for this + * usage. Anyway it is zero for most cases. + */ +#ifdef HAVE_INT128 + span64 = span & 0xffffffffffffffff; +#else + span64 = span.lo; +#endif - return DirectFunctionCall1(hashint8, Int64GetDatumFast(span)); + return DirectFunctionCall1(hashint8, Int64GetDatumFast(span64)); } /* overlaps_timestamp() --- implements the SQL OVERLAPS operator. diff --git a/src/include/datatype/timestamp.h b/src/include/datatype/timestamp.h index 16e1b4c..fd24987 100644 --- a/src/include/datatype/timestamp.h +++ b/src/include/datatype/timestamp.h @@ -48,6 +48,19 @@ typedef struct int32 month; /* months and years, after time for alignment */ } Interval; +#ifdef HAVE_INT128 +typedef int128 LinearInterval; +#else +/* + * LinearInterval's alternative defeinition for the environments without + * int128 arithmetics. See interval_cmp_value for datails. + */ +typedef struct +{ + int64 hi; /* holds significant 64 bits including a sign bit */ + uint64 lo; /* holds the lower 64 bits without sign */ +} LinearInterval; +#endif /* Limits on the "precision" option (typmod) for these data types */ #define MAX_TIMESTAMP_PRECISION 6 diff --git a/src/test/regress/expected/interval.out b/src/test/regress/expected/interval.out index 946c97a..d4e6a00 100644 --- a/src/test/regress/expected/interval.out +++ b/src/test/regress/expected/interval.out @@ -207,6 +207,42 @@ SELECT '' AS fortyfive, r1.*, r2.* | 34 years | 6 years (45 rows) +-- intervals out of 64bit linear value in microseconds +CREATE TABLE INTERVAL_TBL_OF (f1 interval); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('2147483647 days 2147483647 months'); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('1 year'); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('-2147483648 days -2147483648 months'); +SELECT '' AS "64bits", r1.*, r2.* + FROM INTERVAL_TBL_OF r1, INTERVAL_TBL_OF r2 + WHERE r1.f1 > r2.f1 + ORDER BY r1.f1, r2.f1; + 64bits | f1 | f1 +--------+----------------------------------------+------------------------------------------- + | 1 year | -178956970 years -8 mons -2147483648 days + | 178956970 years 7 mons 2147483647 days | -178956970 years -8 mons -2147483648 days + | 178956970 years 7 mons 2147483647 days | 1 year +(3 rows) + +CREATE INDEX ON INTERVAL_TBL_OF USING btree (f1); +SET enable_seqscan to false; +EXPLAIN SELECT '' AS "64bits_2", f1 + FROM INTERVAL_TBL_OF r1 ORDER BY f1; + QUERY PLAN +-------------------------------------------------------------------------------------------------------- + Index Only Scan using interval_tbl_of_f1_idx on interval_tbl_of r1 (cost=0.13..12.18 rows=3 width=48) +(1 row) + +SELECT '' AS "64bits_2", f1 + FROM INTERVAL_TBL_OF r1 ORDER BY f1; + 64bits_2 | f1 +----------+------------------------------------------- + | -178956970 years -8 mons -2147483648 days + | 1 year + | 178956970 years 7 mons 2147483647 days +(3 rows) + +SET enable_seqscan to default; +DROP TABLE INTERVAL_TBL_OF; -- Test multiplication and division with intervals. -- Floating point arithmetic rounding errors can lead to unexpected results, -- though the code attempts to do the right thing and round up to days and diff --git a/src/test/regress/sql/interval.sql b/src/test/regress/sql/interval.sql index cff9ada..aa21673 100644 --- a/src/test/regress/sql/interval.sql +++ b/src/test/regress/sql/interval.sql @@ -59,6 +59,23 @@ SELECT '' AS fortyfive, r1.*, r2.* WHERE r1.f1 > r2.f1 ORDER BY r1.f1, r2.f1; +-- intervals out of 64bit linear value in microseconds +CREATE TABLE INTERVAL_TBL_OF (f1 interval); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('2147483647 days 2147483647 months'); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('1 year'); +INSERT INTO INTERVAL_TBL_OF (f1) VALUES ('-2147483648 days -2147483648 months'); +SELECT '' AS "64bits", r1.*, r2.* + FROM INTERVAL_TBL_OF r1, INTERVAL_TBL_OF r2 + WHERE r1.f1 > r2.f1 + ORDER BY r1.f1, r2.f1; +CREATE INDEX ON INTERVAL_TBL_OF USING btree (f1); +SET enable_seqscan to false; +EXPLAIN SELECT '' AS "64bits_2", f1 + FROM INTERVAL_TBL_OF r1 ORDER BY f1; +SELECT '' AS "64bits_2", f1 + FROM INTERVAL_TBL_OF r1 ORDER BY f1; +SET enable_seqscan to default; +DROP TABLE INTERVAL_TBL_OF; -- Test multiplication and division with intervals. -- Floating point arithmetic rounding errors can lead to unexpected results, -- 2.9.2
>From db899ea2bbee38aed367c522f898a3e0fe1ceffd Mon Sep 17 00:00:00 2001 From: Kyotaro Horiguchi <horiguchi.kyotaro@xxxxxxxxxxxxx> Date: Tue, 4 Apr 2017 16:44:03 +0900 Subject: [PATCH 2/2] sanity check code --- src/backend/utils/adt/timestamp.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/backend/utils/adt/timestamp.c b/src/backend/utils/adt/timestamp.c index 2411bd7..47af4cf 100644 --- a/src/backend/utils/adt/timestamp.c +++ b/src/backend/utils/adt/timestamp.c @@ -2385,6 +2385,20 @@ interval_cmp_value(const Interval *interval) /* add dayfraction */ LINEARINTERVAL_ADD_INT65(span, dayfraction, 0); + { + __int128 v1 = (((__int128)span.hi)<<64) + (__int128)span.lo; + __int128 v2 = (__int128)interval->time; + v2 += (__int128)interval->month * INT64CONST(30) * USECS_PER_DAY; + v2 += (__int128)interval->day * INT64CONST(24) * USECS_PER_HOUR; + if (v1 != v2) + elog(LOG, "#### %lx:%lx != %lx:%lx for (%ld, %d, %d)", + (uint64)(v1>>64), + (uint64)(v1 & 0xffffffffffffffff), + (uint64)(v2>>64), + (uint64)(v2 & 0xffffffffffffffff), + interval->time, interval->month, interval->day); + } + return span; } #endif -- 2.9.2
-- Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx) To make changes to your subscription: http://www.postgresql.org/mailpref/pgsql-general