I was surprised by the poor performance when I first tried to use range
types. What I expected was that the following two queries would be
equivalent (see attached script):
postgres=# EXPLAIN ANALYZE SELECT some_number FROM integer_test WHERE
some_number BETWEEN -2 AND 2;
QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------
Index Only Scan using integer_test_some_number_idx on integer_test
(cost=0.28..8.38 rows=5 width=4) (actual time=0.045..0.052 rows=5 loops=1)
Index Cond: ((some_number >= '-2'::integer) AND (some_number <= 2))
Heap Fetches: 5
Planning Time: 0.319 ms
Execution Time: 0.094 ms
(5 rows)
postgres=# EXPLAIN ANALYZE SELECT some_number FROM integer_test WHERE
some_number <@ int4range(-2, 2, '[]');
QUERY PLAN
--------------------------------------------------------------------------------------------------------
Seq Scan on integer_test (cost=0.00..34.01 rows=10 width=4) (actual
time=0.585..1.136 rows=5 loops=1)
Filter: (some_number <@ '[-2,3)'::int4range)
Rows Removed by Filter: 1996
Planning Time: 0.175 ms
Execution Time: 1.164 ms
(5 rows)
But clearly, the planner is not able to use the btree index in the
presence of the range operator.
So I attempted to add support functions for the
'elem_contained_by_range' and 'range_contains_elem' operators (patch
attached):
That gives the following execution plan (applied on
26f7802beb2a4aafa0903f5bedeb7f1fa6f4f358):
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
Index Only Scan using integer_test_some_number_idx on integer_test
(cost=0.28..8.38 rows=10 width=4) (actual time=0.046..0.058 rows=5 loops=1)
Index Cond: ((some_number >= '-2'::integer) AND (some_number < 3))
Heap Fetches: 5
Planning Time: 0.694 ms
Execution Time: 0.114 ms
(5 rows)
That was what I was hoping to see (even though the row estimate is still
a bit off).
Unfortunately this only works for the trivial case where the range is
actually a constant.
The third query in the attached script (range_test.sql) produces the
following plan, where the support function is not useful:
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.14..419.56 rows=22 width=12) (actual
time=3.791..36.549 rows=121 loops=1)
Join Filter: (integer_test.some_number <@
int4range(number_q.one_number, number_q.another_number, '[]'::text))
Rows Removed by Join Filter: 21890
CTE number_q
-> Function Scan on generate_series (cost=0.00..0.14 rows=11
width=8) (actual time=0.063..0.076 rows=11 loops=1)
-> CTE Scan on number_q (cost=0.00..0.22 rows=11 width=8) (actual
time=0.071..0.107 rows=11 loops=1)
-> Materialize (cost=0.00..39.02 rows=2001 width=4) (actual
time=0.011..0.516 rows=2001 loops=11)
-> Seq Scan on integer_test (cost=0.00..29.01 rows=2001
width=4) (actual time=0.077..1.043 rows=2001 loops=1)
Planning Time: 3.172 ms
Execution Time: 36.908 ms
(10 rows)
So my question here is, how to go about handling the more interesting
cases, where we are passed a FuncExpr (instead of a Const)?
Is it even possible to return something useful in this case?
As far as I can tell, the support function is being passed a reference
to the range constructor function when the range is not a constant.
But I don't have the insight required to build opclauses that can handle
non-constants.
Any thoughs or pointers on solving this?
Thanks,
Kim Johan Andersson
diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c
index b09cb49054..4cb940dbbe 100644
--- a/src/backend/utils/adt/rangetypes.c
+++ b/src/backend/utils/adt/rangetypes.c
@@ -31,12 +31,18 @@
#include "postgres.h"
#include "access/tupmacs.h"
+#include "access/stratnum.h"
#include "common/hashfn.h"
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "miscadmin.h"
+#include "nodes/supportnodes.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/pg_list.h"
#include "port/pg_bitutils.h"
#include "utils/builtins.h"
+#include "utils/fmgroids.h"
#include "utils/date.h"
#include "utils/lsyscache.h"
#include "utils/rangetypes.h"
@@ -547,7 +553,6 @@ elem_contained_by_range(PG_FUNCTION_ARGS)
PG_RETURN_BOOL(range_contains_elem_internal(typcache, r, val));
}
-
/* range, range -> bool functions */
/* equality (internal version) */
@@ -2619,3 +2624,259 @@ datum_write(Pointer ptr, Datum datum, bool typbyval, char typalign,
return ptr;
}
+
+
+/*
+ * find_index_conditions
+ * Try to generate an indexquals for an element contained in a range
+ *
+ * Supports both the ELEM_CONTAINED_BY_RANGE and RANGE_CONTAINS_ELEM cases
+ *
+ */
+static List*
+find_index_conditions(Node* leftop,
+ Node* rightop,
+ int indexarg,
+ Oid funcid,
+ Oid opfamily)
+
+{
+ List* result = NIL;
+ RangeType* range;
+ TypeCacheEntry* rangetypcache;
+ RangeBound lower;
+ RangeBound upper;
+ bool empty;
+ Const* rangeConst;
+ Expr* elemExpr;
+ Oid elemType;
+ int16 elemTypeLen;
+ bool elemByValue;
+ Oid elemCollation;
+
+ switch (funcid)
+ {
+ case F_ELEM_CONTAINED_BY_RANGE:
+ /*
+ if (IsA(rightop, FuncExpr))
+ {
+ // What to do here?
+ FuncExpr* func = (FuncExpr*)rightop;
+ ListCell* lc;
+
+ Assert(list_length(func->args) == 3);
+
+ foreach(lc, func->args)
+ {
+ Expr* arg = (Expr*)lfirst(lc);
+ NodeTag nodeTag = nodeTag(arg);
+ arg = NULL;
+ }
+ }
+ */
+
+ if (!IsA(rightop, Const) || ((Const*)rightop)->constisnull)
+ return NIL;
+
+ elemExpr = (Expr*)leftop;
+ rangeConst = (Const*)rightop;
+ break;
+
+ case F_RANGE_CONTAINS_ELEM:
+
+ if (!IsA(leftop, Const) || ((Const*)leftop)->constisnull)
+ return NIL;
+
+ elemExpr = (Expr*)rightop;
+ rangeConst = (Const*)leftop;
+ break;
+
+ default:
+ return NIL;
+ }
+
+ Assert(IsA(elemExpr, Var));
+ Assert(IsA(rangeConst, Const));
+
+ // We need to figure out what kind of elemType is in the range we are dealing with, and deserialize it to get the bounds.
+ range = DatumGetRangeTypeP(rangeConst->constvalue);
+ rangetypcache = lookup_type_cache(RangeTypeGetOid(range), TYPECACHE_RANGE_INFO);
+
+ elemType = rangetypcache->rngelemtype->type_id;
+ elemByValue = rangetypcache->rngelemtype->typbyval;
+ elemTypeLen = rangetypcache->rngelemtype->typlen;
+ elemCollation = rangetypcache->rngelemtype->typcollation;
+
+ range_deserialize(rangetypcache, range, &lower, &upper, &empty);
+
+ // The planner will call us for an empty range.
+ if (empty)
+ return NIL;
+
+ // We can't do anything useful with a bound if it is not finite.
+ if (!lower.infinite)
+ {
+ Oid oproid = get_opfamily_member(opfamily, elemType, elemType, lower.inclusive?BTGreaterEqualStrategyNumber:BTGreaterStrategyNumber);
+ if (oproid != InvalidOid)
+ {
+ Expr* expr = make_opclause
+ (
+ oproid,
+ BOOLOID,
+ false,
+ elemExpr,
+ (Expr*)makeConst
+ (
+ elemType,
+ -1,
+ elemCollation,
+ elemTypeLen,
+ lower.val,
+ false,
+ elemByValue
+ ),
+ InvalidOid,
+ InvalidOid
+ );
+
+ result = lappend(result, expr);
+ }
+ }
+
+ if (!upper.infinite)
+ {
+ Oid oproid = get_opfamily_member(opfamily, elemType, elemType, upper.inclusive?BTLessEqualStrategyNumber:BTLessStrategyNumber);
+ if (oproid != InvalidOid)
+ {
+ Expr* expr = make_opclause
+ (
+ oproid,
+ BOOLOID,
+ false,
+ elemExpr,
+ (Expr*)makeConst
+ (
+ elemType,
+ -1,
+ elemCollation,
+ elemTypeLen,
+ upper.val,
+ false,
+ elemByValue
+ ),
+ InvalidOid,
+ InvalidOid
+ );
+
+ result = lappend(result, expr);
+ }
+ }
+
+ return result;
+}
+
+Node * match_support_request(Node* rawreq)
+{
+ Node* ret = NULL;
+
+ if (IsA(rawreq, SupportRequestIndexCondition))
+ {
+ /* Try to convert operator/function call to index conditions */
+ SupportRequestIndexCondition* req = (SupportRequestIndexCondition*)rawreq;
+
+ if (is_opclause(req->node))
+ {
+ OpExpr* clause = (OpExpr*)req->node;
+
+ Assert(list_length(clause->args) == 2);
+
+ ret = (Node*)
+ find_index_conditions((Node*)linitial(clause->args),
+ (Node*)lsecond(clause->args),
+ req->indexarg,
+ req->funcid,
+ req->opfamily);
+
+ }
+ else if (is_funcclause(req->node))
+ {
+ FuncExpr* clause = (FuncExpr*)req->node;
+
+ Assert(list_length(clause->args) == 2);
+
+ ret = (Node*)
+ find_index_conditions((Node*)linitial(clause->args),
+ (Node*)lsecond(clause->args),
+ req->indexarg,
+ req->funcid,
+ req->opfamily);
+ }
+
+ // If matched, the index condition is exact.
+ if (ret != NULL)
+ req->lossy = false;
+ }
+ else if (IsA(rawreq, SupportRequestSimplify))
+ {
+ SupportRequestSimplify* req = (SupportRequestSimplify*)rawreq;
+ FuncExpr* clause = req->fcall;
+ Const* rangeConst;
+ RangeType* range;
+ Node* leftop = (Node*)linitial(clause->args);
+ Node* rightop = lsecond(clause->args);
+
+ switch (clause->funcid)
+ {
+ case F_ELEM_CONTAINED_BY_RANGE:
+
+ if (!IsA(rightop, Const) || ((Const*)rightop)->constisnull)
+ return ret;
+
+ rangeConst = (Const*)rightop;
+ break;
+
+ case F_RANGE_CONTAINS_ELEM:
+
+ if (!IsA(leftop, Const) || ((Const*)leftop)->constisnull)
+ return ret;
+
+ rangeConst = (Const*)leftop;
+ break;
+
+ default:
+ return ret;
+ }
+
+ // If the range is empty, then there can be no matches.
+ range = DatumGetRangeTypeP(rangeConst->constvalue);
+ char flags = range_get_flags(range);
+ if (flags & RANGE_EMPTY)
+ ret = makeBoolConst(false, false);
+ }
+
+ return ret;
+}
+
+/*
+ * Planner support function for elem_contained_by_range operator
+ */
+Datum
+elem_contained_by_range_support(PG_FUNCTION_ARGS)
+{
+ Node* rawreq = (Node*)PG_GETARG_POINTER(0);
+ Node* ret = match_support_request(rawreq);
+
+ PG_RETURN_POINTER(ret);
+}
+
+/*
+ * Planner support function for range_contains_elem operator
+ */
+Datum
+range_contains_elem_support(PG_FUNCTION_ARGS)
+{
+ Node* rawreq = (Node*)PG_GETARG_POINTER(0);
+ Node* ret = match_support_request(rawreq);
+
+ PG_RETURN_POINTER(ret);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index a07e737a33..53a3f67ec5 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10237,13 +10237,15 @@
proargtypes => 'anyrange anyrange', prosrc => 'range_overlaps' },
{ oid => '3858',
proname => 'range_contains_elem', prorettype => 'bool',
- proargtypes => 'anyrange anyelement', prosrc => 'range_contains_elem' },
+ proargtypes => 'anyrange anyelement', prosrc => 'range_contains_elem',
+ prosupport => 'range_contains_elem_support' },
{ oid => '3859',
proname => 'range_contains', prorettype => 'bool',
proargtypes => 'anyrange anyrange', prosrc => 'range_contains' },
{ oid => '3860',
proname => 'elem_contained_by_range', prorettype => 'bool',
- proargtypes => 'anyelement anyrange', prosrc => 'elem_contained_by_range' },
+ proargtypes => 'anyelement anyrange', prosrc => 'elem_contained_by_range',
+ prosupport => 'elem_contained_by_range_support' },
{ oid => '3861',
proname => 'range_contained_by', prorettype => 'bool',
proargtypes => 'anyrange anyrange', prosrc => 'range_contained_by' },
@@ -10265,6 +10267,12 @@
{ oid => '3867',
proname => 'range_union', prorettype => 'anyrange',
proargtypes => 'anyrange anyrange', prosrc => 'range_union' },
+{ oid => '9998', descr => 'Planner support function for range_contains_elem operator',
+ proname => 'range_contains_elem_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'range_contains_elem_support' },
+{ oid => '9999', descr => 'Planner support function for elem_contained_by_range operator',
+ proname => 'elem_contained_by_range_support', prorettype => 'internal',
+ proargtypes => 'internal', prosrc => 'elem_contained_by_range_support' },
{ oid => '4057',
descr => 'the smallest range which includes both of the given ranges',
proname => 'range_merge', prorettype => 'anyrange',
CREATE TABLE integer_test AS
(
SELECT
generate_series AS some_number
FROM
generate_series(-1000, 1000)
);
CREATE INDEX ON integer_test( some_number );
ANALYZE integer_test;
EXPLAIN ANALYZE SELECT some_number FROM integer_test WHERE some_number BETWEEN -2 AND 2;
EXPLAIN ANALYZE SELECT some_number FROM integer_test WHERE some_number <@ int4range(-2, 2, '[]');
EXPLAIN ANALYZE
WITH number_q AS MATERIALIZED
(
SELECT
-generate_series AS one_number,
generate_series AS another_number
FROM
generate_series(0,10)
)
SELECT
number_q.*,
integer_test.some_number
FROM
number_q
JOIN integer_test ON (integer_test.some_number <@ int4range( number_q.one_number, number_q.another_number, '[]' ))
;
DROP TABLE integer_test;