Search Postgresql Archives

Support functions for range types

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I was surprised by the poor performance when I first tried to use range types. What I expected was that the following two queries would be equivalent (see attached script):

postgres=# EXPLAIN ANALYZE SELECT some_number FROM integer_test WHERE some_number BETWEEN -2 AND 2;

QUERY PLAN
------------------------------------------------------------------------------------------------------------------------------------------------
Index Only Scan using integer_test_some_number_idx on integer_test (cost=0.28..8.38 rows=5 width=4) (actual time=0.045..0.052 rows=5 loops=1)
   Index Cond: ((some_number >= '-2'::integer) AND (some_number <= 2))
   Heap Fetches: 5
 Planning Time: 0.319 ms
 Execution Time: 0.094 ms
(5 rows)

postgres=# EXPLAIN ANALYZE SELECT some_number FROM integer_test WHERE some_number <@ int4range(-2, 2, '[]');
                                               QUERY PLAN
--------------------------------------------------------------------------------------------------------
Seq Scan on integer_test (cost=0.00..34.01 rows=10 width=4) (actual time=0.585..1.136 rows=5 loops=1)
   Filter: (some_number <@ '[-2,3)'::int4range)
   Rows Removed by Filter: 1996
 Planning Time: 0.175 ms
 Execution Time: 1.164 ms
(5 rows)

But clearly, the planner is not able to use the btree index in the presence of the range operator. So I attempted to add support functions for the 'elem_contained_by_range' and 'range_contains_elem' operators (patch attached): That gives the following execution plan (applied on 26f7802beb2a4aafa0903f5bedeb7f1fa6f4f358):


QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------------------
Index Only Scan using integer_test_some_number_idx on integer_test (cost=0.28..8.38 rows=10 width=4) (actual time=0.046..0.058 rows=5 loops=1)
   Index Cond: ((some_number >= '-2'::integer) AND (some_number < 3))
   Heap Fetches: 5
 Planning Time: 0.694 ms
 Execution Time: 0.114 ms
(5 rows)

That was what I was hoping to see (even though the row estimate is still a bit off). Unfortunately this only works for the trivial case where the range is actually a constant. The third query in the attached script (range_test.sql) produces the following plan, where the support function is not useful:

QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.14..419.56 rows=22 width=12) (actual time=3.791..36.549 rows=121 loops=1) Join Filter: (integer_test.some_number <@ int4range(number_q.one_number, number_q.another_number, '[]'::text))
   Rows Removed by Join Filter: 21890
   CTE number_q
-> Function Scan on generate_series (cost=0.00..0.14 rows=11 width=8) (actual time=0.063..0.076 rows=11 loops=1) -> CTE Scan on number_q (cost=0.00..0.22 rows=11 width=8) (actual time=0.071..0.107 rows=11 loops=1) -> Materialize (cost=0.00..39.02 rows=2001 width=4) (actual time=0.011..0.516 rows=2001 loops=11) -> Seq Scan on integer_test (cost=0.00..29.01 rows=2001 width=4) (actual time=0.077..1.043 rows=2001 loops=1)
 Planning Time: 3.172 ms
 Execution Time: 36.908 ms
(10 rows)

So my question here is, how to go about handling the more interesting cases, where we are passed a FuncExpr (instead of a Const)?
Is it even possible to return something useful in this case?

As far as I can tell, the support function is being passed a reference to the range constructor function when the range is not a constant. But I don't have the insight required to build opclauses that can handle non-constants.
Any thoughs or pointers on solving this?

	Thanks,
			Kim Johan Andersson
diff --git a/src/backend/utils/adt/rangetypes.c b/src/backend/utils/adt/rangetypes.c
index b09cb49054..4cb940dbbe 100644
--- a/src/backend/utils/adt/rangetypes.c
+++ b/src/backend/utils/adt/rangetypes.c
@@ -31,12 +31,18 @@
 #include "postgres.h"
 
 #include "access/tupmacs.h"
+#include "access/stratnum.h"
 #include "common/hashfn.h"
 #include "lib/stringinfo.h"
 #include "libpq/pqformat.h"
 #include "miscadmin.h"
+#include "nodes/supportnodes.h"
+#include "nodes/makefuncs.h"
+#include "nodes/nodeFuncs.h"
+#include "nodes/pg_list.h"
 #include "port/pg_bitutils.h"
 #include "utils/builtins.h"
+#include "utils/fmgroids.h"
 #include "utils/date.h"
 #include "utils/lsyscache.h"
 #include "utils/rangetypes.h"
@@ -547,7 +553,6 @@ elem_contained_by_range(PG_FUNCTION_ARGS)
 	PG_RETURN_BOOL(range_contains_elem_internal(typcache, r, val));
 }
 
-
 /* range, range -> bool functions */
 
 /* equality (internal version) */
@@ -2619,3 +2624,259 @@ datum_write(Pointer ptr, Datum datum, bool typbyval, char typalign,
 
 	return ptr;
 }
+
+
+/*
+ * find_index_conditions
+ *	  Try to generate an indexquals for an element contained in a range
+ *
+ * Supports both the ELEM_CONTAINED_BY_RANGE and RANGE_CONTAINS_ELEM cases
+ * 
+ */
+static List*
+find_index_conditions(Node* leftop,
+	Node* rightop,
+	int indexarg,
+	Oid funcid,
+	Oid opfamily)
+
+{
+	List*			result = NIL;
+	RangeType*		range;
+	TypeCacheEntry* rangetypcache;
+	RangeBound		lower;
+	RangeBound		upper;
+	bool			empty;
+	Const*			rangeConst;
+	Expr*			elemExpr;
+	Oid				elemType;
+	int16			elemTypeLen;
+	bool			elemByValue;
+	Oid				elemCollation;
+
+	switch (funcid)
+	{
+		case F_ELEM_CONTAINED_BY_RANGE:
+			/*
+			if (IsA(rightop, FuncExpr))
+			{
+				// What to do here?
+				FuncExpr* func = (FuncExpr*)rightop;
+				ListCell* lc;
+
+				Assert(list_length(func->args) == 3);
+
+				foreach(lc, func->args)
+				{
+					Expr* arg = (Expr*)lfirst(lc);
+					NodeTag nodeTag = nodeTag(arg);
+					arg = NULL;
+				}
+			}
+			*/
+
+			if (!IsA(rightop, Const) || ((Const*)rightop)->constisnull)
+				return NIL;
+
+			elemExpr = (Expr*)leftop;
+			rangeConst = (Const*)rightop;
+			break;
+
+		case F_RANGE_CONTAINS_ELEM:
+
+			if (!IsA(leftop, Const) || ((Const*)leftop)->constisnull)
+				return NIL;
+
+			elemExpr = (Expr*)rightop;
+			rangeConst = (Const*)leftop;
+			break;
+
+		default:
+			return NIL;
+	}
+
+	Assert(IsA(elemExpr, Var));
+	Assert(IsA(rangeConst, Const));
+
+	// We need to figure out what kind of elemType is in the range we are dealing with, and deserialize it to get the bounds.
+	range = DatumGetRangeTypeP(rangeConst->constvalue);
+	rangetypcache = lookup_type_cache(RangeTypeGetOid(range), TYPECACHE_RANGE_INFO);
+
+	elemType = rangetypcache->rngelemtype->type_id;
+	elemByValue = rangetypcache->rngelemtype->typbyval;
+	elemTypeLen = rangetypcache->rngelemtype->typlen;
+	elemCollation = rangetypcache->rngelemtype->typcollation;
+
+	range_deserialize(rangetypcache, range, &lower, &upper, &empty);
+
+	// The planner will call us for an empty range.
+	if (empty)
+		return NIL;
+
+	// We can't do anything useful with a bound if it is not finite.
+	if (!lower.infinite)
+	{
+		Oid oproid = get_opfamily_member(opfamily, elemType, elemType, lower.inclusive?BTGreaterEqualStrategyNumber:BTGreaterStrategyNumber);
+		if (oproid != InvalidOid)
+		{
+			Expr* expr = make_opclause
+			(
+				oproid,
+				BOOLOID,
+				false,
+				elemExpr,
+				(Expr*)makeConst
+				(
+					elemType,
+					-1,
+					elemCollation,
+					elemTypeLen,
+					lower.val,
+					false,
+					elemByValue
+				),
+				InvalidOid,
+				InvalidOid
+			);
+
+			result = lappend(result, expr);
+		}
+	}
+
+	if (!upper.infinite)
+	{
+		Oid oproid = get_opfamily_member(opfamily, elemType, elemType, upper.inclusive?BTLessEqualStrategyNumber:BTLessStrategyNumber);
+		if (oproid != InvalidOid)
+		{
+			Expr* expr = make_opclause
+			(
+				oproid,
+				BOOLOID,
+				false,
+				elemExpr,
+				(Expr*)makeConst
+				(
+					elemType,
+					-1,
+					elemCollation,
+					elemTypeLen,
+					upper.val,
+					false,
+					elemByValue
+				),
+				InvalidOid,
+				InvalidOid
+			);
+
+			result = lappend(result, expr);
+		}
+	}
+
+	return result;
+}
+
+Node * match_support_request(Node* rawreq)
+{
+	Node* ret = NULL;
+
+	if (IsA(rawreq, SupportRequestIndexCondition))
+	{
+		/* Try to convert operator/function call to index conditions */
+		SupportRequestIndexCondition* req = (SupportRequestIndexCondition*)rawreq;
+
+		if (is_opclause(req->node))
+		{
+			OpExpr* clause = (OpExpr*)req->node;
+
+			Assert(list_length(clause->args) == 2);
+
+			ret = (Node*)
+				find_index_conditions((Node*)linitial(clause->args),
+					(Node*)lsecond(clause->args),
+					req->indexarg,
+					req->funcid,
+					req->opfamily);
+
+		}
+		else if (is_funcclause(req->node))
+		{
+			FuncExpr* clause = (FuncExpr*)req->node;
+
+			Assert(list_length(clause->args) == 2);
+
+			ret = (Node*)
+				find_index_conditions((Node*)linitial(clause->args),
+					(Node*)lsecond(clause->args),
+					req->indexarg,
+					req->funcid,
+					req->opfamily);
+		}
+
+		// If matched, the index condition is exact.
+		if (ret != NULL)
+			req->lossy = false;
+	}
+	else if (IsA(rawreq, SupportRequestSimplify))
+	{
+		SupportRequestSimplify* req = (SupportRequestSimplify*)rawreq;
+		FuncExpr* clause = req->fcall;
+		Const* rangeConst;
+		RangeType* range;
+		Node*	leftop = (Node*)linitial(clause->args);
+		Node*	rightop = lsecond(clause->args);
+
+		switch (clause->funcid)
+		{
+		case F_ELEM_CONTAINED_BY_RANGE:
+
+			if (!IsA(rightop, Const) || ((Const*)rightop)->constisnull)
+				return ret;
+
+			rangeConst = (Const*)rightop;
+			break;
+
+		case F_RANGE_CONTAINS_ELEM:
+
+			if (!IsA(leftop, Const) || ((Const*)leftop)->constisnull)
+				return ret;
+
+			rangeConst = (Const*)leftop;
+			break;
+
+		default:
+			return ret;
+		}
+
+		// If the range is empty, then there can be no matches.
+		range = DatumGetRangeTypeP(rangeConst->constvalue);
+		char	flags = range_get_flags(range);
+		if (flags & RANGE_EMPTY)
+			ret = makeBoolConst(false, false);
+	}
+
+	return ret;
+}
+
+/*
+ * Planner support function for elem_contained_by_range operator
+ */
+Datum
+elem_contained_by_range_support(PG_FUNCTION_ARGS)
+{
+	Node* rawreq = (Node*)PG_GETARG_POINTER(0);
+	Node* ret = match_support_request(rawreq);
+
+	PG_RETURN_POINTER(ret);
+}
+
+/*
+ * Planner support function for range_contains_elem operator
+ */
+Datum
+range_contains_elem_support(PG_FUNCTION_ARGS)
+{
+	Node* rawreq = (Node*)PG_GETARG_POINTER(0);
+	Node* ret = match_support_request(rawreq);
+
+	PG_RETURN_POINTER(ret);
+}
diff --git a/src/include/catalog/pg_proc.dat b/src/include/catalog/pg_proc.dat
index a07e737a33..53a3f67ec5 100644
--- a/src/include/catalog/pg_proc.dat
+++ b/src/include/catalog/pg_proc.dat
@@ -10237,13 +10237,15 @@
   proargtypes => 'anyrange anyrange', prosrc => 'range_overlaps' },
 { oid => '3858',
   proname => 'range_contains_elem', prorettype => 'bool',
-  proargtypes => 'anyrange anyelement', prosrc => 'range_contains_elem' },
+  proargtypes => 'anyrange anyelement', prosrc => 'range_contains_elem',
+  prosupport => 'range_contains_elem_support' },
 { oid => '3859',
   proname => 'range_contains', prorettype => 'bool',
   proargtypes => 'anyrange anyrange', prosrc => 'range_contains' },
 { oid => '3860',
   proname => 'elem_contained_by_range', prorettype => 'bool',
-  proargtypes => 'anyelement anyrange', prosrc => 'elem_contained_by_range' },
+  proargtypes => 'anyelement anyrange', prosrc => 'elem_contained_by_range',
+  prosupport => 'elem_contained_by_range_support' },
 { oid => '3861',
   proname => 'range_contained_by', prorettype => 'bool',
   proargtypes => 'anyrange anyrange', prosrc => 'range_contained_by' },
@@ -10265,6 +10267,12 @@
 { oid => '3867',
   proname => 'range_union', prorettype => 'anyrange',
   proargtypes => 'anyrange anyrange', prosrc => 'range_union' },
+{ oid => '9998', descr => 'Planner support function for range_contains_elem operator',
+  proname => 'range_contains_elem_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'range_contains_elem_support' },
+{ oid => '9999', descr => 'Planner support function for elem_contained_by_range operator',
+  proname => 'elem_contained_by_range_support', prorettype => 'internal',
+  proargtypes => 'internal', prosrc => 'elem_contained_by_range_support' },
 { oid => '4057',
   descr => 'the smallest range which includes both of the given ranges',
   proname => 'range_merge', prorettype => 'anyrange',
CREATE TABLE integer_test AS
(
	SELECT
		generate_series AS some_number
	FROM
		generate_series(-1000, 1000)
);
CREATE INDEX ON integer_test( some_number );
ANALYZE integer_test;

EXPLAIN ANALYZE SELECT some_number FROM integer_test WHERE some_number BETWEEN -2 AND 2;
EXPLAIN ANALYZE SELECT some_number FROM integer_test WHERE some_number <@ int4range(-2, 2, '[]');

EXPLAIN ANALYZE
WITH number_q AS MATERIALIZED
(
	SELECT
		-generate_series AS one_number,
		generate_series AS another_number
	FROM
		generate_series(0,10)
)
SELECT
	number_q.*,
	integer_test.some_number
FROM
	number_q
	JOIN integer_test ON (integer_test.some_number <@ int4range( number_q.one_number, number_q.another_number, '[]' ))
;

DROP TABLE integer_test;

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Postgresql Jobs]     [Postgresql Admin]     [Postgresql Performance]     [Linux Clusters]     [PHP Home]     [PHP on Windows]     [Kernel Newbies]     [PHP Classes]     [PHP Databases]     [Postgresql & PHP]     [Yosemite]

  Powered by Linux