More benchmarking results are in with a comparison between cursors, arrays, and temporary tables for storing, using, and accessing data outside the stored procedure:
CREATE OR REPLACE FUNCTION Test_Init() RETURNS INTEGER AS
$BODY$
DECLARE
temp INTEGER;
BEGIN
DROP TABLE IF EXISTS test_table1 CASCADE;
CREATE TABLE test_table1 (
id SERIAL NOT NULL PRIMARY KEY,
junk_field1 INTEGER,
junk_field2 INTEGER,
junk_field3 INTEGER
) WITH (OIDS=FALSE);
DROP INDEX IF EXISTS test_table1_junk_field1_idx CASCADE;
DROP INDEX IF EXISTS test_table1_junk_field2_idx CASCADE;
DROP INDEX IF EXISTS test_table1_junk_field3_idx CASCADE;
FOR i IN 1..10000 LOOP
INSERT INTO test_table1 (junk_field1, junk_field2, junk_field3) VALUES
(i%10, i%20, i%30);
END LOOP;
CREATE INDEX test_table1_junk_field1_idx ON test_table1 USING btree (junk_field1);
CREATE INDEX test_table1_junk_field2_idx ON test_table1 USING btree (junk_field2);
CREATE INDEX test_table1_junk_field3_idx ON test_table1 USING btree (junk_field3);
DROP TABLE IF EXISTS test_table2 CASCADE;
CREATE TABLE test_table2 (
id SERIAL NOT NULL PRIMARY KEY,
junk_field1 INTEGER,
junk_field2 INTEGER,
junk_field3 INTEGER
) WITH (OIDS=FALSE);
DROP INDEX IF EXISTS test_table2_junk_field1_idx CASCADE;
DROP INDEX IF EXISTS test_table2_junk_field2_idx CASCADE;
DROP INDEX IF EXISTS test_table2_junk_field3_idx CASCADE;
FOR i IN 1..10000 LOOP
INSERT INTO test_table2 (junk_field1, junk_field2, junk_field3) VALUES
(i%15, i%25, i%35);
END LOOP;
CREATE INDEX test_table2_junk_field1_idx ON test_table2 USING btree (junk_field1);
CREATE INDEX test_table2_junk_field2_idx ON test_table2 USING btree (junk_field2);
CREATE INDEX test_table2_junk_field3_idx ON test_table2 USING btree (junk_field3);
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
SELECT * FROM Test_Init();
DROP TYPE IF EXISTS test_row_type CASCADE;
CREATE TYPE test_row_type AS (
junk_field1 INTEGER,
junk_field2 INTEGER,
junk_field3 INTEGER
);
CREATE OR REPLACE FUNCTION Test1() RETURNS INTEGER AS
$BODY$
DECLARE
temp_row test_row_type;
cursresults test_row_type[];
curs SCROLL CURSOR IS
SELECT * FROM test_table1 WHERE junk_field1=8;
BEGIN
FOR temp IN curs LOOP
temp_row := temp;
cursresults := array_append(cursresults, temp_row);
END LOOP;
OPEN curs;
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION Test2() RETURNS INTEGER AS
$BODY$
DECLARE
cursresults test_row_type[];
cur SCROLL CURSOR IS
SELECT * FROM unnest(cursresults);
BEGIN
cursresults := array(SELECT (junk_field1, junk_field2, junk_field3)::test_row_type AS rec FROM test_table1 WHERE junk_field1=8);
OPEN cur;
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION Test3() RETURNS INTEGER AS
$BODY$
DECLARE
BEGIN
CREATE TEMPORARY TABLE results WITH (OIDS=FALSE) ON COMMIT DROP AS (
SELECT junk_field1, junk_field2, junk_field3 FROM test_table1 WHERE junk_field1=8
);
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION Test4() RETURNS INTEGER AS
$BODY$
DECLARE
cur SCROLL CURSOR IS
SELECT * FROM results;
BEGIN
CREATE TEMPORARY TABLE results WITH (OIDS=FALSE) ON COMMIT DROP AS (
SELECT junk_field1, junk_field2, junk_field3 FROM test_table1 WHERE junk_field1=8
);
OPEN cur;
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
$BODY$
DECLARE
temp INTEGER;
BEGIN
DROP TABLE IF EXISTS test_table1 CASCADE;
CREATE TABLE test_table1 (
id SERIAL NOT NULL PRIMARY KEY,
junk_field1 INTEGER,
junk_field2 INTEGER,
junk_field3 INTEGER
) WITH (OIDS=FALSE);
DROP INDEX IF EXISTS test_table1_junk_field1_idx CASCADE;
DROP INDEX IF EXISTS test_table1_junk_field2_idx CASCADE;
DROP INDEX IF EXISTS test_table1_junk_field3_idx CASCADE;
FOR i IN 1..10000 LOOP
INSERT INTO test_table1 (junk_field1, junk_field2, junk_field3) VALUES
(i%10, i%20, i%30);
END LOOP;
CREATE INDEX test_table1_junk_field1_idx ON test_table1 USING btree (junk_field1);
CREATE INDEX test_table1_junk_field2_idx ON test_table1 USING btree (junk_field2);
CREATE INDEX test_table1_junk_field3_idx ON test_table1 USING btree (junk_field3);
DROP TABLE IF EXISTS test_table2 CASCADE;
CREATE TABLE test_table2 (
id SERIAL NOT NULL PRIMARY KEY,
junk_field1 INTEGER,
junk_field2 INTEGER,
junk_field3 INTEGER
) WITH (OIDS=FALSE);
DROP INDEX IF EXISTS test_table2_junk_field1_idx CASCADE;
DROP INDEX IF EXISTS test_table2_junk_field2_idx CASCADE;
DROP INDEX IF EXISTS test_table2_junk_field3_idx CASCADE;
FOR i IN 1..10000 LOOP
INSERT INTO test_table2 (junk_field1, junk_field2, junk_field3) VALUES
(i%15, i%25, i%35);
END LOOP;
CREATE INDEX test_table2_junk_field1_idx ON test_table2 USING btree (junk_field1);
CREATE INDEX test_table2_junk_field2_idx ON test_table2 USING btree (junk_field2);
CREATE INDEX test_table2_junk_field3_idx ON test_table2 USING btree (junk_field3);
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
SELECT * FROM Test_Init();
DROP TYPE IF EXISTS test_row_type CASCADE;
CREATE TYPE test_row_type AS (
junk_field1 INTEGER,
junk_field2 INTEGER,
junk_field3 INTEGER
);
CREATE OR REPLACE FUNCTION Test1() RETURNS INTEGER AS
$BODY$
DECLARE
temp_row test_row_type;
cursresults test_row_type[];
curs SCROLL CURSOR IS
SELECT * FROM test_table1 WHERE junk_field1=8;
BEGIN
FOR temp IN curs LOOP
temp_row := temp;
cursresults := array_append(cursresults, temp_row);
END LOOP;
OPEN curs;
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION Test2() RETURNS INTEGER AS
$BODY$
DECLARE
cursresults test_row_type[];
cur SCROLL CURSOR IS
SELECT * FROM unnest(cursresults);
BEGIN
cursresults := array(SELECT (junk_field1, junk_field2, junk_field3)::test_row_type AS rec FROM test_table1 WHERE junk_field1=8);
OPEN cur;
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION Test3() RETURNS INTEGER AS
$BODY$
DECLARE
BEGIN
CREATE TEMPORARY TABLE results WITH (OIDS=FALSE) ON COMMIT DROP AS (
SELECT junk_field1, junk_field2, junk_field3 FROM test_table1 WHERE junk_field1=8
);
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION Test4() RETURNS INTEGER AS
$BODY$
DECLARE
cur SCROLL CURSOR IS
SELECT * FROM results;
BEGIN
CREATE TEMPORARY TABLE results WITH (OIDS=FALSE) ON COMMIT DROP AS (
SELECT junk_field1, junk_field2, junk_field3 FROM test_table1 WHERE junk_field1=8
);
OPEN cur;
RETURN 1;
END;
$BODY$
LANGUAGE plpgsql;
EXPLAIN ANALYZE SELECT * FROM Test1();
"Function Scan on test1 (cost=0.00..0.26 rows=1 width=4) (actual time=17.701..17.701 rows=1 loops=1)"
"Total runtime: 17.714 ms" -- Ouch
"Function Scan on test1 (cost=0.00..0.26 rows=1 width=4) (actual time=17.701..17.701 rows=1 loops=1)"
"Total runtime: 17.714 ms" -- Ouch
EXPLAIN ANALYZE SELECT * FROM Test2();
"Function Scan on test2 (cost=0.00..0.26 rows=1 width=4) (actual time=1.137..1.137 rows=1 loops=1)"
"Total runtime: 1.153 ms" -- Wow
"Total runtime: 1.153 ms" -- Wow
EXPLAIN ANALYZE SELECT * FROM Test3();
"Function Scan on test3 (cost=0.00..0.26 rows=1 width=4) (actual time=2.033..2.034 rows=1 loops=1)"
"Total runtime: 2.050 ms"
"Function Scan on test3 (cost=0.00..0.26 rows=1 width=4) (actual time=2.033..2.034 rows=1 loops=1)"
"Total runtime: 2.050 ms"
EXPLAIN ANALYZE SELECT * FROM Test4();
"Function Scan on test4 (cost=0.00..0.26 rows=1 width=4) (actual time=2.001..2.001 rows=1 loops=1)"
"Total runtime: 2.012 ms"
"Function Scan on test4 (cost=0.00..0.26 rows=1 width=4) (actual time=2.001..2.001 rows=1 loops=1)"
"Total runtime: 2.012 ms"
In each case, the results are available outside the stored procedure by either fetching from the cursor or selecting from the temporary table. Clearly, the temporary table takes a performance hit compared using arrays. Building an array with array append is horrendously inefficient. Unnesting an array is surprisingly efficient. As can be seen from Test3 and Test4, cursors have no detectable overhead for opening the cursor (at least in this example with 1000 result rows). It is unclear whether there is any difference at all from Test3 and Test4 for retrieving the data as I have no easy way right now to measure that accurately. However, since arrays+cursors are more efficient than anything having to do with temp tables, that is the way I will go. With the number of rows I am dealing with (which should always be less than 1,000 in the final returned result set), unnesting an array is much faster than building a temp table and selecting from it.
If anyone thinks I may have missed some important item in this testing, please let me know.
On Fri, Apr 23, 2010 at 8:39 PM, Eliot Gable <egable+pgsql-performance@xxxxxxxxx> wrote:
That's a good point. However, even after changing it, it is still 12ms with the function call verses 6ms without the extra function call. Though, it is worth noting that if you can make the function call be guaranteed to return the same results when used with the same input parameters, it ends up being faster (roughly 3ms in my test case) due to caching -- at least when executing it multiple times in a row like this. Unfortunately, I cannot take advantage of that, because in my particular use case, the chances of it being called again with the same input values within the cache lifetime of the results is close to zero. Add to that the fact that the function queries tables that could change between transactions (meaning the function is volatile) and it's a moot point. However, it is worth noting that for those people using a non-volatile function call multiple times in the same transaction with the same input values, there is no need to inline the function call.
On Fri, Apr 23, 2010 at 5:01 PM, Merlin Moncure <mmoncure@xxxxxxxxx> wrote:
On Fri, Apr 23, 2010 at 4:42 PM, Eliot Gable
<egable+pgsql-performance@xxxxxxxxx> wrote:
That's not a fair test. test2a() is a SRF which has higher overhead> To answer the question of whether calling a stored procedure adds any
> significant overhead, I built a test case and the short answer is that it
> seems that it does:
>
> CREATE OR REPLACE FUNCTION Test1() RETURNS INTEGER AS
> $BODY$
> DECLARE
> temp INTEGER;
> BEGIN
> FOR i IN 1..1000 LOOP
> SELECT 1 AS id INTO temp;
> END LOOP;
> RETURN 1;
> END;
> $BODY$
> LANGUAGE plpgsql;
>
> CREATE OR REPLACE FUNCTION Test2A() RETURNS SETOF INTEGER AS
> $BODY$
> DECLARE
> BEGIN
> RETURN QUERY SELECT 1 AS id;
> END;
> $BODY$
> LANGUAGE plpgsql;
>
> CREATE OR REPLACE FUNCTION Test2B() RETURNS INTEGER AS
> $BODY$
> DECLARE
> temp INTEGER;
> BEGIN
> FOR i IN 1..1000 LOOP
> temp := Test2A();
> END LOOP;
> RETURN 1;
> END;
> $BODY$
> LANGUAGE plpgsql;
>
>
> EXPLAIN ANALYZE SELECT * FROM Test1();
> "Function Scan on test1 (cost=0.00..0.26 rows=1 width=4) (actual
> time=6.568..6.569 rows=1 loops=1)"
> "Total runtime: 6.585 ms"
>
>
> EXPLAIN ANALYZE SELECT * FROM Test2B();
> "Function Scan on test2b (cost=0.00..0.26 rows=1 width=4) (actual
> time=29.006..29.007 rows=1 loops=1)"
> "Total runtime: 29.020 ms"
than regular function. Try it this way and the timings will level
out:
CREATE OR REPLACE FUNCTION Test2A() RETURNS INTEGER AS
$BODY$
DECLARE
BEGIN
merlinRETURN 1 ;
END;
$BODY$
LANGUAGE plpgsql ;
--
Eliot Gable
"We do not inherit the Earth from our ancestors: we borrow it from our children." ~David Brower
"I decided the words were too conservative for me. We're not borrowing from our children, we're stealing from them--and it's not even considered to be a crime." ~David Brower
"Esse oportet ut vivas, non vivere ut edas." (Thou shouldst eat to live; not live to eat.) ~Marcus Tullius Cicero
--
Eliot Gable
"We do not inherit the Earth from our ancestors: we borrow it from our children." ~David Brower
"I decided the words were too conservative for me. We're not borrowing from our children, we're stealing from them--and it's not even considered to be a crime." ~David Brower
"Esse oportet ut vivas, non vivere ut edas." (Thou shouldst eat to live; not live to eat.) ~Marcus Tullius Cicero