Maybe someone can explain this. The following SQL will reproduce our issue:
DROP TABLE IF EXISTS t1 CASCADE;
CREATE TABLE t1 (name text,
state text);
CREATE INDEX t1_name ON t1(name);
CREATE INDEX t1_state ON t1(state);
CREATE INDEX t1_name_state ON t1(name,state);
-- Create some sample data
DO $$
DECLARE
states text[] := array['UNKNOWN', 'TODO', 'DONE', 'UNKNOWN'];
BEGIN
FOR v IN 1..200000 LOOP
INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
INSERT INTO t1 VALUES('user'||v, states[floor(random()*4)]);
END LOOP;
END $$;
CREATE OR REPLACE FUNCTION state_to_int(state character varying) RETURNS integer
LANGUAGE plpgsql IMMUTABLE STRICT
AS $$BEGIN
IF state = 'UNKNOWN' THEN RETURN 0;
ELSIF state = 'TODO' THEN RETURN 1;
ELSIF state = 'DONE' THEN RETURN 2;
ELSIF state = 'NOT REQUIRED' THEN RETURN 3;
ELSE RAISE EXCEPTION 'state_to_int called with invalid state value';
END IF;
END;$$;
CREATE OR REPLACE FUNCTION int_to_state(state integer) RETURNS character varying
LANGUAGE plpgsql IMMUTABLE STRICT
AS $$BEGIN
IF state = 0 THEN RETURN 'UNKNOWN';
ELSIF state = 1 THEN RETURN 'TODO';
ELSIF state = 2 THEN RETURN 'DONE';
ELSIF state = 3 THEN RETURN 'NOT REQUIRED';
ELSE RAISE EXCEPTION 'int_to_state called with invalid state value';
END IF;
END;$$;
-- Why is this a lot slower
explain (analyse, buffers) select name, int_to_state(min(state_to_int(state))) as status from t1 group by t1.name;
-- Than this?
explain (analyze, buffers) select name, (array['UNKNOWN', 'TODO', 'DONE', 'NOT REQUIRED'])[min(
CASE state
WHEN 'UNKNOWN' THEN 0
WHEN 'TODO' THEN 1
WHEN 'DONE' THEN 2
WHEN 'NOT REQUIRED' THEN 3
END)] AS status from t1 group by t1.name;
-- This is also very much slower
explain (analyze, buffers) select name, (array['UNKNOWN', 'TODO', 'DONE', 'NOT REQUIRED'])[min(state_to_int(state))] AS status from t1 group by t1.name;
This was done on:
PostgreSQL 9.3.5 on x86_64-unknown-linux-gnu, compiled by gcc (Ubuntu 4.8.2-19ubuntu1) 4.8.2, 64-bit
We get results like this:
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=0.42..280042.62 rows=208120 width=15) (actual time=0.076..2439.066 rows=200000 loops=1)
Buffers: shared hit=53146
-> Index Scan using t1_name on t1 (cost=0.42..21931.42 rows=800000 width=15) (actual time=0.009..229.477 rows=800000 loops=1)
Buffers: shared hit=53146
Total runtime: 2460.860 ms
(5 rows)
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=0.42..36012.62 rows=208120 width=15) (actual time=0.017..559.384 rows=200000 loops=1)
Buffers: shared hit=53146
-> Index Scan using t1_name on t1 (cost=0.42..21931.42 rows=800000 width=15) (actual time=0.008..197.133 rows=800000 loops=1)
Buffers: shared hit=53146
Total runtime: 574.550 ms
(5 rows)
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------
GroupAggregate (cost=0.42..228012.62 rows=208120 width=15) (actual time=0.042..2089.367 rows=200000 loops=1)
Buffers: shared hit=53146
-> Index Scan using t1_name on t1 (cost=0.42..21931.42 rows=800000 width=15) (actual time=0.008..237.854 rows=800000 loops=1)
Buffers: shared hit=53146
Total runtime: 2111.004 ms
(5 rows)
We cannot change our table structure to reflect something more sensible. What we would really like to know is why using functions is so much slower than the unreadable method.
Regards
Ross