View containing a recursive function

Mathieu De Zutter <mathieu@xxxxxxxxxxxx> · Mon, 01 Feb 2016 07:23:40 +0000

Hi all,
I have a recursive part in my database logic that I want to isolate and reuse as a view. I had found a blog that explained how move a function parameter into a view. The SQL is in attachment.
When I write a query based on that view with a fixed value (or values) for the (input) parameter, the planner does fine and only evaluates the function once.
However, when the value of the parameter should be deduced from something else, the planner doesn't understand that and will evaluate the function for each possible value.

Any pointers to what I'm doing wrong or on how to optimize it?

Attachment contains the queries and explain plans.

Thanks!

Kind regards,
Mathieu
CREATE OR REPLACE FUNCTION fn_covering_works(wid INTEGER)
  RETURNS TABLE(work_id INTEGER)
AS
  $$
  WITH RECURSIVE func(work_id) AS
  (
    SELECT wid
    UNION ALL
    SELECT ad.adapted_id
    FROM func f JOIN adaptation ad ON f.work_id = ad.original_id
  )
  SELECT work_id
  FROM func
  $$
LANGUAGE 'sql' ROWS 1 COST 10000;

CREATE OR REPLACE VIEW covering_works_r AS
  SELECT
    w.id                    AS work_id,
    fn_covering_works(w.id) AS covering_work_id
  FROM work w;

-- This one is fine

EXPLAIN ANALYZE
SELECT
  w.id,
  cw.covering_work_id
FROM work w
  JOIN covering_works_r cw ON cw.work_id = w.id
WHERE w.id = 4249;

  id  | covering_work_id 
------+------------------
 4249 |             4249
 4249 |           102813
 4249 |             4250
 4249 |            23551
 4249 |            68931
 4249 |            74836
 4249 |            76088
 4249 |           111423
 4249 |           112399
 4249 |           112502
 4249 |           112666
 4249 |           120640
 4249 |           126994
 4249 |           133918
 4249 |           139519
 4249 |           142989
 4249 |           149393
 4249 |           111424

"Nested Loop  (cost=0.58..33.64 rows=1 width=8) (actual time=0.334..0.424 rows=18 loops=1)"
"  ->  Index Only Scan using work_pkey on work w  (cost=0.29..4.31 rows=1 width=4) (actual time=0.021..0.021 rows=1 loops=1)"
"        Index Cond: (id = 4249)"
"        Heap Fetches: 0"
"  ->  Index Only Scan using work_pkey on work w_1  (cost=0.29..29.31 rows=1 width=4) (actual time=0.309..0.393 rows=18 loops=1)"
"        Index Cond: (id = 4249)"
"        Heap Fetches: 0"
"Total runtime: 0.457 ms"

-- This one is too slow, but should be as fast as the first query.
-- At first sight it seems right, but the condition w_1.id=4249 (=w.id) isn't pushed to the second index scan.

EXPLAIN ANALYZE
SELECT
  w.id,
  cw.covering_work_id
FROM work w
  JOIN covering_works_r cw ON cw.work_id = w.id
WHERE w.first_release_id = 4249;

  id  | covering_work_id 
------+------------------
 4249 |             4249
 4249 |           102813
 4249 |             4250
 4249 |            23551
 4249 |            68931
 4249 |            74836
 4249 |            76088
 4249 |           111423
 4249 |           112399
 4249 |           112502
 4249 |           112666
 4249 |           120640
 4249 |           126994
 4249 |           133918
 4249 |           139519
 4249 |           142989
 4249 |           149393
 4249 |           111424

"Nested Loop  (cost=0.58..1659529.05 rows=1 width=8) (actual time=30.075..995.889 rows=18 loops=1)"
"  Join Filter: (w.id = w_1.id)"
"  Rows Removed by Join Filter: 81228"
"  ->  Index Scan using work_first_release_idx on work w  (cost=0.29..8.31 rows=1 width=4) (actual time=0.009..0.009 rows=1 loops=1)"
"        Index Cond: (first_release_id = 4249)"
"  ->  Index Only Scan using work_pkey on work w_1  (cost=0.29..1658030.07 rows=66252 width=4) (actual time=0.185..981.054 rows=81246 loops=1)"
"        Heap Fetches: 0"
"Total runtime: 995.916 ms"

# select id, first_release_id from work w where id = 4249;
  id  | first_release_id 
------+------------------
 4249 |             4249

-- 
Sent via pgsql-performance mailing list (pgsql-performance@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-performance