I would appreciate some help optimising the following query:
with
subject_journals as(
select A.sq
from isi.rissue A,
isi.rsc_joern_link C
WHERE
C.sc_id in
('d0963875-e438-4923-b3fa-f462e8975221',
'04e14284-09c8-421a-b1ad-c8238051601a',
'04e2189f-cd2a-44f0-b98d-52f6bb5dcd78',
'f5521c65-ec49-408a-9a42-8a69d47703cd',
'2e47ae2f-2c4d-433e-8bdf-9983eeeafc42',
'5d3639b1-04c2-4d94-a99a-5323277fd2b7')
AND
C.rj_id = A.uuid),
subject_articles as (
SELECT B.article_id as art_id
FROM
isi.isi_l1_publication B,
subject_journals A,
isi.ritem C
WHERE
A.sq = B.journal_id
AND
B.publication_year <= '2012'
AND
B.publication_year >= '2000'
AND
C.ut = B.article_id
AND
C.dt in ('@ Article','Review')
),
country_articles as (
SELECT A.art_id
FROM isi.art_country_link A
WHERE
A.countrycode = 'ZA')
select art_id from subject_articles
INTERSECT
select art_id from country_articles
Analyze explains shows that it is not using the indexes on both isi.isi_l1_publication and isi.ritem (both tables with more than 43 million records).:
"HashSetOp Intersect (cost=10778065.50..11227099.44 rows=200 width=48) (actual time=263120.868..263279.467 rows=4000 loops=1)"
" Output: "*SELECT* 1".art_id, (0)"
" Buffers: shared hit=627401 read=4347235, temp read=234498 written=234492"
" CTE subject_journals"
" -> Hash Join (cost=12846.55..17503.27 rows=28818 width=8) (actual time=99.762..142.439 rows=30291 loops=1)"
" Output: a.sq"
" Hash Cond: ((c.rj_id)::text = (a.uuid)::text)"
" Buffers: shared hit=12232"
" -> Bitmap Heap Scan on isi.rsc_joern_link c (cost=1020.92..5029.23 rows=28818 width=37) (actual time=4.238..15.806 rows=30291 loops=1)"
" Output: c.id, c.rj_id, c.sc_id"
" Recheck Cond: ((c.sc_id)::text = ANY ('{d0963875-e438-4923-b3fa-f462e8975221,04e14284-09c8-421a-b1ad-c8238051601a,04e2189f-cd2a-44f0-b98d-52f6bb5dcd78,f5521c65-ec49-408a-9a42-8a69d47703cd,2e47ae2f-2c4d-433e-8bdf-9983eeeafc42,5d3639b1-04c2-4 (...)"
" Buffers: shared hit=3516"
" -> Bitmap Index Scan on rsc_joern_link_sc_id_idx (cost=0.00..1013.72 rows=28818 width=0) (actual time=3.722..3.722 rows=30291 loops=1)"
" Index Cond: ((c.sc_id)::text = ANY ('{d0963875-e438-4923-b3fa-f462e8975221,04e14284-09c8-421a-b1ad-c8238051601a,04e2189f-cd2a-44f0-b98d-52f6bb5dcd78,f5521c65-ec49-408a-9a42-8a69d47703cd,2e47ae2f-2c4d-433e-8bdf-9983eeeafc42,5d3639b1-04 (...)"
" Buffers: shared hit=237"
" -> Hash (cost=10098.06..10098.06 rows=138206 width=45) (actual time=95.495..95.495 rows=138206 loops=1)"
" Output: a.sq, a.uuid"
" Buckets: 16384 Batches: 1 Memory Usage: 10393kB"
" Buffers: shared hit=8716"
" -> Seq Scan on isi.rissue a (cost=0.00..10098.06 rows=138206 width=45) (actual time=0.005..58.225 rows=138206 loops=1)"
" Output: a.sq, a.uuid"
" Buffers: shared hit=8716"
" CTE subject_articles"
" -> Merge Join (cost=9660996.21..9896868.27 rows=13571895 width=16) (actual time=229449.020..259557.073 rows=2513896 loops=1)"
" Output: b.article_id"
" Merge Cond: ((a.sq)::text = (b.journal_id)::text)"
" Buffers: shared hit=519891 read=4347235, temp read=234498 written=234492"
" -> Sort (cost=2711.01..2783.05 rows=28818 width=32) (actual time=224.901..230.615 rows=30288 loops=1)"
" Output: a.sq"
" Sort Key: a.sq"
" Sort Method: quicksort Memory: 2188kB"
" Buffers: shared hit=12232"
" -> CTE Scan on subject_journals a (cost=0.00..576.36 rows=28818 width=32) (actual time=99.764..152.459 rows=30291 loops=1)"
" Output: a.sq"
" Buffers: shared hit=12232"
" -> Materialize (cost=9658285.21..9722584.29 rows=12859816 width=24) (actual time=229223.851..253191.308 rows=14664245 loops=1)"
" Output: b.article_id, b.journal_id"
" Buffers: shared hit=507659 read=4347235, temp read=234498 written=234492"
" -> Sort (cost=9658285.21..9690434.75 rows=12859816 width=24) (actual time=229223.846..251142.167 rows=14072645 loops=1)"
" Output: b.article_id, b.journal_id"
" Sort Key: b.journal_id"
" Sort Method: external merge Disk: 467704kB"
" Buffers: shared hit=507659 read=4347235, temp read=234498 written=234492"
" -> Hash Join (cost=1474828.02..7876046.06 rows=12859816 width=24) (actual time=27181.734..91781.942 rows=14072645 loops=1)"
" Output: b.article_id, b.journal_id"
" Hash Cond: ((c.ut)::text = (b.article_id)::text)"
" Buffers: shared hit=507659 read=4347235, temp read=176031 written=176025"
" -> Seq Scan on isi.ritem c (cost=0.00..5071936.72 rows=29104515 width=16) (actual time=0.012..25529.577 rows=29182778 loops=1)"
" Output: c.ut"
" Filter: (((c.dt)::text = '@ Article'::text) OR ((c.dt)::text = 'Review'::text))"
" Buffers: shared hit=52128 read=4347235"
" -> Hash (cost=1111096.04..1111096.04 rows=19811758 width=24) (actual time=27176.450..27176.450 rows=19820997 loops=1)"
" Output: b.article_id, b.journal_id"
" Buckets: 1048576 Batches: 4 Memory Usage: 271177kB"
" Buffers: shared hit=455531, temp written=79848"
" -> Seq Scan on isi.isi_l1_publication b (cost=0.00..1111096.04 rows=19811758 width=24) (actual time=152.219..21215.614 rows=19820997 loops=1)"
" Output: b.article_id, b.journal_id"
" Filter: (((b.publication_year)::text < '2012'::text) AND ((b.publication_year)::text > '1999'::text))"
" Buffers: shared hit=455531"
" CTE country_articles"
" -> Bitmap Heap Scan on isi.art_country_link a (cost=6427.92..863693.95 rows=244534 width=16) (actual time=65.053..256.632 rows=205195 loops=1)"
" Output: a.art_id"
" Recheck Cond: ((a.countrycode)::text = 'ZA'::text)"
" Buffers: shared hit=107510"
" -> Bitmap Index Scan on art_country_link_countrycode_idx (cost=0.00..6366.79 rows=244534 width=0) (actual time=36.481..36.481 rows=205195 loops=1)"
" Index Cond: ((a.countrycode)::text = 'ZA'::text)"
" Buffers: shared hit=603"
" -> Append (cost=0.00..414492.87 rows=13816429 width=48) (actual time=229449.025..261565.050 rows=2719091 loops=1)"
" Buffers: shared hit=627401 read=4347235, temp read=234498 written=234492"
" -> Subquery Scan on "*SELECT* 1" (cost=0.00..407156.85 rows=13571895 width=48) (actual time=229449.025..260892.314 rows=2513896 loops=1)"
" Output: "*SELECT* 1".art_id, 0"
" Buffers: shared hit=519891 read=4347235, temp read=234498 written=234492"
" -> CTE Scan on subject_articles (cost=0.00..271437.90 rows=13571895 width=48) (actual time=229449.024..260423.294 rows=2513896 loops=1)"
" Output: subject_articles.art_id"
" Buffers: shared hit=519891 read=4347235, temp read=234498 written=234492"
" -> Subquery Scan on "*SELECT* 2" (cost=0.00..7336.02 rows=244534 width=48) (actual time=65.059..353.671 rows=205195 loops=1)"
" Output: "*SELECT* 2".art_id, 1"
" Buffers: shared hit=107510"
" -> CTE Scan on country_articles (cost=0.00..4890.68 rows=244534 width=48) (actual time=65.057..320.444 rows=205195 loops=1)"
" Output: country_articles.art_id"
" Buffers: shared hit=107510"
"Total runtime: 263466.781 ms"
with
subject_journals as(
select A.sq
from isi.rissue A,
isi.rsc_joern_link C
WHERE
C.sc_id in
('d0963875-e438-4923-b3fa-f462e8975221',
'04e14284-09c8-421a-b1ad-c8238051601a',
'04e2189f-cd2a-44f0-b98d-52f6bb5dcd78',
'f5521c65-ec49-408a-9a42-8a69d47703cd',
'2e47ae2f-2c4d-433e-8bdf-9983eeeafc42',
'5d3639b1-04c2-4d94-a99a-5323277fd2b7')
AND
C.rj_id = A.uuid),
subject_articles as (
SELECT B.article_id as art_id
FROM
isi.isi_l1_publication B,
subject_journals A,
isi.ritem C
WHERE
A.sq = B.journal_id
AND
B.publication_year <= '2012'
AND
B.publication_year >= '2000'
AND
C.ut = B.article_id
AND
C.dt in ('@ Article','Review')
),
country_articles as (
SELECT A.art_id
FROM isi.art_country_link A
WHERE
A.countrycode = 'ZA')
select art_id from subject_articles
INTERSECT
select art_id from country_articles
Analyze explains shows that it is not using the indexes on both isi.isi_l1_publication and isi.ritem (both tables with more than 43 million records).:
"HashSetOp Intersect (cost=10778065.50..11227099.44 rows=200 width=48) (actual time=263120.868..263279.467 rows=4000 loops=1)"
" Output: "*SELECT* 1".art_id, (0)"
" Buffers: shared hit=627401 read=4347235, temp read=234498 written=234492"
" CTE subject_journals"
" -> Hash Join (cost=12846.55..17503.27 rows=28818 width=8) (actual time=99.762..142.439 rows=30291 loops=1)"
" Output: a.sq"
" Hash Cond: ((c.rj_id)::text = (a.uuid)::text)"
" Buffers: shared hit=12232"
" -> Bitmap Heap Scan on isi.rsc_joern_link c (cost=1020.92..5029.23 rows=28818 width=37) (actual time=4.238..15.806 rows=30291 loops=1)"
" Output: c.id, c.rj_id, c.sc_id"
" Recheck Cond: ((c.sc_id)::text = ANY ('{d0963875-e438-4923-b3fa-f462e8975221,04e14284-09c8-421a-b1ad-c8238051601a,04e2189f-cd2a-44f0-b98d-52f6bb5dcd78,f5521c65-ec49-408a-9a42-8a69d47703cd,2e47ae2f-2c4d-433e-8bdf-9983eeeafc42,5d3639b1-04c2-4 (...)"
" Buffers: shared hit=3516"
" -> Bitmap Index Scan on rsc_joern_link_sc_id_idx (cost=0.00..1013.72 rows=28818 width=0) (actual time=3.722..3.722 rows=30291 loops=1)"
" Index Cond: ((c.sc_id)::text = ANY ('{d0963875-e438-4923-b3fa-f462e8975221,04e14284-09c8-421a-b1ad-c8238051601a,04e2189f-cd2a-44f0-b98d-52f6bb5dcd78,f5521c65-ec49-408a-9a42-8a69d47703cd,2e47ae2f-2c4d-433e-8bdf-9983eeeafc42,5d3639b1-04 (...)"
" Buffers: shared hit=237"
" -> Hash (cost=10098.06..10098.06 rows=138206 width=45) (actual time=95.495..95.495 rows=138206 loops=1)"
" Output: a.sq, a.uuid"
" Buckets: 16384 Batches: 1 Memory Usage: 10393kB"
" Buffers: shared hit=8716"
" -> Seq Scan on isi.rissue a (cost=0.00..10098.06 rows=138206 width=45) (actual time=0.005..58.225 rows=138206 loops=1)"
" Output: a.sq, a.uuid"
" Buffers: shared hit=8716"
" CTE subject_articles"
" -> Merge Join (cost=9660996.21..9896868.27 rows=13571895 width=16) (actual time=229449.020..259557.073 rows=2513896 loops=1)"
" Output: b.article_id"
" Merge Cond: ((a.sq)::text = (b.journal_id)::text)"
" Buffers: shared hit=519891 read=4347235, temp read=234498 written=234492"
" -> Sort (cost=2711.01..2783.05 rows=28818 width=32) (actual time=224.901..230.615 rows=30288 loops=1)"
" Output: a.sq"
" Sort Key: a.sq"
" Sort Method: quicksort Memory: 2188kB"
" Buffers: shared hit=12232"
" -> CTE Scan on subject_journals a (cost=0.00..576.36 rows=28818 width=32) (actual time=99.764..152.459 rows=30291 loops=1)"
" Output: a.sq"
" Buffers: shared hit=12232"
" -> Materialize (cost=9658285.21..9722584.29 rows=12859816 width=24) (actual time=229223.851..253191.308 rows=14664245 loops=1)"
" Output: b.article_id, b.journal_id"
" Buffers: shared hit=507659 read=4347235, temp read=234498 written=234492"
" -> Sort (cost=9658285.21..9690434.75 rows=12859816 width=24) (actual time=229223.846..251142.167 rows=14072645 loops=1)"
" Output: b.article_id, b.journal_id"
" Sort Key: b.journal_id"
" Sort Method: external merge Disk: 467704kB"
" Buffers: shared hit=507659 read=4347235, temp read=234498 written=234492"
" -> Hash Join (cost=1474828.02..7876046.06 rows=12859816 width=24) (actual time=27181.734..91781.942 rows=14072645 loops=1)"
" Output: b.article_id, b.journal_id"
" Hash Cond: ((c.ut)::text = (b.article_id)::text)"
" Buffers: shared hit=507659 read=4347235, temp read=176031 written=176025"
" -> Seq Scan on isi.ritem c (cost=0.00..5071936.72 rows=29104515 width=16) (actual time=0.012..25529.577 rows=29182778 loops=1)"
" Output: c.ut"
" Filter: (((c.dt)::text = '@ Article'::text) OR ((c.dt)::text = 'Review'::text))"
" Buffers: shared hit=52128 read=4347235"
" -> Hash (cost=1111096.04..1111096.04 rows=19811758 width=24) (actual time=27176.450..27176.450 rows=19820997 loops=1)"
" Output: b.article_id, b.journal_id"
" Buckets: 1048576 Batches: 4 Memory Usage: 271177kB"
" Buffers: shared hit=455531, temp written=79848"
" -> Seq Scan on isi.isi_l1_publication b (cost=0.00..1111096.04 rows=19811758 width=24) (actual time=152.219..21215.614 rows=19820997 loops=1)"
" Output: b.article_id, b.journal_id"
" Filter: (((b.publication_year)::text < '2012'::text) AND ((b.publication_year)::text > '1999'::text))"
" Buffers: shared hit=455531"
" CTE country_articles"
" -> Bitmap Heap Scan on isi.art_country_link a (cost=6427.92..863693.95 rows=244534 width=16) (actual time=65.053..256.632 rows=205195 loops=1)"
" Output: a.art_id"
" Recheck Cond: ((a.countrycode)::text = 'ZA'::text)"
" Buffers: shared hit=107510"
" -> Bitmap Index Scan on art_country_link_countrycode_idx (cost=0.00..6366.79 rows=244534 width=0) (actual time=36.481..36.481 rows=205195 loops=1)"
" Index Cond: ((a.countrycode)::text = 'ZA'::text)"
" Buffers: shared hit=603"
" -> Append (cost=0.00..414492.87 rows=13816429 width=48) (actual time=229449.025..261565.050 rows=2719091 loops=1)"
" Buffers: shared hit=627401 read=4347235, temp read=234498 written=234492"
" -> Subquery Scan on "*SELECT* 1" (cost=0.00..407156.85 rows=13571895 width=48) (actual time=229449.025..260892.314 rows=2513896 loops=1)"
" Output: "*SELECT* 1".art_id, 0"
" Buffers: shared hit=519891 read=4347235, temp read=234498 written=234492"
" -> CTE Scan on subject_articles (cost=0.00..271437.90 rows=13571895 width=48) (actual time=229449.024..260423.294 rows=2513896 loops=1)"
" Output: subject_articles.art_id"
" Buffers: shared hit=519891 read=4347235, temp read=234498 written=234492"
" -> Subquery Scan on "*SELECT* 2" (cost=0.00..7336.02 rows=244534 width=48) (actual time=65.059..353.671 rows=205195 loops=1)"
" Output: "*SELECT* 2".art_id, 1"
" Buffers: shared hit=107510"
" -> CTE Scan on country_articles (cost=0.00..4890.68 rows=244534 width=48) (actual time=65.057..320.444 rows=205195 loops=1)"
" Output: country_articles.art_id"
" Buffers: shared hit=107510"
"Total runtime: 263466.781 ms"
The index for those fields:
CREATE INDEX isi_l1_publication_publication_year_idx
ON isi.isi_l1_publication
USING btree
(publication_year COLLATE pg_catalog."default");
CREATE INDEX ritem_dt_idx
ON isi.ritem
USING btree
(dt COLLATE pg_catalog."default");
CREATE INDEX isi_l1_publication_publication_year_idx
ON isi.isi_l1_publication
USING btree
(publication_year COLLATE pg_catalog."default");
CREATE INDEX ritem_dt_idx
ON isi.ritem
USING btree
(dt COLLATE pg_catalog."default");
Regards
Johann
Johann
--
Because experiencing your loyal love is better than life itself,
my lips will praise you. (Psalm 63:3)