Re: Terribly slow query with very good plan?

Pavel Stehule <pavel.stehule@xxxxxxxxx> · Fri, 4 Feb 2022 10:23:03 +0100

pá 4. 2. 2022 v 10:11 odesílatel Les <nagylzs@xxxxxxxxx> napsal:
Hello,

I have a table that contains folders, and another one that contains files. 

Here are the table definitions. I have removed most of the columns because they are not important for this question. (There are lots of columns.)

CREATE TABLE media.oo_folder (
	id int8 NOT NULL,
is_active bool NOT NULL DEFAULT true,
	title text NOT NULL,
relpath text NOT NULL,
CONSTRAINT chk_no_slash CHECK (("position"(title, '/'::text) = 0)),
	CONSTRAINT oo_folder_chk_no_slash CHECK (("position"(title, '/'::text) = 0)),
	CONSTRAINT pk_oo_folder PRIMARY KEY (id),
CONSTRAINT fk_oo_folder_parent_id FOREIGN KEY (parent_id) REFERENCES media.oo_folder(id) ON DELETE CASCADE DEFERRABLE
);
CREATE INDEX oo_folder_idx_parent ON media.oo_folder USING btree (parent_id);
CREATE INDEX oo_folder_idx_relpath ON media.oo_folder USING btree (relpath);
CREATE UNIQUE INDEX uidx_oo_folder_active_title ON media.oo_folder USING btree (parent_id, title) WHERE is_active;

CREATE TABLE media.oo_file (
	id int8 NOT NULL,
is_active bool NOT NULL DEFAULT true,
title text NOT NULL,
	ext text NULL,
relpath text NOT NULL,
sha1 text NOT NULL,
CONSTRAINT chk_no_slash CHECK (("position"(title, '/'::text) = 0)),
	CONSTRAINT oo_file_chk_no_slash CHECK (("position"(title, '/'::text) = 0)),
	CONSTRAINT pk_oo_file PRIMARY KEY (id),
CONSTRAINT fk_oo_file_oo_folder_id FOREIGN KEY (oo_folder_id) REFERENCES media.oo_folder(id) ON DELETE CASCADE DEFERRABLE,
);
CREATE INDEX oo_file_idx_oo_folder_id ON media.oo_file USING btree (oo_folder_id);
CREATE INDEX oo_file_idx_relpath ON media.oo_file USING btree (relpath);
CREATE INDEX oo_file_idx_sha1 ON media.oo_file USING btree (sha1);
CREATE UNIQUE INDEX uidx_oo_file_active_title ON media.oo_file USING btree (oo_folder_id, title) WHERE is_active;

The "replath" field contains the path of the file/folder. For example: "/folder1/folder2/folder3/filename4.ext5".  The replath field is managed by triggers. There are about 1M rows for files and 600K folder rows in the database. The files are well distributed between folders, and there are only 45 root folders ( parent_id is null)

This query runs very fast:

EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS) select id, title from media.oo_folder f where f.parent_id is null

QUERY PLAN                                                                                                                                |
------------------------------------------------------------------------------------------------------------------------------------------+
Index Scan using oo_folder_idx_parent on media.oo_folder f  (cost=0.42..73.70 rows=20 width=25) (actual time=0.030..0.159 rows=45 loops=1)|
  Output: id, title                                                                                                                       |
  Index Cond: (f.parent_id IS NULL)                                                                                                       |
  Buffers: shared hit=40                                                                                                                  |
Planning Time: 0.123 ms                                                                                                                   |
Execution Time: 0.187 ms                                                                                                                  |

My task is to write a query that tells if a folder has any active file inside it - directly or in subfolders. Here is the query for that:

EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)

select id, title,
	(exists (select f2.id from media.oo_file f2 where f2.relpath like f.relpath || '%')) as has_file
from media.oo_folder f where f.parent_id is null

QUERY PLAN                                                                                                                                                    |
--------------------------------------------------------------------------------------------------------------------------------------------------------------+
Index Scan using oo_folder_idx_parent on media.oo_folder f  (cost=0.42..488.02 rows=20 width=26) (actual time=713.419..25414.969 rows=45 loops=1)             |
  Output: f.id, f.title, (SubPlan 1)                                                                                                                          |
  Index Cond: (f.parent_id IS NULL)                                                                                                                           |
  Buffers: shared hit=7014170                                                                                                                                 |
  SubPlan 1                                                                                                                                                   |
    ->  Index Only Scan using oo_file_idx_relpath on media.oo_file f2  (cost=0.55..108499.27 rows=5381 width=0) (actual time=564.756..564.756 rows=0 loops=45)|
          Filter: (f2.relpath ~~ (f.relpath || '%'::text))                                                                                                    |
          Rows Removed by Filter: 792025                                                                                                                      |
          Heap Fetches: 768960                                                                                                                                |
          Buffers: shared hit=7014130                                                                                                                         |
Planning Time: 0.361 ms                                                                                                                                       |
Execution Time: 25415.088 ms                                                                                                                                  |

It also returns 45 rows, but in 25 seconds which is unacceptable. 

It I execute the "has_file" subquery for one specific relpath then it speeds up again, to < 1msec:

EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
select exists ( select id from media.oo_file of2  where relpath  like 'Felhasználók%')
QUERY PLAN                                                                                                                |
--------------------------------------------------------------------------------------------------------------------------+
Result  (cost=1.66..1.67 rows=1 width=1) (actual time=0.049..0.050 rows=1 loops=1)                                        |
  Output: $0                                                                                                              |
  Buffers: shared hit=2                                                                                                   |
  InitPlan 1 (returns $0)                                                                                                 |
    ->  Seq Scan on media.oo_file of2  (cost=0.00..144714.70 rows=86960 width=0) (actual time=0.044..0.044 rows=1 loops=1)|
          Filter: (of2.relpath ~~ 'Felhasználók%'::text)                                                                  |
          Rows Removed by Filter: 15                                                                                      |
          Buffers: shared hit=2                                                                                           |
Planning Time: 0.290 ms                                                                                                   |
Execution Time: 0.076 ms                                                                                                  |

In other words, I could write a pl/sql function with a nested loop instead of the problematic query, and it will be 1000 times faster.

What am I missing?

I don't understand how it is possible in the slow case  Rows Removed by Filter: 792025 (returns 0 row) and in the second case Rows Removed by Filter: 15 (returns 1 row).

It is strange.

Thanks,

   Laszlo