Re: Slow query with big tables

Pavel Stehule <pavel.stehule@xxxxxxxxx> · Sat, 27 Aug 2016 06:55:48 +0200

2016-08-26 22:26 GMT+02:00 Mike Sofen <msofen@xxxxxxxxxx>:

From: pgsql-performance-owner@postgresql.org [mailto:pgsql-performance-owner@xxxxxxxxxxxxxx] On Behalf Of Tommi K
Sent: Friday, August 26, 2016 7:25 AM
To: Craig James <cjames@xxxxxxxxxxxxxx>
Cc: andreas kretschmer <akretschmer@xxxxxxxxxxxxx>; pgsql-performance@postgresql.org
Subject: Re:  Slow query with big tables

Ok, sorry that I did not add the original message. I thought that it would be automatically added to the message thread.

Here is the question again:

Is there way to keep query time constant as the database size grows. Should I use partitioning or partial indexes?

try to disable nested_loop - there are bad estimations.

This query should not be fast - there are two ILIKE filters with negative impact on estimations.

Regards

Pavel

Thanks,
Tommi Kaksonen

> Hello, 
> 
> I have the following tables and query. I would like to get some help to find out why it is slow and how its performance could be improved.
> 
> Thanks,
> Tommi K.
> 
> 
> --Table definitions---
> CREATE TABLE "Measurement"
> (
>   id bigserial NOT NULL,
>   product_id bigserial NOT NULL,
>   nominal_data_id bigserial NOT NULL,
>   description text,
>   serial text,
>   measurement_time timestamp without time zone,
>   status smallint,
>   system_description text,
>   CONSTRAINT "Measurement_pkey" PRIMARY KEY (id),
>   CONSTRAINT "Measurement_nominal_data_id_fkey" FOREIGN KEY (nominal_data_id)
>       REFERENCES "Nominal_data" (id) MATCH SIMPLE
>       ON UPDATE NO ACTION ON DELETE NO ACTION,
>   CONSTRAINT "Measurement_product_id_fkey" FOREIGN KEY (product_id)
>       REFERENCES "Product" (id) MATCH SIMPLE
>       ON UPDATE NO ACTION ON DELETE NO ACTION
> )
> WITH (
>   OIDS=FALSE
> );
> 
> CREATE INDEX measurement_time_index
>   ON "Measurement"
>   USING btree
>   (measurement_time);
> ALTER TABLE "Measurement" CLUSTER ON measurement_time_index;
> 
> CREATE TABLE "Product"
> (
>   id bigserial NOT NULL,
>   name text,
>   description text,
>   info text,
>   system_name text,
>   CONSTRAINT "Product_pkey" PRIMARY KEY (id)
> )
> WITH (
>   OIDS=FALSE
> );
> 
> 
> CREATE TABLE "Extra_info"
> (
>   id bigserial NOT NULL,
>   measurement_id bigserial NOT NULL,
>   name text,
>   description text,
>   info text,
>   type text,
>   value_string text,
>   value_double double precision,
>   value_integer bigint,
>   value_bool boolean,
>   CONSTRAINT "Extra_info_pkey" PRIMARY KEY (id),
>   CONSTRAINT "Extra_info_measurement_id_fkey" FOREIGN KEY (measurement_id)
>       REFERENCES "Measurement" (id) MATCH SIMPLE
>       ON UPDATE NO ACTION ON DELETE NO ACTION
> )
> WITH (
>   OIDS=FALSE
> );
> 
> CREATE INDEX extra_info_measurement_id_index
>   ON "Extra_info"
>   USING btree
>   (measurement_id);
> 
> CREATE TABLE "Feature"
> (
>   id bigserial NOT NULL,
>   measurement_id bigserial NOT NULL,
>   name text,
>   description text,
>   info text,
>   CONSTRAINT "Feature_pkey" PRIMARY KEY (id),
>   CONSTRAINT "Feature_measurement_id_fkey" FOREIGN KEY (measurement_id)
>       REFERENCES "Measurement" (id) MATCH SIMPLE
>       ON UPDATE NO ACTION ON DELETE NO ACTION
> )
> WITH (
>   OIDS=FALSE
> );
> 
> CREATE INDEX feature_measurement_id_and_name_index
>   ON "Feature"
>   USING btree
>   (measurement_id, name COLLATE pg_catalog."default");
> 
> CREATE INDEX feature_measurement_id_index
>   ON "Feature"
>   USING hash
>   (measurement_id);
> 
> 
> CREATE TABLE "Point"
> (
>   id bigserial NOT NULL,
>   feature_id bigserial NOT NULL,
>   x double precision,
>   y double precision,
>   z double precision,
>   status_x smallint,
>   status_y smallint,
>   status_z smallint,
>   difference_x double precision,
>   difference_y double precision,
>   difference_z double precision,
>   CONSTRAINT "Point_pkey" PRIMARY KEY (id),
>   CONSTRAINT "Point_feature_id_fkey" FOREIGN KEY (feature_id)
>       REFERENCES "Feature" (id) MATCH SIMPLE
>       ON UPDATE NO ACTION ON DELETE NO ACTION
> )
> WITH (
>   OIDS=FALSE
> );
> 
> CREATE INDEX point_feature_id_index
>   ON "Point"
>   USING btree
>   (feature_id);
> 
> CREATE TABLE "Warning"
> (
>   id bigserial NOT NULL,
>   feature_id bigserial NOT NULL,
>   "number" smallint,
>   info text,
>   CONSTRAINT "Warning_pkey" PRIMARY KEY (id),
>   CONSTRAINT "Warning_feature_id_fkey" FOREIGN KEY (feature_id)
>       REFERENCES "Feature" (id) MATCH SIMPLE
>       ON UPDATE NO ACTION ON DELETE NO ACTION
> )
> WITH (
>   OIDS=FALSE
> );
> 
> CREATE INDEX warning_feature_id_index
>   ON "Warning"
>   USING btree
>   (feature_id);
> 
> 
> ---Query---
> SELECT
> f.name, 
> f.description,
> SUM(CASE WHEN p.status_x = 0 AND p.status_y = 0 AND p.status_z = 0 AND warning.id IS NULL THEN 1 ELSE 0 END) AS green_count, 
> SUM(CASE WHEN p.status_x = 0 AND p.status_y = 0 AND p.status_z = 0 AND warning.id IS NOT NULL THEN 1 ELSE 0 END) AS green_warned_count,
> SUM(CASE WHEN NOT (p.status_x = 0 AND p.status_y = 0 AND p.status_z = 0) AND NOT (p.status_x = 2 OR p.status_y = 2 OR p.status_z = 2) AND warning.id IS NULL THEN 1 ELSE 0 END) AS yellow_count, 
> SUM(CASE WHEN NOT (p.status_x = 0 AND p.status_y = 0 AND p.status_z = 0) AND NOT (p.status_x = 2 OR p.status_y = 2 OR p.status_z = 2) AND warning.id IS NOT NULL THEN 1 ELSE 0 END) AS yellow_warned_count, 
> SUM(CASE WHEN (p.status_x = 2 OR p.status_y = 2 OR p.status_z = 2) AND warning.id IS NULL THEN 1 ELSE 0 END) AS red_count, 
> SUM(CASE WHEN (p.status_x = 2 OR p.status_y = 2 OR p.status_z = 2) AND warning.id IS NOT NULL THEN 1 ELSE 0 END) AS red_warned_count, 
> SUM(CASE WHEN (p.status_x = 1000 OR p.status_y = 1000 OR p.status_z = 1000) AND warning.id IS NOT NULL THEN 1 ELSE 0 END) AS unable_to_measure_count 
> FROM "Point" p 
> JOIN "Feature" f ON f.id = p.feature_id
> JOIN "Measurement" measurement ON measurement.id = f.measurement_id 
> JOIN "Product" product ON product.id = measurement.product_id 
> LEFT JOIN "Warning" warning ON f.id = warning.feature_id
> WHERE (product.name ILIKE 'Part 1') AND 
> measurement.measurement_start_time >= '2015-06-18 17:00:00' AND 
> measurement.measurement_start_time <= '2015-06-18 18:00:00' AND 
> measurement.id NOT IN(SELECT measurement_id FROM "Extra_info" e 
> WHERE e.measurement_id = measurement.id AND e.description = 'Clamp' AND e.value_string ILIKE 'Clamped%')
>  GROUP BY f.name, f.description;
> 
> 
> ---Explain Analyze---
> GroupAggregate  (cost=1336999.08..1337569.18 rows=5562 width=33) (actual time=6223.622..6272.321 rows=255 loops=1)
>   Buffers: shared hit=263552 read=996, temp read=119 written=119
>   ->  Sort  (cost=1336999.08..1337012.98 rows=5562 width=33) (actual time=6223.262..6231.106 rows=26265 loops=1)
>         Sort Key: f.name, f.description
>         Sort Method: external merge  Disk: 936kB
>         Buffers: shared hit=263552 read=996, temp read=119 written=119
>         ->  Nested Loop Left Join  (cost=0.00..1336653.08 rows=5562 width=33) (actual time=55.792..6128.875 rows=26265 loops=1)
>               Buffers: shared hit=263552 read=996
>               ->  Nested Loop  (cost=0.00..1220487.17 rows=5562 width=33) (actual time=55.773..5910.852 rows=26265 loops=1)
>                     Buffers: shared hit=182401 read=954
>                     ->  Nested Loop  (cost=0.00..22593.53 rows=8272 width=27) (actual time=30.980..3252.869 rows=38831 loops=1)
>                           Buffers: shared hit=972 read=528
>                           ->  Nested Loop  (cost=0.00..657.24 rows=22 width=8) (actual time=0.102..109.577 rows=103 loops=1)
>                                 Join Filter: (measurement.product_id = product.id)
>                                 Rows Removed by Join Filter: 18
>                                 Buffers: shared hit=484 read=9
>                                 ->  Seq Scan on "Product" product  (cost=0.00..1.04 rows=1 width=8) (actual time=0.010..0.019 rows=1 loops=1)
>                                       Filter: (name ~~* 'Part 1'::text)
>                                       Rows Removed by Filter: 2
>                                       Buffers: shared hit=1
>                                 ->  Index Scan using measurement_start_time_index on "Measurement" measurement  (cost=0.00..655.37 rows=67 width=16) (actual time=0.042..109.416 rows=121 loops=1)
>                                       Index Cond: ((measurement_start_time >= '2015-06-18 17:00:00'::timestamp without time zone) AND (measurement_start_time <= '2015-06-18 18:00:00'::timestamp without time zone))
>                                       Filter: (NOT (SubPlan 1))
>                                       Buffers: shared hit=483 read=9
>                                       SubPlan 1
>                                         ->  Index Scan using extra_info_measurement_id_index on "Extra_info" e  (cost=0.00..9.66 rows=1 width=8) (actual time=0.900..0.900 rows=0 loops=121)
>                                               Index Cond: (measurement_id = measurement.id)
>                                               Filter: ((value_string ~~* 'Clamped%'::text) AND (description = 'Clamp'::text))
>                                               Rows Removed by Filter: 2
>                                               Buffers: shared hit=479 read=7
>                           ->  Index Scan using feature_measurement_id_and_name_index on "Feature" rf  (cost=0.00..993.40 rows=370 width=35) (actual time=28.152..30.407 rows=377 loops=103)
>                                 Index Cond: (measurement_id = measurement.id)
>                                 Buffers: shared hit=488 read=519
>                     ->  Index Scan using point_feature_id_index on "Point" p  (cost=0.00..144.80 rows=1 width=14) (actual time=0.067..0.067 rows=1 loops=38831)
>                           Index Cond: (feature_id = f.id)
>                           Buffers: shared hit=181429 read=426
>               ->  Index Scan using warning_feature_id_index on "Warning" warning  (cost=0.00..20.88 rows=1 width=16) (actual time=0.007..0.007 rows=0 loops=26265)
>                     Index Cond: (f.id = feature_id)
>                     Buffers: shared hit=81151 read=42
> Total runtime: 6273.312 ms
> 
> 
> ---Version---
> PostgreSQL 9.2.1, compiled by Visual C++ build 1600, 64-bit
> 
> 
> ---Table sizes---
> Extra_info          1223400 rows
> Feature                              185436000 rows
> Measurement     500000 rows
> Point                   124681000 rows
> Warning                            11766800 rows
> 
> ---Hardware---
> Intel Core i5-2320 CPU 3.00GHz (4 CPUs)
> 6GB Memory
> 64-bit Operating System (Windows 7 Professional)
> WD Blue 500GB HDD - 7200 RPM SATA 6 Gb/s 16MB Cache
> 
> ---History---
> Query gets slower as more data is added to the database
> 
> ---Maintenance---
> Autovacuum is used with default settings
> 
> Tommi Kaksonen <t2nn2t(at)gmail(dot)com> wrote:

I don’t see a reason to partition such small data.  What I do see is you attempting to run a big query on what looks like a small desktop pc.  6GB of ram, especially under Windows 7, isn’t enough ram for a database server.  Run the query on a normal small server of say 16gb and it should perform fine.  IMO.

Mike