Re: Slow update on column that is part of exclusion constraint

Adam Brusselback <adambrusselback@xxxxxxxxx> · Thu, 14 Apr 2016 00:17:42 -0400

So fair enough, it does seem to be related to the lookup rather than maintenance on the index. I was misguided in my initial assumption.  
Spent quite a bit of time trying to come up with a self contained test, and it seems like I can't make it choose the GiST index unless I remove the regular btree index in my test case, though the opposite is true for my table in production.  Not really sure what that means as far as what I need to do though. I've tried a vacuum full, analyze, rebuild index, drop and re-add the constraint... It still uses that GiST index for this query.

Hell, a sequential scan is a ton faster even.

On Wed, Apr 13, 2016 at 2:54 PM, Evgeniy Shishkin <itparanoia@xxxxxxxxx> wrote:

> On 13 Apr 2016, at 20:14, Adam Brusselback <adambrusselback@xxxxxxxxx> wrote:

>

> Sorry, brain stopped working and I forgot to include the normal info.

>

> Postgres version: 9.5.1

> Hardware: 2 core, 4gb Digital Ocean virtual server

> OS: Debian

>

> explain analyze for an example update:

> 'Update on price_generated  (cost=32.45..644.83 rows=1 width=157) (actual time=29329.614..29329.614 rows=0 loops=1)'

> '  ->  Nested Loop  (cost=32.45..644.83 rows=1 width=157) (actual time=29329.608..29329.608 rows=0 loops=1)'

> '        ->  HashAggregate  (cost=32.04..34.35 rows=231 width=52) (actual time=1.137..2.090 rows=231 loops=1)'

> '              Group Key: pti.product_id, pti.company_id, pti.date_range'

> '              ->  Seq Scan on _prices_to_insert pti  (cost=0.00..30.31 rows=231 width=52) (actual time=0.060..0.678 rows=231 loops=1)'

> '        ->  Index Scan using price_generated_company_product_date_active_excl on price_generated  (cost=0.41..2.63 rows=1 width=151) (actual time=126.949..126.949 rows=0 loops=231)'

> '              Index Cond: (date_range = pti.date_range)'

> '              Filter: ((upper(active_range) IS NULL) AND (pti.product_id = product_id) AND (pti.company_id = company_id))'

> '              Rows Removed by Filter: 29460'

> 'Planning time: 3.134 ms'

> 'Execution time: 29406.717 ms'

Well, you see execution time of 30 seconds because there are 231 index lookups,

each taking 126 ms.

And that lookup is slow because of

Filter: ((upper(active_range) IS NULL) AND (pti.product_id = product_id) AND (pti.company_id = company_id))'

Can you provide self-containing example of update?

I don't see there (upper(active_range) IS NULL condition is coming from.

SET work_mem = '512 MB';

DROP TABLE IF EXISTS price_generated_test;
DROP TABLE IF EXISTS _prices_to_insert;

CREATE TEMPORARY TABLE price_generated_test
(
  price_generated_id uuid NOT NULL DEFAULT gen_random_uuid(),
  product_id uuid NOT NULL,
  company_id uuid NOT NULL,
  date_range daterange NOT NULL,
  average_price numeric NOT NULL,
  average_price_delivered numeric NOT NULL,
  low_price numeric NOT NULL,
  low_price_delivered numeric NOT NULL,
  high_price numeric NOT NULL,
  high_price_delivered numeric NOT NULL,
  uom_type_id uuid NOT NULL,
  active_range tstzrange NOT NULL DEFAULT tstzrange(now(), NULL::timestamp with time zone),
  CONSTRAINT price_generated_test_pkey PRIMARY KEY (price_generated_id),
  CONSTRAINT price_generated_test_company_product_date_active_excl EXCLUDE 
  USING gist (gosimple.hex_to_bigint(company_id::text) WITH =, gosimple.hex_to_bigint(product_id::text) WITH =, date_range WITH &&, active_range WITH &&)
);

CREATE INDEX idx_price_generated_test_prod_comp_date
  ON price_generated_test
  USING btree
  (product_id, company_id, date_range);

INSERT INTO price_generated_test (product_id, company_id, date_range, average_price, average_price_delivered, low_price, low_price_delivered, high_price, high_price_delivered, uom_type_id, active_range)
SELECT p.product_id, c.company_id, d.date_range, avg(pr.price), avg(pr.price + pr.delivery), min(pr.price), min(pr.price + pr.delivery), max(pr.price), max(pr.price + pr.delivery), u.uom_type_id, ar.active_range
FROM (
	SELECT gen_random_uuid() as product_id
	FROM generate_series(1, 200)
) p
CROSS JOIN (
	SELECT gen_random_uuid() as company_id
	FROM generate_series(1, 60)
) c
CROSS JOIN (
	SELECT daterange((now() + (generate_series || ' months')::interval)::date, (now() + (generate_series + 1 || ' months')::interval)::date) as date_range
	FROM generate_series(1, 24)
) d
CROSS JOIN (
	SELECT gen_random_uuid() as uom_type_id
	FROM generate_series(1, 1)
) u 
CROSS JOIN (
	SELECT random() * 15 as price, random() * 3 as delivery
	FROM generate_series(1, 15)
) pr
CROSS JOIN (
	SELECT tstzrange(now() - (generate_series || ' days')::interval, CASE WHEN now() - (generate_series || ' days')::interval = now() THEN null ELSE now() - (generate_series || ' days')::interval END, '[]') as active_range
	FROM generate_series(0, 10, 10)
) ar
GROUP BY p.product_id, c.company_id, d.date_range, u.uom_type_id, ar.active_range;

CREATE TEMPORARY TABLE _prices_to_insert AS
SELECT product_id, company_id, date_range, average_price * random() * 1.5 average_price, average_price_delivered * random() * 1.5 average_price_delivered, low_price, low_price_delivered, high_price, high_price_delivered, uom_type_id
FROM price_generated_test
WHERE active_range @> now()
ORDER BY random()
LIMIT 200;

BEGIN;
-- If this index exists this test case will use it for the update
DROP INDEX idx_price_generated_test_prod_comp_date;
-- If this is enabled it will choose a seq scan where my regular table still won't
SET enable_seqscan = false;

UPDATE price_generated_test
SET active_range = tstzrange(lower(active_range), now())
WHERE upper(price_generated_test.active_range) IS NULL
AND EXISTS (
	SELECT 1
	FROM _prices_to_insert pti
	WHERE price_generated_test.product_id = pti.product_id
	AND price_generated_test.company_id = pti.company_id
	AND price_generated_test.date_range = pti.date_range);
-- 
Sent via pgsql-performance mailing list (pgsql-performance@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-performance