Re: Plan to support predicate push-down into subqueries with aggregates?

Adam Brusselback <adambrusselback@xxxxxxxxx> · Wed, 9 Mar 2016 12:25:18 -0500

I responded yesterday, but it seems to have gotten caught up because it was too big with the attachments... Here it is again.

Sorry about not posting correctly, hopefully I did it right this time.

So I wanted to see if Sql Server (2014) could handle this type of query differently than Postgres (9.5.1), so I got an instance of express installed and ported the test script to it.

I updated my Postgres script so the data is the same in each server.  The end result is Sql Server seems to be able to optimize all of these queries MUCH better than Postgres.
I disabled parallelism in Sql Server to make the comparison fair.

I've attached the explain analyze results for Postgres, and the execution plan for Sql Server (in picture form... don't know a better way)

Results are:
--Sql Server:15ms average
--Postgres: 6ms average
SELECT *
FROM header
INNER JOIN header_total
ON header.header_id = header_total.header_id
WHERE header.header_id = 26;

--Sql Server: 15ms average
--Postgres: 1250ms average
SELECT *
FROM header
INNER JOIN header_total
ON header.header_id = header_total.header_id
WHERE header.header_id < 27
AND header.header_id > 24;

--Sql Server: 567ms average
--Postgres: 1265ms average
SELECT *
FROM header
INNER JOIN header_total
ON header.header_id = header_total.header_id
WHERE header.description like '%5%';

--Sql Server: 15ms average
--Postgres: 1252ms average
SELECT *
FROM header_total
WHERE header_total.header_id IN (
SELECT header_id
FROM header
WHERE header.header_id < 27
AND header.header_id > 24);

Here are the sql server execution plans as links rather than attachments:
https://drive.google.com/file/d/0BzWRjbj6CQLeb29JZ0lMMnp4QTA/view?usp=sharing
https://drive.google.com/file/d/0BzWRjbj6CQLeM2t0MmZDdE03OHc/view?usp=sharing
https://drive.google.com/file/d/0BzWRjbj6CQLeV0hjRmM5NE9CTWc/view?usp=sharing
https://drive.google.com/file/d/0BzWRjbj6CQLeNmdlQWpHYU1BVHM/view?usp=sharing

query1:

'Nested Loop Left Join  (cost=13.22..1022.98 rows=1 width=125) (actual time=3.021..3.024 rows=1 loops=1)'
'  Join Filter: (header_1.header_id = detail_2.header_id)'
'  ->  Nested Loop Left Join  (cost=11.78..1013.93 rows=1 width=89) (actual time=2.978..2.981 rows=1 loops=1)'
'        Join Filter: (header_1.header_id = detail_1.header_id)'
'        ->  Nested Loop  (cost=0.55..4.99 rows=1 width=49) (actual time=0.028..0.030 rows=1 loops=1)'
'              ->  Index Scan using header_pkey on header  (cost=0.28..2.49 rows=1 width=33) (actual time=0.022..0.023 rows=1 loops=1)'
'                    Index Cond: (header_id = 26)'
'              ->  Index Scan using header_pkey on header header_1  (cost=0.28..2.49 rows=1 width=16) (actual time=0.002..0.003 rows=1 loops=1)'
'                    Index Cond: (header_id = 26)'
'        ->  GroupAggregate  (cost=11.23..1008.92 rows=1 width=32) (actual time=2.944..2.945 rows=1 loops=1)'
'              Group Key: detail_1.header_id'
'              ->  Bitmap Heap Scan on detail_1  (cost=11.23..999.21 rows=969 width=32) (actual time=0.301..2.067 rows=1000 loops=1)'
'                    Recheck Cond: (header_id = 26)'
'                    Heap Blocks: exact=1000'
'                    ->  Bitmap Index Scan on idx_detail_1_header_id  (cost=0.00..10.99 rows=969 width=0) (actual time=0.156..0.156 rows=1000 loops=1)'
'                          Index Cond: (header_id = 26)'
'  ->  GroupAggregate  (cost=1.44..9.01 rows=1 width=20) (actual time=0.033..0.033 rows=1 loops=1)'
'        Group Key: detail_2.header_id'
'        ->  Bitmap Heap Scan on detail_2  (cost=1.44..8.95 rows=7 width=20) (actual time=0.013..0.027 rows=7 loops=1)'
'              Recheck Cond: (header_id = 26)'
'              Heap Blocks: exact=7'
'              ->  Bitmap Index Scan on idx_detail_2_header_id  (cost=0.00..1.44 rows=7 width=0) (actual time=0.010..0.010 rows=7 loops=1)'
'                    Index Cond: (header_id = 26)'
'Planning time: 0.420 ms'
'Execution time: 3.127 ms'

query2:

'Hash Right Join  (cost=28367.90..28394.20 rows=3 width=129) (actual time=1245.897..1246.442 rows=2 loops=1)'
'  Hash Cond: (detail_1.header_id = header_1.header_id)'
'  ->  HashAggregate  (cost=28164.00..28176.50 rows=1000 width=32) (actual time=1235.943..1236.304 rows=1000 loops=1)'
'        Group Key: detail_1.header_id'
'        ->  Seq Scan on detail_1  (cost=0.00..18164.00 rows=1000000 width=32) (actual time=0.008..215.064 rows=1000000 loops=1)'
'  ->  Hash  (cost=203.86..203.86 rows=3 width=89) (actual time=9.895..9.895 rows=2 loops=1)'
'        Buckets: 1024  Batches: 1  Memory Usage: 9kB'
'        ->  Hash Right Join  (cost=177.58..203.86 rows=3 width=89) (actual time=8.835..9.891 rows=2 loops=1)'
'              Hash Cond: (detail_2.header_id = header_1.header_id)'
'              ->  HashAggregate  (cost=167.50..180.00 rows=1000 width=20) (actual time=8.743..9.407 rows=1000 loops=1)'
'                    Group Key: detail_2.header_id'
'                    ->  Seq Scan on detail_2  (cost=0.00..115.00 rows=7000 width=20) (actual time=0.008..2.092 rows=7000 loops=1)'
'              ->  Hash  (cost=10.04..10.04 rows=3 width=49) (actual time=0.048..0.048 rows=2 loops=1)'
'                    Buckets: 1024  Batches: 1  Memory Usage: 9kB'
'                    ->  Nested Loop  (cost=0.55..10.04 rows=3 width=49) (actual time=0.029..0.042 rows=2 loops=1)'
'                          ->  Index Scan using header_pkey on header  (cost=0.28..2.53 rows=3 width=33) (actual time=0.017..0.018 rows=2 loops=1)'
'                                Index Cond: ((header_id < 27) AND (header_id > 24))'
'                          ->  Index Scan using header_pkey on header header_1  (cost=0.28..2.49 rows=1 width=16) (actual time=0.005..0.005 rows=1 loops=2)'
'                                Index Cond: (header_id = header.header_id)'
'Planning time: 1.228 ms'
'Execution time: 1246.659 ms'

query3:

'Hash Right Join  (cost=28417.70..28448.72 rows=273 width=129) (actual time=1255.008..1256.036 rows=271 loops=1)'
'  Hash Cond: (detail_1.header_id = header_1.header_id)'
'  ->  HashAggregate  (cost=28164.00..28176.50 rows=1000 width=32) (actual time=1242.275..1242.657 rows=1000 loops=1)'
'        Group Key: detail_1.header_id'
'        ->  Seq Scan on detail_1  (cost=0.00..18164.00 rows=1000000 width=32) (actual time=0.004..209.026 rows=1000000 loops=1)'
'  ->  Hash  (cost=250.28..250.28 rows=273 width=89) (actual time=12.708..12.708 rows=271 loops=1)'
'        Buckets: 1024  Batches: 1  Memory Usage: 38kB'
'        ->  Hash Right Join  (cost=221.30..250.28 rows=273 width=89) (actual time=11.828..12.546 rows=271 loops=1)'
'              Hash Cond: (detail_2.header_id = header_1.header_id)'
'              ->  HashAggregate  (cost=167.50..180.00 rows=1000 width=20) (actual time=9.610..9.958 rows=1000 loops=1)'
'                    Group Key: detail_2.header_id'
'                    ->  Seq Scan on detail_2  (cost=0.00..115.00 rows=7000 width=20) (actual time=0.006..2.440 rows=7000 loops=1)'
'              ->  Hash  (cost=50.39..50.39 rows=273 width=49) (actual time=2.207..2.207 rows=271 loops=1)'
'                    Buckets: 1024  Batches: 1  Memory Usage: 31kB'
'                    ->  Hash Join  (cost=24.91..50.39 rows=273 width=49) (actual time=0.651..1.874 rows=271 loops=1)'
'                          Hash Cond: (header_1.header_id = header.header_id)'
'                          ->  Seq Scan on header header_1  (cost=0.00..19.00 rows=1000 width=16) (actual time=0.008..0.497 rows=1000 loops=1)'
'                          ->  Hash  (cost=21.50..21.50 rows=273 width=33) (actual time=0.609..0.609 rows=271 loops=1)'
'                                Buckets: 1024  Batches: 1  Memory Usage: 26kB'
'                                ->  Seq Scan on header  (cost=0.00..21.50 rows=273 width=33) (actual time=0.009..0.475 rows=271 loops=1)'
'                                      Filter: (description ~~ '%5%'::text)'
'                                      Rows Removed by Filter: 729'
'Planning time: 1.284 ms'
'Execution time: 1256.289 ms'

query4:

'Hash Right Join  (cost=28366.80..28393.10 rows=3 width=96) (actual time=1204.289..1204.851 rows=2 loops=1)'
'  Hash Cond: (detail_1.header_id = header.header_id)'
'  ->  HashAggregate  (cost=28164.00..28176.50 rows=1000 width=32) (actual time=1193.979..1194.338 rows=1000 loops=1)'
'        Group Key: detail_1.header_id'
'        ->  Seq Scan on detail_1  (cost=0.00..18164.00 rows=1000000 width=32) (actual time=0.004..202.247 rows=1000000 loops=1)'
'  ->  Hash  (cost=202.76..202.76 rows=3 width=56) (actual time=10.242..10.242 rows=2 loops=1)'
'        Buckets: 1024  Batches: 1  Memory Usage: 9kB'
'        ->  Hash Right Join  (cost=176.48..202.76 rows=3 width=56) (actual time=9.720..10.238 rows=2 loops=1)'
'              Hash Cond: (detail_2.header_id = header.header_id)'
'              ->  HashAggregate  (cost=167.50..180.00 rows=1000 width=20) (actual time=9.518..9.855 rows=1000 loops=1)'
'                    Group Key: detail_2.header_id'
'                    ->  Seq Scan on detail_2  (cost=0.00..115.00 rows=7000 width=20) (actual time=0.008..2.351 rows=7000 loops=1)'
'              ->  Hash  (cost=8.94..8.94 rows=3 width=16) (actual time=0.141..0.141 rows=2 loops=1)'
'                    Buckets: 1024  Batches: 1  Memory Usage: 9kB'
'                    ->  Nested Loop  (cost=0.55..8.94 rows=3 width=16) (actual time=0.051..0.133 rows=2 loops=1)'
'                          ->  Index Only Scan using header_pkey on header header_1  (cost=0.28..1.44 rows=3 width=4) (actual time=0.034..0.037 rows=2 loops=1)'
'                                Index Cond: ((header_id < 27) AND (header_id > 24))'
'                                Heap Fetches: 0'
'                          ->  Index Scan using header_pkey on header  (cost=0.28..2.49 rows=1 width=16) (actual time=0.008..0.040 rows=1 loops=2)'
'                                Index Cond: (header_id = header_1.header_id)'
'Planning time: 1.223 ms'
'Execution time: 1205.046 ms'
CREATE SCHEMA test;

SET search_path = 'test';

CREATE TABLE header (
  header_id serial primary key,
  description text not null,
  amount numeric not null
  );

CREATE TABLE detail_1 (
  detail_1_id serial primary key,
  header_id integer not null references header (header_id),
  quantity numeric not null,
  rate numeric not null
  );

CREATE TABLE detail_2 (
  detail_2_id serial primary key,
  header_id integer not null references header (header_id),
  amount numeric not null
  );

  INSERT INTO header (description, amount)
  SELECT 'header record ' || generate_series, random() * 100
  FROM generate_series(1, 1000);

  INSERT INTO detail_1 (header_id, quantity, rate)
  SELECT header_id, random() * 50, random() * 10
  FROM header
  INNER JOIN generate_series(1, 1000)
  ON true;

  INSERT INTO detail_2 (header_id, amount)
  SELECT header_id, random() * 120
  FROM header
  INNER JOIN generate_series(1, 7)
  ON true;

CREATE VIEW header_total AS 
SELECT header.header_id
, coalesce(detail_1.amount, 0) AS detail_1_amount
, coalesce(detail_1.detail_1_count, 0) AS detail_1_count
, coalesce(detail_2.amount, 0) AS detail_2_amount
, coalesce(detail_2.detail_2_count, 0) AS detail_2_count
, coalesce(detail_1.amount, 0) + coalesce(detail_2.amount, 0) as detail_total
, header.amount = coalesce(detail_1.amount, 0) + coalesce(detail_2.amount, 0) as balanced
FROM header
LEFT JOIN (
SELECT header_id
, sum(rate * quantity) as amount 
, count(detail_1_id) as detail_1_count
FROM detail_1
GROUP BY header_id
) detail_1
ON header.header_id = detail_1.header_id
LEFT JOIN (
SELECT header_id
, sum(amount) as amount 
, count(detail_2_id) as detail_2_count
FROM detail_2
GROUP BY header_id
) detail_2
ON header.header_id = detail_2.header_id;

CREATE INDEX idx_detail_1_header_id ON detail_1 (header_id);
CREATE INDEX idx_detail_2_header_id ON detail_2 (header_id);

VACUUM ANALYZE header;
VACUUM ANALYZE detail_1;
VACUUM ANALYZE detail_2;

--Quick, is able to push down because it's a simple equality check
SELECT *
FROM header
INNER JOIN header_total
USING (header_id)
WHERE header.header_id = 26;

--Slow, no pushdown
SELECT *
FROM header
INNER JOIN header_total
ON header.header_id = header_total.header_id
WHERE header.header_id < 27
AND header.header_id > 24;

--Slow, no pushdown
SELECT *
FROM header
INNER JOIN header_total
ON header.header_id = header_total.header_id
WHERE header.description like '%5%';

--Slow, no pushdown
SELECT *
FROM header_total
WHERE header_total.header_id IN (
SELECT header_id
FROM header
WHERE header.header_id < 27
AND header.header_id > 24);

DROP SCHEMA test CASCADE;
-- Want to make this a fair test with Postgres
sp_configure 'max degree of parallelism', 1;
GO
RECONFIGURE WITH OVERRIDE;
GO

CREATE TABLE header (
  header_id int identity(1, 1) primary key,
  description varchar(255) not null,
  amount numeric(20,10) not null
  );

CREATE TABLE detail_1 (
  detail_1_id int identity(1, 1) primary key,
  header_id int not null,
  quantity  numeric(20,10) not null,
  rate  numeric(20,10) not null
  );

CREATE TABLE detail_2 (
  detail_2_id int identity(1, 1) primary key,
  header_id int not null,
  amount  numeric(20,10) not null
  );

  with t as (
  select 1 as d
  union all
  select d + 1
  from t
  where d < 1000
  )
  INSERT INTO header (description, amount)
  SELECT 'header record ' + CAST(t.d AS varchar(255)), ABS(CAST(CAST(NEWID() AS VARBINARY) AS INT))
  FROM t
  OPTION (MAXRECURSION 10000);

  with t as (
  select 1 as d
  union all
  select d + 1
  from t
  where d < 1000
  )
  INSERT INTO detail_1 (header_id, quantity, rate)
  SELECT header_id, ABS(CAST(CAST(NEWID() AS VARBINARY) AS INT)) / 1000, ABS(CAST(CAST(NEWID() AS VARBINARY) AS INT)) / 10000
  FROM header
  INNER JOIN t
  ON 1=1
  OPTION (MAXRECURSION 10000);

  with t as (
  select 1 as d
  union all
  select d + 1
  from t
  where d < 7
  )
  INSERT INTO detail_2 (header_id, amount)
  SELECT header_id, ABS(CAST(CAST(NEWID() AS VARBINARY) AS INT)) / 1000
  FROM header
  INNER JOIN t
  ON 1=1
  OPTION (MAXRECURSION 10000);

  UPDATE detail_1 SET rate = rate / 1000, quantity = quantity / 50;
  UPDATE detail_2 SET amount = amount / 100;
  UPDATE header SET amount = amount / 100;

CREATE NONCLUSTERED INDEX idx_detail_1_header_id
ON [dbo].[detail_1] ([header_id]);

CREATE NONCLUSTERED INDEX idx_detail_2_header_id
ON [dbo].[detail_2] ([header_id]);

CREATE VIEW [dbo].[header_total]
AS
SELECT header.header_id
, coalesce(detail_1.amount, 0) AS detail_1_amount
, coalesce(detail_1.detail_1_count, 0) AS detail_1_count
, coalesce(detail_2.amount, 0) AS detail_2_amount
, coalesce(detail_2.detail_2_count, 0) AS detail_2_count
, coalesce(detail_1.amount, 0) + coalesce(detail_2.amount, 0) as detail_total
FROM header
LEFT JOIN (
SELECT header_id
, sum(rate * quantity) as amount 
, count(detail_1_id) as detail_1_count
FROM detail_1
GROUP BY header_id
) detail_1
ON header.header_id = detail_1.header_id
LEFT JOIN (
SELECT header_id
, sum(amount) as amount 
, count(detail_2_id) as detail_2_count
FROM detail_2
GROUP BY header_id
) detail_2
ON header.header_id = detail_2.header_id;

SELECT *
FROM header
INNER JOIN header_total
ON header.header_id = header_total.header_id
WHERE header.header_id = 26;

SELECT *
FROM header
INNER JOIN header_total
ON header.header_id = header_total.header_id
WHERE header.header_id < 27
AND header.header_id > 24;

SELECT *
FROM header
INNER JOIN header_total
ON header.header_id = header_total.header_id
WHERE header.description like '%5%';

SELECT *
FROM header_total
WHERE header_total.header_id IN (
SELECT header_id
FROM header
WHERE header.header_id < 27
AND header.header_id > 24);
-- 
Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general