Search Postgresql Archives

Re: transitive pruning optimization on the right side of a join for partition tables

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

On 1 October 2012 01:14, Tom Lane <tgl@xxxxxxxxxxxxx> wrote:
> "Waldo, Ethan" <ewaldo@xxxxxxxxxxxxxxxx> writes:
>> This query does a sequence scan and append across all the partition tables:
>> select "dates"."date_description" FROM "myfact" as "myfact", "dates" as "dates" where "myfact"."recorded_on_id" = "dates"."recorded_on_id" and "dates"."recorded_on_id" IN ('4617', '4618', '4619', '4620', '4621', '4622', '4623', '4624', '4625', '4626', '4627', '4628', '4629', '4630', '4631', '4632', '4633', '4634', '4635', '4636', '4637', '4638', '4639', '4640', '4641', '4642', '4643', '4644', '4645', '4646', '4647');
>
> When I try that in 9.1, I get a plan with inner indexscans for each
> child table; which, while not exactly what you're asking for, should
> perform well enough when the fact table is large enough that
> partitioning is actually a useful activity.

I do not have 9.1 handy but this is from 9.0.4:

db=# EXPLAIN select dates.datetime, myfact.recorded_on_id from myfact,
dates where myfact.recorded_on_id = dates.recorded_on_id and
dates.recorded_on_id in (1813, 1814);
                                         QUERY PLAN
--------------------------------------------------------------------------------------------
 Hash Join  (cost=33.46..86124.15 rows=2858831 width=16)
   Hash Cond: (public.myfact.recorded_on_id = public.dates.recorded_on_id)
   ->  Append  (cost=0.00..46245.73 rows=3001773 width=8)
         ->  Seq Scan on myfact  (cost=0.00..27.70 rows=1770 width=8)
         ->  Seq Scan on myfact_y2004w51 myfact  (cost=0.00..15406.01
rows=1000001 width=8)
         ->  Seq Scan on myfact_y2004w52 myfact  (cost=0.00..15406.01
rows=1000001 width=8)
         ->  Seq Scan on myfact_y2004w53 myfact  (cost=0.00..15406.01
rows=1000001 width=8)
   ->  Hash  (cost=33.21..33.21 rows=20 width=16)
         ->  Append  (cost=0.00..33.21 rows=20 width=16)
               ->  Seq Scan on dates  (cost=0.00..32.12 rows=18 width=16)
                     Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
               ->  Seq Scan on dates_y2004w51 dates  (cost=0.00..1.09
rows=2 width=16)
                     Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
(13 rows)

Postgres can't infer that myfact.recorded_on_id is in (1813, 1814)
from the join condition (myfact.recorded_on_id = dates.recorded_on_id)
hence all partitons are included (myfact_y2004w51, myfact_y2004w53,
myfact_y2004w53). Adding "myfact.recorded_on_id in (1813, 1814)"
creates much better plan:

db=# EXPLAIN select dates.datetime, myfact.recorded_on_id from myfact,
dates where myfact.recorded_on_id = dates.recorded_on_id and
dates.recorded_on_id in (1813, 1814) and myfact.recorded_on_id in
(1813, 1814);
                                                   QUERY PLAN
-----------------------------------------------------------------------------------------------------------------
 Hash Join  (cost=33.46..18878.72 rows=296673 width=16)
   Hash Cond: (public.myfact.recorded_on_id = public.dates.recorded_on_id)
   ->  Append  (cost=0.00..14710.38 rows=311507 width=8)
         ->  Seq Scan on myfact  (cost=0.00..32.12 rows=18 width=8)
               Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
         ->  Bitmap Heap Scan on myfact_y2004w51 myfact
(cost=5378.64..14678.25 rows=311489 width=8)
               Recheck Cond: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
               ->  Bitmap Index Scan on myfact_y2004w51_recorded_on_id
 (cost=0.00..5300.77 rows=311489 width=0)
                     Index Cond: (recorded_on_id = ANY
('{1813,1814}'::bigint[]))
   ->  Hash  (cost=33.21..33.21 rows=20 width=16)
         ->  Append  (cost=0.00..33.21 rows=20 width=16)
               ->  Seq Scan on dates  (cost=0.00..32.12 rows=18 width=16)
                     Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
               ->  Seq Scan on dates_y2004w51 dates  (cost=0.00..1.09
rows=2 width=16)
                     Filter: (recorded_on_id = ANY ('{1813,1814}'::bigint[]))
(15 rows)


--------------
create table myfact (
	id bigint not null,
	recorded_on_id bigint not null
);

create table myfact_y2004w51 (
	check (recorded_on_id >= 1812 and recorded_on_id <= 1818),
	primary key (id)
) inherits (myfact);
create table myfact_y2004w52 (
	check (recorded_on_id >= 1819 and recorded_on_id <= 1825),
	primary key (id)
) inherits (myfact);
create table myfact_y2004w53 (
	check (recorded_on_id >= 1826 and recorded_on_id <= 1832),
	primary key (id)
) inherits (myfact);

create table dates (
	datetime timestamp without time zone not null,
	recorded_on_id bigint not null
);

create table dates_y2004w51 (
	check (recorded_on_id >= 1812 and recorded_on_id <= 1818),
	primary key (datetime)
) inherits (dates);
create table dates_y2004w52 (
	check (recorded_on_id >= 1819 and recorded_on_id <= 1825),
	primary key (datetime)
) inherits (dates);
create table dates_y2004w53 (
	check (recorded_on_id >= 1826 and recorded_on_id <= 1832),
	primary key (datetime)
) inherits (dates);

insert into myfact_y2004w51 select s.i, (random() * 6 + 1812)::integer
from generate_series(0, 1000000, 1) as s(i);
insert into myfact_y2004w52 select s.i, (random() * 6 + 1819)::integer
from generate_series(0, 1000000, 1) as s(i);
insert into myfact_y2004w53 select s.i, (random() * 6 + 1826)::integer
from generate_series(0, 1000000, 1) as s(i);

insert into dates_y2004w51 select distinct on (recorded_on_id)
to_timestamp(recorded_on_id), recorded_on_id from myfact_y2004w51;
insert into dates_y2004w52 select distinct on (recorded_on_id)
to_timestamp(recorded_on_id), recorded_on_id from myfact_y2004w52;
insert into dates_y2004w53 select distinct on (recorded_on_id)
to_timestamp(recorded_on_id), recorded_on_id from myfact_y2004w53;

create index myfact_y2004w51_recorded_on_id on myfact_y2004w51(recorded_on_id);
create index myfact_y2004w52_recorded_on_id on myfact_y2004w52(recorded_on_id);
create index myfact_y2004w53_recorded_on_id on myfact_y2004w53(recorded_on_id);

create index dates_y2004w51_recorded_on_id on dates_y2004w51(recorded_on_id);
create index dates_y2004w52_recorded_on_id on dates_y2004w52(recorded_on_id);
create index dates_y2004w53_recorded_on_id on dates_y2004w53(recorded_on_id);

ANALYZE;ANALYZE;ANALYZE;ANALYZE;ANALYZE;

EXPLAIN select dates.datetime, myfact.recorded_on_id from myfact,
dates where myfact.recorded_on_id = dates.recorded_on_id and
dates.recorded_on_id in (1813, 1814);
EXPLAIN select dates.datetime, myfact.recorded_on_id from myfact,
dates where myfact.recorded_on_id = dates.recorded_on_id and
dates.recorded_on_id in (1813, 1814) and myfact.recorded_on_id in
(1813, 1814);
----------------------

-- 
Ondrej Ivanic
(ondrej.ivanic@xxxxxxxxx)


-- 
Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general



[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Postgresql Jobs]     [Postgresql Admin]     [Postgresql Performance]     [Linux Clusters]     [PHP Home]     [PHP on Windows]     [Kernel Newbies]     [PHP Classes]     [PHP Books]     [PHP Databases]     [Postgresql & PHP]     [Yosemite]
  Powered by Linux