Re: Outer joins and equivalence

Simon Riggs <simon@xxxxxxxxxxxxxxx> · Mon, 02 Jun 2008 20:47:11 +0100

On Mon, 2008-06-02 at 18:10 +0100, Simon Riggs wrote:
> On Tue, 2008-05-27 at 17:43 -0400, Tom Lane wrote:
> > Simon Riggs <simon@xxxxxxxxxxxxxxx> writes:
> > > I have a complex query where making a small change to the SQL increases
> > > run-time by > 1000 times.
> > 
> > > The first SQL statement is of the form
> > 
> > > A JOIN B ON (a.id = b.id) LEFT JOIN C ON (a.id = c.id) 
> > 
> > > and the second is like this
> > 
> > > A JOIN B ON (a.id = b.id) LEFT JOIN C ON (b.id = c.id)
> > 
> > > the only difference is the substitution of a -> b
> > 
> > Please provide an actual test case.
> 
> Getting closer, but still not able to produce a moveable test case.

I've got a test case which shows something related and weird, though not
the exact case.

The queries shown here have significantly different costs, depending
upon whether we use tables a or b in the query. Since a and b are
equivalent this result isn't expected at all.

I suspect the plan variation in the original post is somehow cost
related and we are unlikely to discover the exact plan.

-- 
 Simon Riggs           www.2ndQuadrant.com
 PostgreSQL Training, Services and Support
drop table if exists a cascade;
create table a
(id			integer		not null primary key
,partition	integer
,filler		text
);

create table a_p1 as
select generate_series(1, 10000000)::integer as id, 1::integer as partition, repeat('a',100) as filler;

alter table a_p1 add primary key (id);
alter table a_p1 inherit a; 
alter table a_p1 add check ( partition = 1);

create table a_p2 as
select generate_series(1000000000,1001000000)::integer as id, 2::integer as partition, repeat('a',100) as filler;

alter table a_p2 add primary key (id);
alter table a_p2 inherit a; 
alter table a_p2 add check ( partition = 2);

drop table if exists b cascade;
create table b
(id 		integer		not null primary key
,filler		text
);

drop table if exists c cascade;
create table c
(id			integer		not null primary key
,othercol	integer
);

create index c_idx on c (othercol);

insert into c
select generate_series(1,24667), 0;
insert into c
select generate_series(25000, 27488), 1;

explain select count(*) from a join b on (a.id = b.id)
left join c on (a.id = c.id and c.othercol = 1);

explain select count(*) from a join b on (a.id = b.id)
left join c on (b.id = c.id and c.othercol = 1);

set enable_seqscan = off;

explain select count(*) from a join b on (a.id = b.id)
left join c on (a.id = c.id and c.othercol = 1);

explain select count(*) from a join b on (a.id = b.id)
left join c on (b.id = c.id and c.othercol = 1);