See attached -- I've simplified my actual database quite a bit, but this
example shows the same results.
Thanks,
--Chris
--
-- Why does the optimizer insist on sorting a clustered table?
--
-- NOTE: This script requires 540 MB of disk space and about
-- 12 minutes to run (on my good old Sun-Blade-1000, at least).
--
SELECT version();
DROP VIEW IF EXISTS v;
DROP TABLE IF EXISTS a;
DROP TABLE IF EXISTS b;
CREATE TABLE a (
id integer PRIMARY KEY,
val float8 DEFAULT random()
);
CREATE TABLE b (
id integer PRIMARY KEY,
opt float8 DEFAULT CASE WHEN random() < .5 THEN random() END
);
CREATE VIEW v AS
SELECT id, COALESCE(opt, 0) AS opt FROM b;
-------------------------------------------------
INSERT INTO a
SELECT * FROM generate_series(1, 5000000);
INSERT INTO b
SELECT * FROM generate_series(1, 3711523);
ANALYZE a;
ANALYZE b;
-- note the correlation for the id columns is 1
SELECT * FROM pg_stats
WHERE tablename IN ('a', 'b');
-------------------------------------------------
-- Example 1: left merge join over two index scans
EXPLAIN ANALYZE
SELECT * FROM a LEFT OUTER JOIN b ON (a.id = b.id);
-- Example 2: left merge join over an index scan and seqscan + sort
EXPLAIN ANALYZE
SELECT * FROM a LEFT OUTER JOIN v ON (a.id = v.id);
-- Example 3: tricks the optimizer into a right merge join
EXPLAIN ANALYZE
SELECT * FROM a LEFT OUTER JOIN (
SELECT * FROM v ORDER BY id
) sub ON (a.id = sub.id);
version
------------------------------------------------------------------
PostgreSQL 8.3.0 on sparc-sun-solaris2.8, compiled by GCC 2.95.2
(1 row)
DROP VIEW
DROP TABLE
DROP TABLE
CREATE TABLE
CREATE TABLE
CREATE VIEW
INSERT 0 5000000
INSERT 0 3711523
ANALYZE
ANALYZE
schemaname | tablename | attname | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs | histogram_bounds | correlation
------------+-----------+---------+-----------+-----------+------------+------------------+-------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------
public | a | id | 0 | 4 | -1 | | | {226,480817,946403,1463901,1905168,2486162,2964834,3411486,3947522,4446167,4996780} | 1
public | a | val | 0 | 8 | -1 | | | {0.00023875804618001,0.0914572249166667,0.189253146760166,0.282982839271426,0.393971057608724,0.491479988675565,0.592469296883792,0.693580291699618,0.803486418910325,0.899317930918187,0.999949590768665} | -0.0345742
public | b | id | 0 | 4 | -1 | | | {2380,409226,804058,1186283,1525765,1874817,2199262,2566896,2939230,3316455,3709638} | 1
public | b | opt | 0.503667 | 8 | -1 | | | {0.000438648741692305,0.0946335387416184,0.194745551329106,0.308890894055367,0.403113955631852,0.50895657017827,0.62006954383105,0.724281970411539,0.805469979997724,0.907830006908625,0.999940330628306} | 0.034033
(4 rows)
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------
Merge Left Join (cost=0.00..330371.44 rows=5000180 width=24) (actual time=0.319..30850.276 rows=5000000 loops=1)
Merge Cond: (a.id = b.id)
-> Index Scan using a_pkey on a (cost=0.00..156882.50 rows=5000180 width=12) (actual time=0.244..12665.648 rows=5000000 loops=1)
-> Index Scan using b_pkey on b (cost=0.00..114600.84 rows=3711012 width=12) (actual time=0.061..7336.846 rows=3711523 loops=1)
Total runtime: 32191.735 ms
(5 rows)
QUERY PLAN
--------------------------------------------------------------------------------------------------------------------------------------
Merge Left Join (cost=560793.89..785842.02 rows=5000180 width=24) (actual time=23542.157..55124.203 rows=5000000 loops=1)
Merge Cond: (a.id = b.id)
-> Index Scan using a_pkey on a (cost=0.00..156882.50 rows=5000180 width=12) (actual time=0.282..12397.933 rows=5000000 loops=1)
-> Materialize (cost=560793.89..607181.54 rows=3711012 width=12) (actual time=23541.845..31825.216 rows=3711523 loops=1)
-> Sort (cost=560793.89..570071.42 rows=3711012 width=12) (actual time=23541.833..28215.551 rows=3711523 loops=1)
Sort Key: b.id
Sort Method: external sort Disk: 116056kB
-> Seq Scan on b (cost=0.00..55326.12 rows=3711012 width=12) (actual time=0.073..4694.892 rows=3711523 loops=1)
Total runtime: 56409.946 ms
(9 rows)
QUERY PLAN
-------------------------------------------------------------------------------------------------------------------------------------
Merge Right Join (cost=0.00..367481.56 rows=5000180 width=24) (actual time=1.794..28075.029 rows=5000000 loops=1)
Merge Cond: (b.id = a.id)
-> Index Scan using b_pkey on b (cost=0.00..114600.84 rows=3711012 width=12) (actual time=0.322..8132.872 rows=3711523 loops=1)
-> Index Scan using a_pkey on a (cost=0.00..156882.50 rows=5000180 width=12) (actual time=1.457..9714.089 rows=5000000 loops=1)
Total runtime: 29366.349 ms
(5 rows)
--
Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general