Search Postgresql Archives

Re: Merge Joins and Views

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



See attached -- I've simplified my actual database quite a bit, but this example shows the same results.

Thanks,
--Chris
--
-- Why does the optimizer insist on sorting a clustered table?
-- 
-- NOTE: This script requires 540 MB of disk space and about
-- 12 minutes to run (on my good old Sun-Blade-1000, at least).
--

SELECT version();

DROP VIEW IF EXISTS v;
DROP TABLE IF EXISTS a;
DROP TABLE IF EXISTS b;

CREATE TABLE a (
  id integer PRIMARY KEY,
  val float8 DEFAULT random()
);

CREATE TABLE b (
  id integer PRIMARY KEY,
  opt float8 DEFAULT CASE WHEN random() < .5 THEN random() END
);

CREATE VIEW v AS
  SELECT id, COALESCE(opt, 0) AS opt FROM b;

-------------------------------------------------

INSERT INTO a
  SELECT * FROM generate_series(1, 5000000);

INSERT INTO b
  SELECT * FROM generate_series(1, 3711523);

ANALYZE a;
ANALYZE b;

-- note the correlation for the id columns is 1
SELECT * FROM pg_stats
WHERE tablename IN ('a', 'b');

-------------------------------------------------

-- Example 1: left merge join over two index scans
EXPLAIN ANALYZE
SELECT * FROM a LEFT OUTER JOIN b ON (a.id = b.id);

-- Example 2: left merge join over an index scan and seqscan + sort
EXPLAIN ANALYZE
SELECT * FROM a LEFT OUTER JOIN v ON (a.id = v.id);

-- Example 3: tricks the optimizer into a right merge join
EXPLAIN ANALYZE
SELECT * FROM a LEFT OUTER JOIN (
  SELECT * FROM v ORDER BY id
) sub ON (a.id = sub.id);
                             version                              
------------------------------------------------------------------
 PostgreSQL 8.3.0 on sparc-sun-solaris2.8, compiled by GCC 2.95.2
(1 row)

DROP VIEW
DROP TABLE
DROP TABLE
CREATE TABLE
CREATE TABLE
CREATE VIEW
INSERT 0 5000000
INSERT 0 3711523
ANALYZE
ANALYZE
 schemaname | tablename | attname | null_frac | avg_width | n_distinct | most_common_vals | most_common_freqs |                                                                                              histogram_bounds                                                                                              | correlation 
------------+-----------+---------+-----------+-----------+------------+------------------+-------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+-------------
 public     | a         | id      |         0 |         4 |         -1 |                  |                   | {226,480817,946403,1463901,1905168,2486162,2964834,3411486,3947522,4446167,4996780}                                                                                                                        |           1
 public     | a         | val     |         0 |         8 |         -1 |                  |                   | {0.00023875804618001,0.0914572249166667,0.189253146760166,0.282982839271426,0.393971057608724,0.491479988675565,0.592469296883792,0.693580291699618,0.803486418910325,0.899317930918187,0.999949590768665} |  -0.0345742
 public     | b         | id      |         0 |         4 |         -1 |                  |                   | {2380,409226,804058,1186283,1525765,1874817,2199262,2566896,2939230,3316455,3709638}                                                                                                                       |           1
 public     | b         | opt     |  0.503667 |         8 |         -1 |                  |                   | {0.000438648741692305,0.0946335387416184,0.194745551329106,0.308890894055367,0.403113955631852,0.50895657017827,0.62006954383105,0.724281970411539,0.805469979997724,0.907830006908625,0.999940330628306}  |    0.034033
(4 rows)

                                                              QUERY PLAN                                                              
--------------------------------------------------------------------------------------------------------------------------------------
 Merge Left Join  (cost=0.00..330371.44 rows=5000180 width=24) (actual time=0.319..30850.276 rows=5000000 loops=1)
   Merge Cond: (a.id = b.id)
   ->  Index Scan using a_pkey on a  (cost=0.00..156882.50 rows=5000180 width=12) (actual time=0.244..12665.648 rows=5000000 loops=1)
   ->  Index Scan using b_pkey on b  (cost=0.00..114600.84 rows=3711012 width=12) (actual time=0.061..7336.846 rows=3711523 loops=1)
 Total runtime: 32191.735 ms
(5 rows)

                                                              QUERY PLAN                                                              
--------------------------------------------------------------------------------------------------------------------------------------
 Merge Left Join  (cost=560793.89..785842.02 rows=5000180 width=24) (actual time=23542.157..55124.203 rows=5000000 loops=1)
   Merge Cond: (a.id = b.id)
   ->  Index Scan using a_pkey on a  (cost=0.00..156882.50 rows=5000180 width=12) (actual time=0.282..12397.933 rows=5000000 loops=1)
   ->  Materialize  (cost=560793.89..607181.54 rows=3711012 width=12) (actual time=23541.845..31825.216 rows=3711523 loops=1)
         ->  Sort  (cost=560793.89..570071.42 rows=3711012 width=12) (actual time=23541.833..28215.551 rows=3711523 loops=1)
               Sort Key: b.id
               Sort Method:  external sort  Disk: 116056kB
               ->  Seq Scan on b  (cost=0.00..55326.12 rows=3711012 width=12) (actual time=0.073..4694.892 rows=3711523 loops=1)
 Total runtime: 56409.946 ms
(9 rows)

                                                             QUERY PLAN                                                              
-------------------------------------------------------------------------------------------------------------------------------------
 Merge Right Join  (cost=0.00..367481.56 rows=5000180 width=24) (actual time=1.794..28075.029 rows=5000000 loops=1)
   Merge Cond: (b.id = a.id)
   ->  Index Scan using b_pkey on b  (cost=0.00..114600.84 rows=3711012 width=12) (actual time=0.322..8132.872 rows=3711523 loops=1)
   ->  Index Scan using a_pkey on a  (cost=0.00..156882.50 rows=5000180 width=12) (actual time=1.457..9714.089 rows=5000000 loops=1)
 Total runtime: 29366.349 ms
(5 rows)

-- 
Sent via pgsql-general mailing list (pgsql-general@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-general

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Postgresql Jobs]     [Postgresql Admin]     [Postgresql Performance]     [Linux Clusters]     [PHP Home]     [PHP on Windows]     [Kernel Newbies]     [PHP Classes]     [PHP Books]     [PHP Databases]     [Postgresql & PHP]     [Yosemite]
  Powered by Linux