Search Postgresql Archives

Re: a JOIN on same table, but 'slided over'

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




	OK, check...

test=> CREATE TABLE test (id INTEGER PRIMARY KEY);
test=> INSERT INTO test SELECT random()*5 + n*10 FROM generate_series( 1,100000 ) AS n;
test=> SELECT * FROM test LIMIT 10;
  id
-----
  11
  23
  31
  41
  52
  63
  70
  85
  94
 103

test=> ANALYZE test;
ANALYZE

	-- Self Join 1

test=> EXPLAIN ANALYZE SELECT t1.id AS current_id, t2.id AS next_id
FROM test t1, test t2
WHERE t2.id = ( SELECT min(id) FROM test AS t3 WHERE t3.id > t1.id )
ORDER BY t1.id;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=26703.19..26953.19 rows=100000 width=8) (actual time=5240.392..5271.529 rows=99999 loops=1)
   Sort Key: t1.id
-> Hash Join (cost=2691.00..18398.37 rows=100000 width=8) (actual time=106.588..5179.737 rows=99999 loops=1)
         Hash Cond: ((subplan) = t2.id)
-> Seq Scan on test t1 (cost=0.00..1441.00 rows=100000 width=4) (actual time=0.013..34.782 rows=100000 loops=1) -> Hash (cost=1441.00..1441.00 rows=100000 width=4) (actual time=106.420..106.420 rows=100000 loops=1) -> Seq Scan on test t2 (cost=0.00..1441.00 rows=100000 width=4) (actual time=0.007..43.077 rows=100000 loops=1)
         SubPlan
-> Result (cost=0.03..0.04 rows=1 width=0) (actual time=0.023..0.023 rows=1 loops=199999)
                 InitPlan
-> Limit (cost=0.00..0.03 rows=1 width=4) (actual time=0.021..0.022 rows=1 loops=199999) -> Index Scan using test_pkey on test t3 (cost=0.00..1029.59 rows=33333 width=4) (actual time=0.020..0.020 rows=1 loops=199999)
                               Index Cond: (id > $0)
                               Filter: (id IS NOT NULL)
 Total runtime: 5295.677 ms

	-- Self Join 2

test=> set enable_hashjoin TO 0;
test=> EXPLAIN ANALYZE SELECT t1.id AS current_id, t2.id AS next_id
FROM test t1, test t2
WHERE t2.id = ( SELECT min(id) FROM test AS t3 WHERE t3.id > t1.id )
ORDER BY t1.id;
QUERY PLAN
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------
Sort (cost=30806.48..31056.48 rows=100000 width=8) (actual time=2876.249..2903.011 rows=99999 loops=1)
   Sort Key: t1.id
-> Merge Join (cost=9745.82..22501.66 rows=100000 width=8) (actual time=2547.830..2820.347 rows=99999 loops=1)
         Merge Cond: (t2.id = "inner"."?column2?")
-> Index Scan using test_pkey on test t2 (cost=0.00..2828.26 rows=100000 width=4) (actual time=0.035..67.747 rows=100000 loops=1) -> Sort (cost=9745.82..9995.82 rows=100000 width=4) (actual time=2547.779..2582.889 rows=100000 loops=1)
               Sort Key: (subplan)
-> Seq Scan on test t1 (cost=0.00..1441.00 rows=100000 width=4) (actual time=0.060..2487.728 rows=100000 loops=1)
                     SubPlan
-> Result (cost=0.03..0.04 rows=1 width=0) (actual time=0.023..0.023 rows=1 loops=100000)
                             InitPlan
-> Limit (cost=0.00..0.03 rows=1 width=4) (actual time=0.021..0.022 rows=1 loops=100000) -> Index Scan using test_pkey on test t3 (cost=0.00..1029.59 rows=33333 width=4) (actual time=0.020..0.020 rows=1 loops=100000)
                                           Index Cond: (id > $0)
                                           Filter: (id IS NOT NULL)
 Total runtime: 2923.804 ms

	-- DISTINCT ON

test=> EXPLAIN SELECT DISTINCT ON (t1.id) t1.id AS current_id, t2.id AS next_id
FROM test t1 JOIN test t2 ON t2.id > t1.id
ORDER BY t1.id, t2.id;
                                           QUERY PLAN
-------------------------------------------------------------------------------------------------
 Unique  (cost=729806679.75..746473346.41 rows=100000 width=8)
   ->  Sort  (cost=729806679.75..738140013.08 rows=3333333333 width=8)
         Sort Key: t1.id, t2.id
         ->  Nested Loop  (cost=0.00..100028973.00 rows=3333333333 width=8)
-> Seq Scan on test t1 (cost=0.00..1441.00 rows=100000 width=4) -> Index Scan using test_pkey on test t2 (cost=0.00..583.61 rows=33333 width=4)
                     Index Cond: (t2.id > t1.id)
(7 lignes)

	This one takes much longer (I interrupted it).

	-- Using a function

CREATE TYPE test_type AS ( current_id INTEGER, next_id INTEGER );

CREATE OR REPLACE FUNCTION testfunc( )
    RETURNS SETOF test_type
    LANGUAGE plpgsql
    AS
$$
DECLARE
	_row		test_type;
BEGIN
	_row.current_id = NULL;
	
	FOR _row.next_id IN SELECT id FROM test ORDER BY id LOOP
		IF _row.current_id IS NOT NULL THEN
			RETURN NEXT _row;
		END IF;
		_row.current_id = _row.next_id;
	END LOOP;
END;
$$;

test=> EXPLAIN ANALYZE SELECT * FROM testfunc();
                                                     QUERY PLAN
--------------------------------------------------------------------------------------------------------------------
Function Scan on testfunc (cost=0.00..12.50 rows=1000 width=8) (actual time=211.702..238.322 rows=100000 loops=1)
 Total runtime: 262.369 ms

	Same results, at least 10x faster on large datasets...









[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Postgresql Jobs]     [Postgresql Admin]     [Postgresql Performance]     [Linux Clusters]     [PHP Home]     [PHP on Windows]     [Kernel Newbies]     [PHP Classes]     [PHP Books]     [PHP Databases]     [Postgresql & PHP]     [Yosemite]
  Powered by Linux