Thank you Adrian, Greg and Veem.
UPSERT approach execution time: 00:00:20.921343
UPSERT approach rows: 1000000
insert/update approach execution time: 00:00:15.53612
insert/update approach update rows : 500000
insert/update approach Insert rows: 500000
MERGE approach execution time: 00:00:14.884623
MERGE approach rows: 1000000
truncate load approach execution time: 00:00:07.428826
truncate load rows: 1000000
*************** UPSERT Testcase **********I tried writing a small routine to see how the performance differs in these four approaches i.e. Upsert VS traditional update+insert VS Merge vs Truncate+load.
Initially I was thinking Upsert will perform the same as Merge as the logic looks similar but it seems it's the worst performing among all, not sure why , yet to know the reason though. Truncate+ load seems to be the best performing among all. Hope i am doing it correctly. Please correct me if I'm wrong.
UPSERT approach execution time: 00:00:20.921343
UPSERT approach rows: 1000000
insert/update approach execution time: 00:00:15.53612
insert/update approach update rows : 500000
insert/update approach Insert rows: 500000
MERGE approach execution time: 00:00:14.884623
MERGE approach rows: 1000000
truncate load approach execution time: 00:00:07.428826
truncate load rows: 1000000
********* Routine ********
drop table source_tab;
drop table target_tab;
CREATE TABLE source_tab (
id SERIAL PRIMARY KEY,
column1 VARCHAR(100),
column2 VARCHAR(100)
);
-- Create target table
CREATE TABLE target_tab (
id SERIAL PRIMARY KEY,
column1 VARCHAR(100),
column2 VARCHAR(100)
);
INSERT INTO source_tab (column1, column2)
SELECT
'Value ' || i,
'Value ' || (i * 2)
FROM generate_series(1, 1000000) AS i;
INSERT INTO target_tab (column1, column2)
SELECT
'Value ' || i,
'Value ' || (i * 2)
FROM generate_series(1, 500000) AS i;
DO $$
DECLARE
start_time timestamp;
end_time timestamp;
rows_inserted integer:=0;
rows_updated integer:=0;
rows_upserted integer:=0;
rows_merged integer:=0;
BEGIN
-- Measure performance of UPSERT
start_time := clock_timestamp();
INSERT INTO target_tab (id, column1, column2)
SELECT id, column1, column2
FROM source_tab
ON CONFLICT (id) DO UPDATE
SET
column1 = EXCLUDED.column1,
column2 = EXCLUDED.column2;
get diagnostics rows_upserted=row_count;
end_time := clock_timestamp();
RAISE NOTICE 'UPSERT approach execution time: %', end_time - start_time;
RAISE NOTICE 'UPSERT approach rows: %', rows_upserted;
rollback;
END $$;
*************** Traditional Insert+update Testcase **********
drop table source_tab;
drop table target_tab;
CREATE TABLE source_tab (
id SERIAL PRIMARY KEY,
column1 VARCHAR(100),
column2 VARCHAR(100)
);
-- Create target table
CREATE TABLE target_tab (
id SERIAL PRIMARY KEY,
column1 VARCHAR(100),
column2 VARCHAR(100)
);
INSERT INTO source_tab (column1, column2)
SELECT
'Value ' || i,
'Value ' || (i * 2)
FROM generate_series(1, 1000000) AS i;
INSERT INTO target_tab (column1, column2)
SELECT
'Value ' || i,
'Value ' || (i * 2)
FROM generate_series(1, 500000) AS i;
DO $$
DECLARE
start_time timestamp;
end_time timestamp;
rows_inserted integer:=0;
rows_updated integer:=0;
rows_upserted integer:=0;
rows_merged integer:=0;
BEGIN
-- Measure performance of insert/update approach
start_time := clock_timestamp();
-- Update existing records
UPDATE target_tab AS t
SET
column1 = s.column1,
column2 = s.column2
FROM source_tab AS s
WHERE t.id = s.id;
get diagnostics rows_updated=row_count;
-- Insert new records
INSERT INTO target_tab (id, column1, column2)
SELECT s.id, s.column1, s.column2
FROM source_tab AS s
LEFT JOIN target_tab AS t ON s.id = t.id
WHERE t.id IS NULL;
get diagnostics rows_inserted=row_count;
end_time := clock_timestamp();
RAISE NOTICE 'insert/update approach execution time: %', end_time - start_time;
RAISE NOTICE 'insert/update approach update rows : %', rows_updated;
RAISE NOTICE 'insert/update approach Insert rows: %', rows_inserted;
rollback;
END $$;
*************** MERGE Testcase **********
drop table source_tab;
drop table target_tab;
CREATE TABLE source_tab (
id SERIAL PRIMARY KEY,
column1 VARCHAR(100),
column2 VARCHAR(100)
);
-- Create target table
CREATE TABLE target_tab (
id SERIAL PRIMARY KEY,
column1 VARCHAR(100),
column2 VARCHAR(100)
);
INSERT INTO source_tab (column1, column2)
SELECT
'Value ' || i,
'Value ' || (i * 2)
FROM generate_series(1, 1000000) AS i;
INSERT INTO target_tab (column1, column2)
SELECT
'Value ' || i,
'Value ' || (i * 2)
FROM generate_series(1, 500000) AS i;
DO $$
DECLARE
start_time timestamp;
end_time timestamp;
rows_inserted integer:=0;
rows_updated integer:=0;
rows_upserted integer:=0;
rows_merged integer:=0;
begin
start_time := clock_timestamp();
merge into
target_tab t
using source_tab s on
t. id = s. id
when matched then
update
set column1 = s.column1,
column2 = s.column2
when not matched then
insert
values (id, column1, column2);
get diagnostics rows_merged=row_count;
end_time := clock_timestamp();
RAISE NOTICE 'MERGE approach execution time: %', end_time - start_time;
RAISE NOTICE 'MERGE approach rows: %', rows_merged;
rollback;
END $$;
*************** Truncate+load Testcase **********
drop table source_tab;
drop table target_tab;
CREATE TABLE source_tab (
id SERIAL PRIMARY KEY,
column1 VARCHAR(100),
column2 VARCHAR(100)
);
-- Create target table
CREATE TABLE target_tab (
id SERIAL PRIMARY KEY,
column1 VARCHAR(100),
column2 VARCHAR(100)
);
INSERT INTO source_tab (column1, column2)
SELECT
'Value ' || i,
'Value ' || (i * 2)
FROM generate_series(1, 1000000) AS i;
INSERT INTO target_tab (column1, column2)
SELECT
'Value ' || i,
'Value ' || (i * 2)
FROM generate_series(1, 500000) AS i;
DO $$
DECLARE
start_time timestamp;
end_time timestamp;
rows_inserted integer:=0;
rows_updated integer:=0;
rows_upserted integer:=0;
rows_truncate_loaded integer:=0;
begin
start_time := clock_timestamp();
truncate table target_tab;
INSERT INTO target_tab (id, column1, column2)
SELECT s.id, s.column1, s.column2
FROM source_tab AS s;
get diagnostics rows_truncate_loaded=row_count;
end_time := clock_timestamp();
RAISE NOTICE 'truncate load approach execution time: %', end_time - start_time;
RAISE NOTICE 'truncate load rows: %', rows_truncate_loaded;
rollback;
END $$;