Hi Michael,
Here are the script and the EXPLAIN ANALYZE output; please take a look. It seems each batch gets slower because the Filter has to remove more and more rows.
DO $MAIN$
DECLARE
    affect_count integer := 1000;
    processed_row_count integer := 0;
BEGIN
    LOOP
        EXIT WHEN affect_count = 0;
        -- Soft-delete the next batch of up to 1000 matching rows.
        UPDATE app gaa
        SET deleted_at = CAST(extract(epoch FROM now() AT TIME ZONE 'utc') * 1000000000 AS bigint)
        WHERE gaa.id IN (
            SELECT gab.id
            FROM app gab
            LEFT JOIN pol gp ON gab.policy_id = gp.id
            WHERE gab.policy_type = 'policy.protection.total'
              AND gp.name LIKE 'Mobile backup%'
              AND gab.deleted_at IS NULL
              AND gp.deleted_at IS NOT NULL
            LIMIT 1000);
        GET DIAGNOSTICS affect_count = ROW_COUNT;
        COMMIT;
        processed_row_count := processed_row_count + affect_count;
    END LOOP;
    RAISE NOTICE 'total processed rows %', processed_row_count;
END;
$MAIN$;
--earlier plan: a 1000-record batch took 156.488 ms
--------------------------------------------------------------------
Update on app gaa (cost=3307.57..6085.41 rows=1000 width=3943) (actual time=156.347..156.347 rows=0 loops=1)
InitPlan 1 (returns $0)
-> Result (cost=0.00..0.02 rows=1 width=8) (actual time=0.007..0.008 rows=1 loops=1)
-> Nested Loop (cost=3307.54..6085.39 rows=1000 width=3943) (actual time=18.599..33.987 rows=1000 loops=1)
-> HashAggregate (cost=3306.99..3316.99 rows=1000 width=98) (actual time=18.554..19.085 rows=1000 loops=1)
Group Key: ("ANY_subquery".id)::text
-> Subquery Scan on "ANY_subquery" (cost=2.17..3304.49 rows=1000 width=98) (actual time=0.041..18.052 rows=1000 loops=1)
-> Limit (cost=2.17..3294.49 rows=1000 width=37) (actual time=0.030..17.827 rows=1000 loops=1)
-> Merge Join (cost=2.17..877396.03 rows=266497 width=37) (actual time=0.029..17.764 rows=1000 loops=1)
Merge Cond: ((gab.policy_id)::text = (gp.id)::text)
-> Index Scan using tmp_uq_policy_id_context2 on app gab (cost=0.56..487631.06 rows=3151167 width=74) (actual time=0.018..9.192 rows=3542 loops=1)
Filter: ((policy_type)::text = 'policy.protection.total'::text)
Rows Removed by Filter: 2064
-> Index Scan using pol_pkey on pol gp (cost=0.56..378322.78 rows=361105 width=37) (actual time=0.008..7.380 rows=1006 loops=1)
Filter: ((deleted_at IS NOT NULL) AND (name ~~ 'Mobile backup%'::text))
Rows Removed by Filter: 3502
-> Index Scan using app2_pkey on app gaa (cost=0.56..2.77 rows=1 width=3874) (actual time=0.014..0.014 rows=1 loops=1000)
Index Cond: ((id)::text = ("ANY_subquery".id)::text)
Planning Time: 0.852 ms
Execution Time: 156.488 ms
--later plan: a 1000-record batch took 13301.600 ms
--------------------------------------------------------------------------
Update on app gaa (cost=3789.35..6567.19 rows=1000 width=3980) (actual time=13301.466..13301.466 rows=0 loops=1)
InitPlan 1 (returns $0)
-> Result (cost=0.00..0.02 rows=1 width=8) (actual time=0.007..0.007 rows=1 loops=1)
-> Nested Loop (cost=3789.32..6567.17 rows=1000 width=3980) (actual time=12881.004..12896.440 rows=1000 loops=1)
-> HashAggregate (cost=3788.77..3798.77 rows=1000 width=98) (actual time=12880.958..12881.378 rows=1000 loops=1)
Group Key: ("ANY_subquery".id)::text
-> Subquery Scan on "ANY_subquery" (cost=2.17..3786.27 rows=1000 width=98) (actual time=12850.663..12880.505 rows=1000 loops=1)
-> Limit (cost=2.17..3776.27 rows=1000 width=37) (actual time=12850.656..12880.233 rows=1000 loops=1)
-> Merge Join (cost=2.17..862421.74 rows=228510 width=37) (actual time=12850.655..12880.162 rows=1000 loops=1)
Merge Cond: ((gab.policy_id)::text = (gp.id)::text)
-> Index Scan using tmp_uq_policy_id_context2 on app gab (cost=0.56..474159.31 rows=2701994 width=74) (actual time=0.017..6054.269 rows=2302988 loops=1)
Filter: ((policy_type)::text = 'policy.protection.total'::text)
Rows Removed by Filter: 1822946
-> Index Scan using pol_pkey on pol gp (cost=0.56..378322.78 rows=361105 width=37) (actual time=0.007..5976.346 rows=936686 loops=1)
Filter: ((deleted_at IS NOT NULL) AND (name ~~ 'Mobile backup%'::text))
Rows Removed by Filter: 3152553
-> Index Scan using app2_pkey on app gaa (cost=0.56..2.77 rows=1 width=3911) (actual time=0.014..0.014 rows=1 loops=1000)
Index Cond: ((id)::text = ("ANY_subquery".id)::text)
Planning Time: 0.785 ms
Execution Time: 13301.600 ms
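Our reading (please correct us if this is wrong): the merge join walks both indexes in policy_id order, and once earlier batches have soft-deleted all the matches near the front of that order, each later batch has to scan millions of index entries before it finds 1000 rows that still qualify; that is why "Rows Removed by Filter" grows from a few thousand to millions. One idea we have not tested yet is a partial index that contains only the still-pending rows, so rows drop out of it as soon as they are updated. A rough sketch (the index name is made up):

-- Untested sketch: a partial index on the join key covering exactly the
-- rows the batch query still needs. Each UPDATE sets deleted_at, so
-- processed rows fall out of the index and later batches no longer have
-- to filter past them.
CREATE INDEX CONCURRENTLY idx_app_pending_total_protection
    ON app (policy_id)
    WHERE policy_type = 'policy.protection.total'
      AND deleted_at IS NULL;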
--we also tested a temporary-table solution; script below
DO $MAIN$
DECLARE
    affect_count integer := 1;  -- non-zero so the loop body runs at least once
    offset_count integer := 0;
    chunk_size CONSTANT integer := 1000;
    sleep_sec CONSTANT numeric := 0.1;
BEGIN
    -- Materialize the ids to process once, so each batch does not
    -- repeat the expensive join.
    DROP TABLE IF EXISTS tmp_usage_tbl;
    CREATE TEMPORARY TABLE tmp_usage_tbl(id character varying(36));
    INSERT INTO tmp_usage_tbl(id)
    SELECT gab.id
    FROM app gab
    LEFT JOIN pol gp ON gab.policy_id = gp.id
    WHERE gab.policy_type = 'policy.protection.total'
      AND gp.name LIKE 'Mobile backup%'
      AND gab.deleted_at IS NULL
      AND gp.deleted_at IS NOT NULL;
    LOOP
        EXIT WHEN affect_count = 0;
        UPDATE app gaa
        SET deleted_at = CAST(extract(epoch FROM now() AT TIME ZONE 'utc') * 1000000000 AS bigint)
        WHERE gaa.id IN (
            SELECT id
            FROM tmp_usage_tbl
            ORDER BY id
            LIMIT chunk_size OFFSET offset_count);
        GET DIAGNOSTICS affect_count = ROW_COUNT;
        COMMIT;
        offset_count := offset_count + chunk_size;
        PERFORM pg_sleep(sleep_sec);
    END LOOP;
END;
$MAIN$;
--each 1000-record batch takes around 2000 ms, and with the temporary table the time stays the same for every batch
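One thing we are unsure about: LIMIT ... OFFSET sorts the temporary table and skips offset_count rows on every batch, so each batch should get more expensive as the offset grows. A variant we have not benchmarked consumes the temporary table instead, so every batch reads only one chunk. Sketch only, using the same table names; inside the loop it would replace the UPDATE statement, and GET DIAGNOSTICS would still report the rows the UPDATE touched:

-- Untested variant: delete each processed chunk from the work table in
-- the same statement that applies the UPDATE, so no OFFSET is needed.
WITH batch AS (
    DELETE FROM tmp_usage_tbl
    WHERE id IN (SELECT id FROM tmp_usage_tbl LIMIT 1000)
    RETURNING id
)
UPDATE app gaa
SET deleted_at = CAST(extract(epoch FROM now() AT TIME ZONE 'utc') * 1000000000 AS bigint)
WHERE gaa.id IN (SELECT id FROM batch);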
Which solution is better, please?
Michael Lewis <mlewis@xxxxxxxxxxx> wrote on Sat, Feb 27, 2021 at 1:46 AM:
It might be a concern, but generally that should be a row-level lock and only block other update/delete operations on those rows. It might be helpful to look at the EXPLAIN ANALYZE output early in the process vs. later. It might be that you are getting very few HOT updates and the indexes are being updated constantly.
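To check the HOT-update hypothesis, the standard pg_stat_user_tables statistics view tracks per-table update counts; a sketch, with the table name from the script above:

-- How many updates on the app table were HOT (heap-only tuple) updates,
-- i.e. did not have to touch any index.
SELECT relname, n_tup_upd, n_tup_hot_upd,
       round(100.0 * n_tup_hot_upd / nullif(n_tup_upd, 0), 1) AS hot_pct
FROM pg_stat_user_tables
WHERE relname = 'app';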