Query tuning question

"Dirschel, Steve" <steve.dirschel@xxxxxxxxxxxxxxxxxx> · Wed, 21 Aug 2024 16:06:23 +0000

Aurora Postgres version 13.7.

Table definition:

acquisition_channel_db=> \d acquisition_channel.acquired_object
                                                        Table "acquisition_channel.acquired_object"
                 Column                 |           Type           | Collation | Nullable |                             Default
----------------------------------------+--------------------------+-----------+----------+-----------------------------------------------------------------
acquired_object_uuid                   | character varying(36)    |           | not null |
acquired_object_name                   | text                     |           | not null |
acquired_object_size                   | bigint                   |           |          |
acquisition_run_record_id              | bigint                   |           |          |
correlation_id                         | character varying(36)    |           |          |
acquired_datetime                      | timestamp with time zone |           |          |
checksum                               | character varying(128)   |           |          |
acquisition_method_id                  | bigint                   |           |          |
ol_version                             | integer                  |           | not null |
created_datetime                       | timestamp with time zone |           | not null |
updated_datetime                       | timestamp with time zone |           |          |
status                                 | character varying(50)    |           |          |
parent_acquired_object_uuid            | character varying(36)    |           |          |
extracted_from_object_path             | text                     |           |          |
resource_group_id                      | bigint                   |           |          |
s3_gcs_bucket                          | character varying(100)   |           |          |
s3_key                                 | character varying(500)   |           |          |
metadata_s3_gcs_bucket                 | character varying(100)   |           |          |
metadata_s3_key                        | character varying(500)   |           |          |
routing_result_acquisition_channel_id  | bigint                   |           |          |
acquired_object_type                   | character varying(100)   |           |          |
created_by_id                          | integer                  |           |          |
updated_by_id                          | integer                  |           |          |
metadata                               | jsonb                    |           |          |
acquired_object_id                     | bigint                   |           | not null | nextval('acquisition_channel.acquired_object_id_seq'::regclass)
routed_to_acquisition_channel_datetime | timestamp with time zone |           |          |
routed_to_acquisition_channel_id       | bigint                   |           |          |
rendition_guid                         | character varying(36)    |           |          |
revision_guid                          | character varying(36)    |           |          |
channel_availability_status            | character varying(100)   |           |          |
mime_type                              | character varying(100)   |           |          |
encryption_public_key_hash             | text                     |           |          |
acquired_metadata                      | jsonb                    |           |          |
original_acquired_object_name          | text                     |           |          |
acquired_family_uuid                   | character varying(36)    |           |          |
last_broadcast_datetime                | timestamp with time zone |           |          |
original_checksum                      | character varying(128)   |           |          |
Indexes:
    "acquired_object_pkey" PRIMARY KEY, btree (acquired_object_uuid)
    "acquired_family_uuid_idx" btree (acquired_family_uuid)
    "acquired_object_acq_method_id_acq_dt_acquired_object_uuid" btree (acquisition_method_id, acquired_datetime DESC NULLS LAST, acquired_object_uuid)
    "acquired_object_acquired_items_initial_ui_page" btree (acquisition_method_id, acquired_datetime DESC NULLS LAST, acquired_object_uuid) WHERE status::text <> 'TEST'::text AND parent_acquired_object_uuid
 IS NULL
    "acquired_object_acquired_object_id_unq" UNIQUE, btree (acquired_object_id)
    "acquired_object_acquired_object_name" btree (lower(acquired_object_name))
    "acquired_object_acquisition_method_id" btree (acquisition_method_id)
    "acquired_object_acquisition_run_record_id" btree (acquisition_run_record_id)
    "acquired_object_checksum" btree (checksum)
    "acquired_object_correlation_id" btree (correlation_id)
    "acquired_object_idx2" btree (parent_acquired_object_uuid, extracted_from_object_path)
    "acquired_object_in_channel" UNIQUE, btree (routed_to_acquisition_channel_id, checksum) WHERE routed_to_acquisition_channel_id IS NOT NULL
    "acquired_object_routed_to_acq_channel_id_dt_acq_object_uuid" btree (routed_to_acquisition_channel_id, routed_to_acquisition_channel_datetime DESC NULLS LAST, acquired_object_uuid)
    "acquired_object_routed_to_acq_channel_id_dt_acq_object_uuid_asc" btree (routed_to_acquisition_channel_id, routed_to_acquisition_channel_datetime, acquired_object_uuid)
    "acquired_object_routed_to_acquisition_channel_id" btree (routed_to_acquisition_channel_id)
    "acquired_object_trgm_acquired_object_name" gin (lower(acquired_object_name) acquisition_channel.gin_trgm_ops)
Foreign-key constraints:
    "acquired_object_acquisition_channel_fk" FOREIGN KEY (routed_to_acquisition_channel_id) REFERENCES acquisition_channel.acquisition_channel(acquisition_channel_id)
    "acquired_object_fk2" FOREIGN KEY (acquisition_run_record_id) REFERENCES acquisition_channel.acquisition_run_record(acquisition_run_record_id)
    "acquired_object_fk3" FOREIGN KEY (acquisition_method_id) REFERENCES acquisition_channel.acquisition_method(acquisition_method_id)
    "acquired_object_fk4" FOREIGN KEY (parent_acquired_object_uuid) REFERENCES acquisition_channel.acquired_object(acquired_object_uuid)
    "acquired_object_fk5" FOREIGN KEY (routing_result_acquisition_channel_id) REFERENCES acquisition_channel.acquisition_channel(acquisition_channel_id)
Referenced by:
    TABLE "acquisition_channel.acquired_object" CONSTRAINT "acquired_object_fk4" FOREIGN KEY (parent_acquired_object_uuid) REFERENCES acquisition_channel.acquired_object(acquired_object_uuid)
    TABLE "acquisition_channel.broadcast_item" CONSTRAINT "broadcast_item_acquired_object_fk" FOREIGN KEY (acquired_object_uuid) REFERENCES acquisition_channel.acquired_object(acquired_object_uuid)
    TABLE "acquisition_channel.routing_record" CONSTRAINT "routing_record_acquired_object_fk" FOREIGN KEY (acquired_object_uuid) REFERENCES acquisition_channel.acquired_object(acquired_object_uuid)

Extension pg_trgm is installed at version 1.3 and index acquired_object_trgm_acquired_object_name is a gin index.

Query and plan:

acquisition_channel_db=> explain (analyze, buffers)
acquisition_channel_db->  select *
acquisition_channel_db->    from acquisition_channel.acquired_object acquiredob0_
acquisition_channel_db->   where lower(acquiredob0_.acquired_object_name) like lower('%de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%') limit 100;
                                                                                 QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit  (cost=46341.49..50488.39 rows=100 width=1342) (actual time=7589.846..7593.992 rows=1 loops=1)
   Buffers: shared hit=638206 read=1127
   I/O Timings: read=932.828
   ->  Gather  (cost=46341.49..3436021.28 rows=81740 width=1342) (actual time=7589.845..7593.990 rows=1 loops=1)
         Workers Planned: 2
         Workers Launched: 2
         Buffers: shared hit=638206 read=1127
         I/O Timings: read=932.828
         ->  Parallel Bitmap Heap Scan on acquired_object acquiredob0_  (cost=45341.49..3426847.28 rows=34058 width=1342) (actual time=7584.021..7584.022 rows=0 loops=3)
               Recheck Cond: (lower(acquired_object_name) ~~ '%de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%'::text)
               Heap Blocks: exact=1
               Buffers: shared hit=638206 read=1127
               I/O Timings: read=932.828
               ->  Bitmap Index Scan on acquired_object_trgm_acquired_object_name  (cost=0.00..45321.05 rows=81740 width=0) (actual time=7589.148..7589.149 rows=1 loops=1)
                     Index Cond: (lower(acquired_object_name) ~~ '%de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%'::text)
                     Buffers: shared hit=638205 read=1127
                     I/O Timings: read=932.828
Planning:
   Buffers: shared hit=7
Planning Time: 0.209 ms
Execution Time: 7594.346 ms

It is using the GIN index but it did 639k total buffer reads to find 1 row which seems like a ton.  I don’t know how GIN/GIN_TRGM_OPS are implemented but does 638k buffer reads to find 1 row using that type of index seem high or does that
 seem as expected?

I noticed for this 1 row returned the acquired_object_name starts with de_  so the leading % isn’t really needed for this data point .  There is a different index that is a normal btree index that could be used by this query if the leading
 % was removed:

acquired_object_acquired_object_name btree (lower(acquired_object_name))

Here is the plan if I remove the leading %:

acquisition_channel_db=> explain (analyze, buffers)
acquisition_channel_db->  select *
acquisition_channel_db->    from acquisition_channel.acquired_object acquiredob0_
acquisition_channel_db->   where lower(acquiredob0_.acquired_object_name) like lower('de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%') limit 100;
                                                                                 QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit  (cost=48145.49..52292.39 rows=100 width=1342) (actual time=8639.428..8643.529 rows=1 loops=1)
   Buffers: shared hit=827759 read=143
   I/O Timings: read=119.373
   ->  Gather  (cost=48145.49..3437825.28 rows=81740 width=1342) (actual time=8639.427..8643.527 rows=1 loops=1)
         Workers Planned: 2
         Workers Launched: 2
         Buffers: shared hit=827759 read=143
         I/O Timings: read=119.373
         ->  Parallel Bitmap Heap Scan on acquired_object acquiredob0_  (cost=47145.49..3428651.28 rows=34058 width=1342) (actual time=8633.200..8633.201 rows=0 loops=3)
               Recheck Cond: (lower(acquired_object_name) ~~ 'de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%'::text)
               Heap Blocks: exact=1
               Buffers: shared hit=827759 read=143
               I/O Timings: read=119.373
               ->  Bitmap Index Scan on acquired_object_trgm_acquired_object_name  (cost=0.00..47125.05 rows=81740 width=0) (actual time=8638.786..8638.787 rows=1 loops=1)
                     Index Cond: (lower(acquired_object_name) ~~ 'de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%'::text)
                     Buffers: shared hit=827758 read=143
                     I/O Timings: read=119.373
Planning:
   Buffers: shared hit=7
Planning Time: 0.226 ms
Execution Time: 8643.664 ms

I was surprised the optimizer decided to still use the gin index vs the acquired_object_acquired_object_name index.  And now the query did 827k buffer reads to find that 1 row via the index.  I tried using pg_hint_plan to get the query
 to use the acquired_object_acquired_object_name index but the optimizer still wouldn’t use it.  So then I decided to disable bitmap scan to see if it would then use this other index (this time I’m just doing an explain to see the plan):

acquisition_channel_db=> set enable_bitmapscan to off;
SET
acquisition_channel_db=> explain
acquisition_channel_db->  select *
acquisition_channel_db->    from acquisition_channel.acquired_object acquiredob0_
acquisition_channel_db->   where lower(acquiredob0_.acquired_object_name) like lower('de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%') limit 100;
                                                  QUERY PLAN
---------------------------------------------------------------------------------------------------------------
Limit  (cost=1000.00..122287.58 rows=100 width=1342)
   ->  Gather  (cost=1000.00..99141466.60 rows=81740 width=1342)
         Workers Planned: 2
         ->  Parallel Seq Scan on acquired_object acquiredob0_  (cost=0.00..99132292.60 rows=34058 width=1342)
               Filter: (lower(acquired_object_name) ~~ 'de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%'::text)

Wow, so rather than using  the acquired_object_acquired_object_name it thinks a full scan of the table would be better…. The table is 718 GB.  It didn’t even want to use that gin index-  when accessing a gin index can that only be done
 via a bitmap index scan?  Something seems out of whack here but I don’t know what it is.  Leaving enable_bitmapscan to off I also tried disabling seqscan-  the plan below shows it will still to a seq scan and note the cost now.

acquisition_channel_db=> set enable_seqscan to off;
SET
acquisition_channel_db=> explain
acquisition_channel_db->  select *
acquisition_channel_db->    from acquisition_channel.acquired_object acquiredob0_
acquisition_channel_db->   where lower(acquiredob0_.acquired_object_name) like lower('de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%') limit 100;
                                                 QUERY PLAN
-------------------------------------------------------------------------------------------------------------
Limit  (cost=10000000000.00..10000130032.86 rows=100 width=1342)
   ->  Seq Scan on acquired_object acquiredob0_  (cost=10000000000.00..10106288859.64 rows=81740 width=1342)
         Filter: (lower(acquired_object_name) ~~ 'de_fcdd7e0d-4812-4fb2-bd19-4f114f86a43f%'::text)

My questions:

Does 638k buffer reads using the gin index to find 1 row seem appropriate?  That seems like a ton of work to me.
When I remove the leading % it still uses the gin index but now it does 827k buffer reads to find 1 row.  Why does it still want to use this gin index vs using the other index I note
 above and why is it doing even more work when removing the leading %?
If I disable bitmap scan it then prefers to full scan the table vs using the other index I note above.  Why would it still not want to use this other index?
If I also disable seqscan it still won’t use this other index.  Why?

I don’t know what other info would be helpful here but I’ll show settings related to Query Tuning / Planner Method Configuration (after re-logging in to show current settings) along with table/index statistics.  I assume I’m overlooking
 something rather simple here but I’m not seeing it.  Any input would be much appreciated.

Regards
Steve

acquisition_channel_db=> select name, setting from pg_settings where category = 'Query Tuning / Planner Method Configuration';
                  name                   | setting
-----------------------------------------+---------
apg_enable_correlated_any_transform     | off
apg_enable_function_migration           | on
apg_enable_not_in_transform             | on
apg_enable_remove_redundant_inner_joins | off
apg_enable_semijoin_push_down           | off
apg_force_full_key_semijoin             | off
apg_force_semijoin_push_down            | off
apg_force_single_key_semijoin           | off
enable_bitmapscan                       | on
enable_gathermerge                      | on
enable_hashagg                          | on
enable_hashjoin                         | on
enable_incremental_sort                 | on
enable_indexonlyscan                    | on
enable_indexscan                        | on
enable_material                         | on
enable_mergejoin                        | on
enable_nestloop                         | on
enable_parallel_append                  | on
enable_parallel_hash                    | on
enable_partition_pruning                | on
enable_partitionwise_aggregate          | off
enable_partitionwise_join               | off
enable_seqscan                          | on
enable_sort                             | on
enable_tidscan                          | on
(26 rows)

acquisition_channel_db=> \x on
Expanded display is on.

acquisition_channel_db=>   select * FROM pg_stat_user_tables where relname = 'acquired_object';
-[ RECORD 1 ]-------+------------------------------
relid               | 16777
schemaname          | acquisition_channel
relname             | acquired_object
seq_scan            | 1218
seq_tup_read        | 307573542582
idx_scan            | 2260152042
idx_tup_fetch       | 94409160669
n_tup_ins           | 140420690
n_tup_upd           | 642849845
n_tup_del           | 0
n_tup_hot_upd       | 251641126
n_live_tup          | 821351776
n_dead_tup          | 26646304
n_mod_since_analyze | 18564560
n_ins_since_vacuum  | 7485136
last_vacuum         |
last_autovacuum     | 2024-08-17 15:39:04.259598+00
last_analyze        |
last_autoanalyze    | 2024-08-20 06:46:16.897584+00
vacuum_count        | 0
autovacuum_count    | 6
analyze_count       | 0
autoanalyze_count   | 21

acquisition_channel_db=> select * from pg_stats where tablename = 'acquired_object' and attname = 'acquired_object_name';
-[ RECORD 1 ]----------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
schemaname             | acquisition_channel
tablename              | acquired_object
attname                | acquired_object_name
inherited              | f
null_frac              | 0
avg_width              | 43
n_distinct             | 3.053536e+06
most_common_vals       | {.xxxxxxxx.xxxxxx,xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx.pdf,yyyyyyyyyyyyyyyyyyyyyyyyyyyy.pdf} 
ß- replace actual values due to possible concerns with values shown
most_common_freqs      | {0.00026666667,0.00016666666,0.00016666666}
histogram_bounds       | {removing values also due to possible concerns with values shown}
correlation            | 0.1324247
most_common_elems      |
most_common_elem_freqs |
elem_count_histogram   |

This e-mail is for the sole use of the intended recipient and contains information that may be privileged and/or confidential. If you are not an intended recipient, please notify the sender by return e-mail and delete this e-mail and any attachments. Certain
 required legal entity disclosures can be accessed on our website: https://www.thomsonreuters.com/en/resources/disclosures.html