Hello,
We have encountered an unexpected quirk with our DB and we are unsure if this is expected behaviour or an issue.
PG version PostgreSQL 14.3 on aarch64-unknown-linux-gnu, compiled by aarch64-unknown-linux-gnu-gcc (GCC) 7.4.0, 64-bit
schema of table in question and related indexes
CREATE TABLE public.marketplace_sale (
log_index integer NOT NULL,
created_at timestamp with time zone DEFAULT now() NOT NULL,
updated_at timestamp with time zone DEFAULT now() NOT NULL,
block_timestamp timestamp with time zone NOT NULL,
block bigint NOT NULL,
contract_address character(42) NOT NULL,
buyer_address character(42) NOT NULL,
seller_address character(42) NOT NULL,
transaction_hash character(66) NOT NULL,
quantity numeric NOT NULL,
token_id numeric NOT NULL,
seller_amount_wei numeric,
marketplace_fees_wei numeric DEFAULT 0,
royalty_fees_wei numeric DEFAULT 0,
data_source text NOT NULL,
marketplace text,
original_data jsonb,
source_discriminator text,
total_amount_wei numeric NOT NULL,
unique_hash bytea GENERATED ALWAYS AS (sha512((((((((((transaction_hash)::text || (block)::text) || (log_index)::text) || (contract_address)::text) || (token_id)::text) || (buyer_address)::text) || (seller_address)::text) || (quantity)::text))::bytea)) STORED NOT NULL,
CONSTRAINT buyer_address_lower CHECK (((buyer_address)::text = lower((buyer_address)::text))),
CONSTRAINT buyer_address_prefix CHECK (starts_with((buyer_address)::text, '0x'::text)),
CONSTRAINT contract_address_lower CHECK (((contract_address)::text = lower((contract_address)::text))),
CONSTRAINT contract_address_prefix CHECK (starts_with((contract_address)::text, '0x'::text)),
CONSTRAINT seller_address_lower CHECK (((seller_address)::text = lower((seller_address)::text))),
CONSTRAINT seller_address_prefix CHECK (starts_with((seller_address)::text, '0x'::text)),
CONSTRAINT transaction_hash_lower CHECK (((transaction_hash)::text = lower((transaction_hash)::text))),
CONSTRAINT transaction_hash_prefix CHECK (starts_with((transaction_hash)::text, '0x'::text))
);
ALTER TABLE ONLY public.marketplace_sale
ADD CONSTRAINT marketplace_sale_pkey PRIMARY KEY (unique_hash);
ADD CONSTRAINT marketplace_sale_pkey PRIMARY KEY (unique_hash);
CREATE INDEX sales_contract_blocktimestamp_idx ON public.marketplace_sale USING btree (contract_address, block_timestamp);
CREATE INDEX sales_contract_date_idx ON public.marketplace_sale USING btree (contract_address, token_id, block_timestamp);
When running this query
EXPLAIN(verbose, costs, buffers) with token_pairs(contract_address, token_id) as (
values ('0xed5af388653567af2f388e6224dc7c4b3241c544', '1375'::numeric ), ('0xed5af388653567af2f388e6224dc7c4b3241c544', '4'::numeric )
)
select sales.* from token_pairs, LATERAL (
select
contract_address, token_id,
block_timestamp, total_amount_wei, buyer_address,
seller_address, block, quantity, transaction_hash
from marketplace_sale
where
(marketplace_sale.contract_address, marketplace_sale.token_id) = (token_pairs.contract_address, token_pairs.token_id)
order by contract_address desc, token_id desc, block_timestamp desc
limit 1
) sales;
values ('0xed5af388653567af2f388e6224dc7c4b3241c544', '1375'::numeric ), ('0xed5af388653567af2f388e6224dc7c4b3241c544', '4'::numeric )
)
select sales.* from token_pairs, LATERAL (
select
contract_address, token_id,
block_timestamp, total_amount_wei, buyer_address,
seller_address, block, quantity, transaction_hash
from marketplace_sale
where
(marketplace_sale.contract_address, marketplace_sale.token_id) = (token_pairs.contract_address, token_pairs.token_id)
order by contract_address desc, token_id desc, block_timestamp desc
limit 1
) sales;
we get the query plan
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.69..332764.78 rows=2 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
-> Values Scan on "*VALUES*" (cost=0.00..0.03 rows=2 width=64)
Output: "*VALUES*".column1, "*VALUES*".column2
-> Limit (cost=0.69..166382.36 rows=1 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
-> Index Scan Backward using sales_contract_date_idx on public.marketplace_sale (cost=0.69..3660397.27 rows=22 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
Index Cond: (marketplace_sale.token_id = "*VALUES*".column2)
Filter: ((marketplace_sale.contract_address)::text = "*VALUES*".column1)
Query Identifier: 8815736494208428864
Planning:
Buffers: shared hit=4
(13 rows)
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.69..332764.78 rows=2 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
-> Values Scan on "*VALUES*" (cost=0.00..0.03 rows=2 width=64)
Output: "*VALUES*".column1, "*VALUES*".column2
-> Limit (cost=0.69..166382.36 rows=1 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
-> Index Scan Backward using sales_contract_date_idx on public.marketplace_sale (cost=0.69..3660397.27 rows=22 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
Index Cond: (marketplace_sale.token_id = "*VALUES*".column2)
Filter: ((marketplace_sale.contract_address)::text = "*VALUES*".column1)
Query Identifier: 8815736494208428864
Planning:
Buffers: shared hit=4
(13 rows)
As you can see it is unable to fully utilize the (contract_address, token_id, block_timestamp) index and can only use the token_id column as the index condition.
However if we explicitly cast the contract values in the values list to varchar or character(42)
Like so
EXPLAIN(verbose, costs, buffers) with token_pairs(contract_address, token_id) as (
values ('0xed5af388653567af2f388e6224dc7c4b3241c544'::varchar, '1375'::numeric ), ('0xed5af388653567af2f388e6224dc7c4b3241c544'::varchar, '4'::numeric )
)
select sales.* from token_pairs, LATERAL (
select
contract_address, token_id,
block_timestamp, total_amount_wei, buyer_address,
seller_address, block, quantity, transaction_hash
from marketplace_sale
where
(marketplace_sale.contract_address, marketplace_sale.token_id) = (token_pairs.contract_address, token_pairs.token_id)
order by contract_address desc, token_id desc, block_timestamp desc
limit 1
) sales;
values ('0xed5af388653567af2f388e6224dc7c4b3241c544'::varchar, '1375'::numeric ), ('0xed5af388653567af2f388e6224dc7c4b3241c544'::varchar, '4'::numeric )
)
select sales.* from token_pairs, LATERAL (
select
contract_address, token_id,
block_timestamp, total_amount_wei, buyer_address,
seller_address, block, quantity, transaction_hash
from marketplace_sale
where
(marketplace_sale.contract_address, marketplace_sale.token_id) = (token_pairs.contract_address, token_pairs.token_id)
order by contract_address desc, token_id desc, block_timestamp desc
limit 1
) sales;
It can now use the index
QUERY PLAN
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.69..17.49 rows=2 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
-> Values Scan on "*VALUES*" (cost=0.00..0.03 rows=2 width=64)
Output: "*VALUES*".column1, "*VALUES*".column2
-> Limit (cost=0.69..8.71 rows=1 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
-> Index Scan Backward using sales_contract_date_idx on public.marketplace_sale (cost=0.69..8.71 rows=1 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
Index Cond: ((marketplace_sale.contract_address = ("*VALUES*".column1)::bpchar) AND (marketplace_sale.token_id = "*VALUES*".column2))
Query Identifier: -5527103051535383406
Planning:
Buffers: shared hit=4
(12 rows)
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Nested Loop (cost=0.69..17.49 rows=2 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
-> Values Scan on "*VALUES*" (cost=0.00..0.03 rows=2 width=64)
Output: "*VALUES*".column1, "*VALUES*".column2
-> Limit (cost=0.69..8.71 rows=1 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
-> Index Scan Backward using sales_contract_date_idx on public.marketplace_sale (cost=0.69..8.71 rows=1 width=231)
Output: marketplace_sale.contract_address, marketplace_sale.token_id, marketplace_sale.block_timestamp, marketplace_sale.total_amount_wei, marketplace_sale.buyer_address, marketplace_sale.seller_address, marketplace_sale.block, marketplace_sale.quantity, marketplace_sale.transaction_hash
Index Cond: ((marketplace_sale.contract_address = ("*VALUES*".column1)::bpchar) AND (marketplace_sale.token_id = "*VALUES*".column2))
Query Identifier: -5527103051535383406
Planning:
Buffers: shared hit=4
(12 rows)
We were expecting behaviour similar to
explain (verbose, costs, buffers) select * from marketplace_sale where
contract_address = '0xed5af388653567af2f388e6224dc7c4b3241c544'
and token_id = '1375'
order by contract_address desc, token_id desc, block_timestamp desc
limit 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.69..6.04 rows=1 width=1610)
Output: log_index, created_at, updated_at, block_timestamp, block, contract_address, buyer_address, seller_address, transaction_hash, quantity, token_id, seller_amount_wei, marketplace_fees_wei, royalty_fees_wei, data_source, marketplace, original_data, source_discriminator, total_amount_wei, unique_hash
-> Index Scan Backward using sales_contract_date_idx on public.marketplace_sale (cost=0.69..16.74 rows=3 width=1610)
Output: log_index, created_at, updated_at, block_timestamp, block, contract_address, buyer_address, seller_address, transaction_hash, quantity, token_id, seller_amount_wei, marketplace_fees_wei, royalty_fees_wei, data_source, marketplace, original_data, source_discriminator, total_amount_wei, unique_hash
Index Cond: ((marketplace_sale.contract_address = '0xed5af388653567af2f388e6224dc7c4b3241c544'::bpchar) AND (marketplace_sale.token_id = '1375'::numeric))
Query Identifier: -2069211501626469745
Planning:
Buffers: shared hit=2
(8 rows)
contract_address = '0xed5af388653567af2f388e6224dc7c4b3241c544'
and token_id = '1375'
order by contract_address desc, token_id desc, block_timestamp desc
limit 1;
QUERY PLAN
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
Limit (cost=0.69..6.04 rows=1 width=1610)
Output: log_index, created_at, updated_at, block_timestamp, block, contract_address, buyer_address, seller_address, transaction_hash, quantity, token_id, seller_amount_wei, marketplace_fees_wei, royalty_fees_wei, data_source, marketplace, original_data, source_discriminator, total_amount_wei, unique_hash
-> Index Scan Backward using sales_contract_date_idx on public.marketplace_sale (cost=0.69..16.74 rows=3 width=1610)
Output: log_index, created_at, updated_at, block_timestamp, block, contract_address, buyer_address, seller_address, transaction_hash, quantity, token_id, seller_amount_wei, marketplace_fees_wei, royalty_fees_wei, data_source, marketplace, original_data, source_discriminator, total_amount_wei, unique_hash
Index Cond: ((marketplace_sale.contract_address = '0xed5af388653567af2f388e6224dc7c4b3241c544'::bpchar) AND (marketplace_sale.token_id = '1375'::numeric))
Query Identifier: -2069211501626469745
Planning:
Buffers: shared hit=2
(8 rows)
Any insight into why this happens would be greatly appreciated