On 07/13/2011 08:17 AM, Tom Lane wrote:
"Kevin Grittner"<Kevin.Grittner@xxxxxxxxxxxx> writes:
... Jeff does raise a good point, though -- it seems odd
that WAL-logging of this pruning would need to be synchronous.
Yeah, we need to get to the bottom of that. If there's enough
shared_buffer space then it shouldn't be.
This thread has gotten long, let me try to compile all the relevant
information in one email.
\d test
Table "lars.test"
Column | Type | Modifiers
--------------+---------------+-----------
tenant | character(15) |
created_by | character(15) |
created_date | date |
Indexes:
"i1" btree (tenant)
"i11" btree (created_by)
-- Table is populated like this:
------------------------------------
create table test(tenant char(15), created_by char(15), created_date date);
insert into test values('x', 'y','2011-6-30');
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test; -- 256k rows
update test set tenant = lpad((random()*10000)::int::text,15,'0'),
created_by = lpad((random()*10000)::int::text,15,'0');
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test;
insert into test select * from test; -- 32m rows
create index i1 on test(tenant);
create index i11 on test(created_by);
vacuum analyze;
-- I doubt it needs that many rows.
=> SELECT
'version'::text AS "name",
version() AS "current_setting"
UNION ALL
SELECT
name,current_setting(name)
FROM pg_settings
WHERE NOT source='default' AND NOT name IN
('config_file','data_directory','hba_file','ident_file',
'log_timezone','DateStyle','lc_messages','lc_monetary',
'lc_numeric','lc_time','timezone_abbreviations',
'default_text_search_config','application_name',
'transaction_deferrable','transaction_isolation',
'transaction_read_only');
name
| current_setting
------------------------------+------------------------------------------------------------------------------------------------------------------
version | PostgreSQL 9.1beta2 on
x86_64-unknown-linux-gnu, compiled by gcc (GCC) 4.4.4 20100726 (Red Hat
4.4.4-13), 64-bit
bgwriter_delay | 10ms
bgwriter_lru_maxpages | 1000
checkpoint_completion_target | 0.9
checkpoint_segments | 128
client_encoding | UTF8
effective_cache_size | 64GB
lc_collate | en_US.UTF-8
lc_ctype | en_US.UTF-8
log_checkpoints | on
log_line_prefix | %m
maintenance_work_mem | 2GB
max_connections | 100
max_stack_depth | 2MB
server_encoding | UTF8
shared_buffers | 20GB
TimeZone | UTC
wal_buffers | 16MB
work_mem | 1GB
(19 rows)
-- Now:
----------
=> select count(*) from test where tenant = '000000000000001' and
created_date = '2011-6-30';
count
-------
3712
(1 row)
=> SELECT c.relname, isdirty, count(*) * 8192 / 1024/1024 AS buffers
FROM pg_buffercache b, pg_class c
WHERE b.relfilenode = pg_relation_filenode(c.oid)
AND b.reldatabase IN (0, (SELECT oid FROM pg_database WHERE datname =
current_database()))
GROUP BY c.relname,isdirty
ORDER BY 3 DESC
LIMIT 6;
relname | isdirty | buffers
-------------------------------+---------+---------
test | t | 14
pg_opclass_oid_index | f | 0
pg_rewrite | f | 0
i11 | t | 0
pg_rewrite_rel_rulename_index | f | 0
pg_constraint | f | 0
-- Just started the server, no nothing else is cached, yet
-- it doesn't matter if that update is executed by the same or another
backend.
=> update test set created_by = '000000000000001' where tenant =
'000000000000001';
UPDATE 3712
=> select count(*) from test where tenant = '000000000000001' and
created_date = '2011-6-30';
count
-------
3712
(1 row)
strace now shows:
-------------------------
Q\0\0\0_select count(*) from test w"..., 8192, 0, NULL, NULL) = 96
gettimeofday({1310579341, 854669}, NULL) = 0
lseek(38, 0, SEEK_END) = 187465728
lseek(41, 0, SEEK_END) = 115564544
lseek(43, 0, SEEK_END) = 101040128
lseek(38, 0, SEEK_END) = 187465728
brk(0x1a85000) = 0x1a85000
brk(0x1a5d000) = 0x1a5d000
lseek(52, 12443648, SEEK_SET) = 12443648
write(52,
"f\320\1\0\1\0\0\0\276\0\0\0\0\340\275\0016\0\0\0S\0\0\0\4\0cY\3\0\0\0"...,
122880) = 122880
fdatasync(52) = 0
semop(688135, {{9, 1, 0}}, 1) = 0
gettimeofday({1310579341, 889163}, NULL) = 0
sendto(5,
"\2\0\0\0\350\0\0\0\1@\0\0\2\0\0\0\1\0\0\0\0\0\0\0\376\200\0\0<\0\2\0"...,
232, 0, NULL, 0) = 232
sendto(6,
"T\0\0\0\36\0\1count\0\0\0\0\0\0\0\0\0\0\24\0\10\377\377\377\377\0\0D"...,
66, 0, NULL, 0) = 66
-- fd 52 was opened earlier to the WAL file in pg_xlog.
=> select count(*) from test where tenant = '000000000000001' and
created_date = '2011-6-30';
count
-------
3712
(1 row)
now strace shows:
-------------------------
Q\0\0\0_select count(*) from test w"..., 8192, 0, NULL, NULL) = 96
gettimeofday({1310579380, 862637}, NULL) = 0
lseek(38, 0, SEEK_END) = 187465728
lseek(41, 0, SEEK_END) = 115564544
lseek(43, 0, SEEK_END) = 101040128
lseek(38, 0, SEEK_END) = 187465728
gettimeofday({1310579380, 868149}, NULL) = 0
sendto(5,
"\2\0\0\0\350\0\0\0\1@\0\0\2\0\0\0\1\0\0\0\0\0\0\0\376\200\0\0@\0\2\0"...,
232, 0, NULL, 0) = 232
sendto(6,
"T\0\0\0\36\0\1count\0\0\0\0\0\0\0\0\0\0\24\0\10\377\377\377\377\0\0D"...,
66, 0, NULL, 0) = 66
=> update test set created_by = '000000000000001' where tenant =
'000000000000001';
UPDATE 3712
=> select count(*) from test where tenant = '000000000000001' and
created_date = '2011-6-30';
count
-------
3712
(1 row)
strace again indicates that a WAL log is written by the backend by the
select:
---------------------------------------------------------------------------------------
Q\0\0\0_select count(*) from test w"..., 8192, 0, NULL, NULL) = 96
gettimeofday({1310579663, 890641}, NULL) = 0
lseek(38, 0, SEEK_END) = 187596800
lseek(41, 0, SEEK_END) = 115638272
lseek(43, 0, SEEK_END) = 101122048
lseek(38, 0, SEEK_END) = 187596800
brk(0x1a85000) = 0x1a85000
brk(0x1a5d000) = 0x1a5d000
semop(688135, {{3, -1, 0}}, 1) = 0
lseek(52, 10223616, SEEK_SET) = 10223616
write(52,
"f\320\1\0\1\0\0\0\276\0\0\0\0\0\234\2\16\0\0\0C(\0\0\1\0\0\0~\0\177\0"...,
16384) = 16384
fdatasync(52) = 0
gettimeofday({1310579663, 921932}, NULL) = 0
sendto(5,
"\2\0\0\0\350\0\0\0\1@\0\0\2\0\0\0\1\0\0\0\0\0\0\0\376\200\0\0?\0\2\0"...,
232, 0, NULL, 0) = 232
sendto(6,
"T\0\0\0\36\0\1count\0\0\0\0\0\0\0\0\0\0\24\0\10\377\377\377\377\0\0D"...,
66, 0, NULL, 0) = 66
=> explain (analyze on, buffers on) select count(*) from test where
tenant = '000000000000001' and created_date = '2011-6-30';
QUERY PLAN
--------------------------------------------------------------------------------
-----------------------------------------
Aggregate (cost=12284.65..12284.66 rows=1 width=0) (actual
time=9.150..9.150 r
ows=1 loops=1)
Buffers: shared hit=1976
-> Bitmap Heap Scan on test (cost=91.78..12276.35 rows=3319
width=0) (actua
l time=2.338..6.866 rows=3712 loops=1)
Recheck Cond: (tenant = '000000000000001'::bpchar)
Filter: (created_date = '2011-06-30'::date)
Buffers: shared hit=1976
-> Bitmap Index Scan on i1 (cost=0.00..90.95 rows=3319
width=0) (actu
al time=2.063..2.063 rows=15179 loops=1)
Index Cond: (tenant = '000000000000001'::bpchar)
Buffers: shared hit=98
Total runtime: 9.206 ms
(10 rows)
=> update test set created_by = '000000000000001' where tenant =
'000000000000001';
UPDATE 3712
=> explain (analyze on, buffers on) select count(*) from test where
tenant = '000000000000001' and created_date = '2011-6-30';
QUERY PLAN
--------------------------------------------------------------------------------
-----------------------------------------
Aggregate (cost=12284.68..12284.69 rows=1 width=0) (actual
time=30.738..30.739
rows=1 loops=1)
Buffers: shared hit=2001
-> Bitmap Heap Scan on test (cost=91.78..12276.38 rows=3319
width=0) (actua
l time=2.589..28.361 rows=3712 loops=1)
Recheck Cond: (tenant = '000000000000001'::bpchar)
Filter: (created_date = '2011-06-30'::date)
Buffers: shared hit=2001
-> Bitmap Index Scan on i1 (cost=0.00..90.95 rows=3319
width=0) (actu
al time=2.301..2.301 rows=17123 loops=1)
Index Cond: (tenant = '000000000000001'::bpchar)
Buffers: shared hit=107
Total runtime: 30.785 ms
(10 rows)
----
There seems to be definitely something funky going on. Since created_by
is indexed it shouldn't do any HOT logic.
Is there any other information that I can provide? I'm happy to
recompile with a patch applied, etc.
Thanks.
-- Lars
--
Sent via pgsql-performance mailing list (pgsql-performance@xxxxxxxxxxxxxx)
To make changes to your subscription:
http://www.postgresql.org/mailpref/pgsql-performance