Hi,
PostgreSQL 9.6.2 on CentOS 7.3 x64.
This is my data set:
drop table if exists users;
drop table if exists ids;
create table users ( user_id int
, username varchar(50)
);
with generator as
( select a.*
from generate_series (1,3000000) a
order by random()
)
insert into users ( user_id
, username
)
select a
, md5(a::varchar)
from generator;
create unique index i_users on users ( user_id );
create table ids ( id int );
insert into ids (id) values ( generate_series ( 2500000, 3500000 ) );
create unique index i_ids on ids ( id );
analyze users;
analyze ids;
drop table if exists ids;
create table users ( user_id int
, username varchar(50)
);
with generator as
( select a.*
from generate_series (1,3000000) a
order by random()
)
insert into users ( user_id
, username
)
select a
, md5(a::varchar)
from generator;
create unique index i_users on users ( user_id );
create table ids ( id int );
insert into ids (id) values ( generate_series ( 2500000, 3500000 ) );
create unique index i_ids on ids ( id );
analyze users;
analyze ids;
I have set work_mem to a very low value intentionally for demonstration purposes:
postgres=# show work_mem;
work_mem
----------
16MB
(1 row)
postgres=# show shared_buffers ;
shared_buffers
----------------
128MB
(1 row)
work_mem
----------
16MB
(1 row)
postgres=# show shared_buffers ;
shared_buffers
----------------
128MB
(1 row)
When I run the following query ( I know that "not in" is not a good choice here ):
postgres=# select count(user_id) from users where user_id not in ( select id from ids);
... this seems to never complete (at least not within one hour).
Setting work_mem to 32MB and all is fine.
top shows the session at 100% CPU. No waits are listed in pg_stat_activity:
postgres=# select pid,wait_event_type,wait_event,state,query from pg_stat_activity ;
pid | wait_event_type | wait_event | state | query
-------+-----------------+------------+--------+------------------------------------------------------------------------------
17817 | | | active | select count(user_id) from users where user_id not in ( select id from ids);
17847 | | | active | select pid,wait_event_type,wait_event,state,query from pg_stat_activity ;
(2 rows)
pid | wait_event_type | wait_event | state | query
-------+-----------------+------------+--------+------------------------------------------------------------------------------
17817 | | | active | select count(user_id) from users where user_id not in ( select id from ids);
17847 | | | active | select pid,wait_event_type,wait_event,state,query from pg_stat_activity ;
(2 rows)
strace shows more ore less always this, so something is happening:
read(14, "\0\0\1\0\0\t\30\0\351G1\0\16\0\0\0\1\0\0\t\30\0\352G1\0\16\0\0\0\1\0"..., 8192) = 8192
read(14, "\1\0\0\t\30\0002J1\0\16\0\0\0\1\0\0\t\30\0003J1\0\16\0\0\0\1\0\0\t"..., 8192) = 8192
read(14, "\0\t\30\0{L1\0\16\0\0\0\1\0\0\t\30\0|L1\0\16\0\0\0\1\0\0\t\30\0"..., 8192) = 8192
read(14, "\30\0\304N1\0\16\0\0\0\1\0\0\t\30\0\305N1\0\16\0\0\0\1\0\0\t\30\0\306N"..., 8192) = 8192
read(14, "\rQ1\0\16\0\0\0\1\0\0\t\30\0\16Q1\0\16\0\0\0\1\0\0\t\30\0\17Q1\0"..., 8192) = 8192^C
read(14, "\1\0\0\t\30\0002J1\0\16\0\0\0\1\0\0\t\30\0003J1\0\16\0\0\0\1\0\0\t"..., 8192) = 8192
read(14, "\0\t\30\0{L1\0\16\0\0\0\1\0\0\t\30\0|L1\0\16\0\0\0\1\0\0\t\30\0"..., 8192) = 8192
read(14, "\30\0\304N1\0\16\0\0\0\1\0\0\t\30\0\305N1\0\16\0\0\0\1\0\0\t\30\0\306N"..., 8192) = 8192
read(14, "\rQ1\0\16\0\0\0\1\0\0\t\30\0\16Q1\0\16\0\0\0\1\0\0\t\30\0\17Q1\0"..., 8192) = 8192^C
postgres@pgbox:/u02/pgdata/PG962/ [PG962] ls -la /proc/17817/fd/
total 0
dr-x------. 2 postgres postgres 0 Apr 4 14:45 .
dr-xr-xr-x. 9 postgres postgres 0 Apr 4 14:34 ..
lr-x------. 1 postgres postgres 64 Apr 4 14:45 0 -> /dev/null
l-wx------. 1 postgres postgres 64 Apr 4 14:45 1 -> pipe:[58121]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 10 -> socket:[58881]
lr-x------. 1 postgres postgres 64 Apr 4 14:45 11 -> pipe:[58882]
l-wx------. 1 postgres postgres 64 Apr 4 14:45 12 -> pipe:[58882]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 13 -> /u02/pgdata/PG962/base/13323/16516
lrwx------. 1 postgres postgres 64 Apr 4 14:45 14 -> /u02/pgdata/PG962/base/pgsql_tmp/pgsql_tmp17817.1
l-wx------. 1 postgres postgres 64 Apr 4 14:45 2 -> pipe:[58121]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 3 -> anon_inode:[eventpoll]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 4 -> /u02/pgdata/PG962/base/13323/2601
lrwx------. 1 postgres postgres 64 Apr 4 14:45 5 -> /u02/pgdata/PG962/base/13323/16517
lr-x------. 1 postgres postgres 64 Apr 4 14:45 6 -> pipe:[58120]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 7 -> /u02/pgdata/PG962/base/13323/16520
lrwx------. 1 postgres postgres 64 Apr 4 14:45 8 -> /u02/pgdata/PG962/base/13323/16513
lrwx------. 1 postgres postgres 64 Apr 4 14:45 9 -> socket:[58126]
total 0
dr-x------. 2 postgres postgres 0 Apr 4 14:45 .
dr-xr-xr-x. 9 postgres postgres 0 Apr 4 14:34 ..
lr-x------. 1 postgres postgres 64 Apr 4 14:45 0 -> /dev/null
l-wx------. 1 postgres postgres 64 Apr 4 14:45 1 -> pipe:[58121]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 10 -> socket:[58881]
lr-x------. 1 postgres postgres 64 Apr 4 14:45 11 -> pipe:[58882]
l-wx------. 1 postgres postgres 64 Apr 4 14:45 12 -> pipe:[58882]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 13 -> /u02/pgdata/PG962/base/13323/16516
lrwx------. 1 postgres postgres 64 Apr 4 14:45 14 -> /u02/pgdata/PG962/base/pgsql_tmp/pgsql_tmp17817.1
l-wx------. 1 postgres postgres 64 Apr 4 14:45 2 -> pipe:[58121]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 3 -> anon_inode:[eventpoll]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 4 -> /u02/pgdata/PG962/base/13323/2601
lrwx------. 1 postgres postgres 64 Apr 4 14:45 5 -> /u02/pgdata/PG962/base/13323/16517
lr-x------. 1 postgres postgres 64 Apr 4 14:45 6 -> pipe:[58120]
lrwx------. 1 postgres postgres 64 Apr 4 14:45 7 -> /u02/pgdata/PG962/base/13323/16520
lrwx------. 1 postgres postgres 64 Apr 4 14:45 8 -> /u02/pgdata/PG962/base/13323/16513
lrwx------. 1 postgres postgres 64 Apr 4 14:45 9 -> socket:[58126]
The size of the temp file does not change over time:
postgres@pgbox:/u02/pgdata/PG962/ [PG962] ls -lha /u02/pgdata/PG962/base/pgsql_tmp/pgsql_tmp17817.2
-rw-------. 1 postgres postgres 14M Apr 4 14:48 /u02/pgdata/PG962/base/pgsql_tmp/pgsql_tmp17817.2
-rw-------. 1 postgres postgres 14M Apr 4 14:48 /u02/pgdata/PG962/base/pgsql_tmp/pgsql_tmp17817.2
What do I miss here? Shouldn't this complete with 16MB work_mem as well, even when slower, but in less than one hour? Or is this expected?
Thanks for your help
Daniel