* Basic Connectivity from target(subscriber) to source (Publisher)-
postgres@targethost:~$ psql -h sourcedb-vip --port=5432 --user=dba -d sourcedb
Password for user dba:
psql (13.3)
Type "help" for help.
sourcedb=#
* Create Publication
sourcedb=# CREATE PUBLICATION omx_archive FOR TABLE
sample_table1, sample_table2, sample_table3
;
* Create subscription - Please see the table sample_tables exists on both and have the same structure
targetdb=# CREATE SUBSCRIPTION sub_omx_archive_tci
CONNECTION 'host=sourcedb-vip port=5432 user=dba dbname=sourcedb password=xxxxx'
PUBLICATION omx_archive with (enabled =true, create_slot = true, copy_data = true);
* Target DB replication origin status having 0/0 remote_lsb
targetdb=# select * from pg_replication_origin_status;
local_id | external_id | remote_lsn | local_lsn
----------+---------------+----------------+----------------
1 | pg_3615756798 | 9E98/9F62A7A8 | 15197/E0CA940
2 | pg_3616584803 | 0/0 | 0/0
(2 rows)
postgres@targethost:~$ ps -ef | grep subscription
postgres 30997 238163 0 18:07 pts/0 00:00:00 grep --color=auto subscription
postgres 168277 124365 1 13:46 ? 00:04:23 postgres: logical replication worker for subscription 3616584803
postgres 178901 124365 0 05:08 ? 00:00:08 postgres: logical replication worker for subscription 3615756798
* Strace for the subscription 3616584803 (PID 168277) - the one with the problem. The other one is okay and hence not shown
postgres@targethost:~$ strace -p 168277
strace: Process 168277 attached
epoll_wait(21, [], 1, 831) = 0
close(21) = 0
sendto(8, "d\0\0\0&r\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\2"..., 39, MSG_NOSIGNAL, NULL, 0) = 39
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 21
epoll_ctl(21, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804168, u64=34804168}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804192, u64=34804192}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 8, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804216, u64=34804216}}) = 0
epoll_wait(21, [], 1, 1000) = 0
close(21) = 0
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 21
epoll_ctl(21, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804168, u64=34804168}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804192, u64=34804192}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 8, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804216, u64=34804216}}) = 0
epoll_wait(21, [], 1, 1000) = 0
close(21) = 0
sendto(8, "d\0\0\0&r\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\2"..., 39, MSG_NOSIGNAL, NULL, 0) = 39
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 21
epoll_ctl(21, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804168, u64=34804168}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804192, u64=34804192}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 8, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804216, u64=34804216}}) = 0
epoll_wait(21, [], 1, 1000) = 0
close(21) = 0
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 21
epoll_ctl(21, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804168, u64=34804168}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804192, u64=34804192}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 8, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804216, u64=34804216}}) = 0
epoll_wait(21, [], 1, 1000) = 0
close(21) = 0
sendto(8, "d\0\0\0&r\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\2"..., 39, MSG_NOSIGNAL, NULL, 0) = 39
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.440"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.440"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.440"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.440"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.441"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.441"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.441"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.441"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.442"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.442"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.442"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.442"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
* Source DB strace
sourcedb=# select * from pg_replication_slots ;
slot_name | plugin | slot_type | datoid | database | temporary | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn | wal_status | safe_wal_size
--------------------------------+----------+-----------+--------+----------+-----------+--------+------------+------+--------------+---------------+---------------------+---
---------+---------------
sub_omx_archive_big_tables_tci | pgoutput | logical | 16452 | sourcedb | f | t | 76213 | | 78517101 | 9E98/E3D68AF8 | 9E98/E3D68B30 | reserved |
sub_omx_archive_tci | pgoutput | logical | 16452 | sourcedb | f | t | 14312 | | 78517101 | 9E98/E3D68AF8 | 9E98/E3D68B30 | reserved |
(2 rows)
postgres@sourcedb:~$ strace -p 14312
strace: Process 14312 attached
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 508) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
This one always says resource temporarily unavailable and doesn't change.
* FROM Publisher
sourcedb=# select * from pg_replication_slots;
slot_name | plugin | slot_type | datoid | database | temporary | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn | wal_status | safe_wal_size
--------------------------------+----------+-----------+--------+----------+-----------+--------+------------+------+--------------+---------------+---------------------+------------+---------------
sub_omx_archive_big_tables_tci | pgoutput | logical | 16452 | sourcedb | f | t | 76213 | | 78517137 | 9E98/E3EAD248 | 9E98/E3EAD280 | reserved |
sub_omx_archive_tci | pgoutput | logical | 16452 | sourcedb | f | t | 14312 | | 78517137 | 9E98/E3EAD248 | 9E98/E3EAD280 | reserved |
(2 rows)
sourcedb=# select * from pg_stat_replication;
pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start | backend_xmin | state | sent_lsn | write_lsn | flush_lsn | replay_lsn | write_lag | flush_lag | replay_lag | sync_priority | sync_state | reply_time
--------+------------+----------+--------------------------------+---------------+-----------------+-------------+-------------------------------+--------------+-----------+---------------+---------------+---------------+---------------+-----------+-----------+------------+---------------+------------+-------------------------------
76213 | 16420 | dba | sub_omx_archive_big_tables_tci | xx.xx.xxx.xxx | | 46048 | 2022-02-03 05:08:37.928898+09 | | streaming | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | | | | 0 | async | 2022-02-03 18:28:32.905235+09
14312 | 16420 | dba | sub_omx_archive_tci | xx.xx.xxx.xxx | | 11760 | 2022-02-03 13:46:25.855458+09 | | streaming | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | | | | 0 | async | 2022-02-03 18:28:32.882874+09
(2 rows)
sourcedb=# select * from pg_stat_activity where application_name like '%sub%';
datid | datname | pid | leader_pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start| xact_start | query_start | state_change | wait_event_type | wait_event | state | backend_xid | backend_xmin | query | backend_type
-------+---------+-------+------------+----------+----------+--------------------------------+---------------+-----------------+-------------+-------------------------------+------------+-------------------------------+-------------------------------+-----------------+---------------------+--------+-------------+--------------+---------------------------------------------------------+--------------
16452 | sourcedb| 14312 | | 16420 | dba | sub_omx_archive_tci | xx.xx.xxx.xxx | | 11760 | 2022-02-03 13:46:25.855458+09| | 2022-02-03 13:46:25.860942+09 | 2022-02-03 13:46:25.862044+09 | Client | WalSenderWaitForWAL | active | | | SELECT pg_catalog.set_config('search_path', '', false); | walsender
16452 | sourcedb| 76213 | | 16420 | dba | sub_omx_archive_big_tables_tci | xx.xx.xxx.xxx | | 46048 | 2022-02-03 05:08:37.928898+09| | 2022-02-03 05:08:37.937348+09 | 2022-02-03 05:08:37.938772+09 | Client | WalSenderWaitForWAL | active | | | SELECT pg_catalog.set_config('search_path', '', false); | walsender
(2 rows)
postgres@sourcehost:~$ ps -ef | grep postgres | grep wal
postgres 14312 171767 2 13:46 ? 00:06:51 postgres: walsender dba xx.xx.xxx.xxx(11760) idle
postgres 76213 171767 1 05:08 ? 00:12:58 postgres: walsender dba xx.xx.xxx.xxx(46048) idle
* FROM Subscriber
* pg_subscription_rel <- Always in initialize phase
targetdb=# select * from pg_subscription_rel;
srsubid | srrelid | srsubstate | srsublsn
------------+------------+------------+---------------
3615756798 | 3322051793 | r | 9E7E/BF5F82D8
3615756798 | 3322051802 | r | 9E7E/C149BBD8
3616584803 | 3322052690 | i |
3616584803 | 3322052570 | i |
3616584803 | 3322052133 | i |
* pg_stat_subscription
targetdb=# select * from pg_stat_subscription ;
subid | subname | pid | relid | received_lsn | last_msg_send_time | last_msg_receipt_time | latest_end_lsn | latest_end_time
------------+--------------------------------+--------+-------+----------------+-------------------------------+-------------------------------+----------------+-------------------------------
3615756798 | sub_omx_archive_big_tables_tci | 178901 | | 9E98/E3E42DB0 | 2022-02-03 18:23:57.904696+09 | 2022-02-03 18:23:57.904768+09 | 9E98/E3E42DB0 | 2022-02-03 18:23:57.904696+09
3616584803 | sub_omx_archive_tci | 168277 | | 9E98/E3E42DB0 | 2022-02-03 18:23:57.904693+09 | 2022-02-03 18:23:57.904754+09 | 9E98/E3E42DB0 | 2022-02-03 18:23:57.904693+09
Sorry for the long message.
postgres@targethost:~$ psql -h sourcedb-vip --port=5432 --user=dba -d sourcedb
Password for user dba:
psql (13.3)
Type "help" for help.
sourcedb=#
* Create Publication
sourcedb=# CREATE PUBLICATION omx_archive FOR TABLE
sample_table1, sample_table2, sample_table3
;
* Create subscription - Please see the table sample_tables exists on both and have the same structure
targetdb=# CREATE SUBSCRIPTION sub_omx_archive_tci
CONNECTION 'host=sourcedb-vip port=5432 user=dba dbname=sourcedb password=xxxxx'
PUBLICATION omx_archive with (enabled =true, create_slot = true, copy_data = true);
* Target DB replication origin status having 0/0 remote_lsb
targetdb=# select * from pg_replication_origin_status;
local_id | external_id | remote_lsn | local_lsn
----------+---------------+----------------+----------------
1 | pg_3615756798 | 9E98/9F62A7A8 | 15197/E0CA940
2 | pg_3616584803 | 0/0 | 0/0
(2 rows)
postgres@targethost:~$ ps -ef | grep subscription
postgres 30997 238163 0 18:07 pts/0 00:00:00 grep --color=auto subscription
postgres 168277 124365 1 13:46 ? 00:04:23 postgres: logical replication worker for subscription 3616584803
postgres 178901 124365 0 05:08 ? 00:00:08 postgres: logical replication worker for subscription 3615756798
* Strace for the subscription 3616584803 (PID 168277) - the one with the problem. The other one is okay and hence not shown
postgres@targethost:~$ strace -p 168277
strace: Process 168277 attached
epoll_wait(21, [], 1, 831) = 0
close(21) = 0
sendto(8, "d\0\0\0&r\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\2"..., 39, MSG_NOSIGNAL, NULL, 0) = 39
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 21
epoll_ctl(21, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804168, u64=34804168}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804192, u64=34804192}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 8, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804216, u64=34804216}}) = 0
epoll_wait(21, [], 1, 1000) = 0
close(21) = 0
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 21
epoll_ctl(21, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804168, u64=34804168}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804192, u64=34804192}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 8, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804216, u64=34804216}}) = 0
epoll_wait(21, [], 1, 1000) = 0
close(21) = 0
sendto(8, "d\0\0\0&r\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\2"..., 39, MSG_NOSIGNAL, NULL, 0) = 39
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 21
epoll_ctl(21, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804168, u64=34804168}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804192, u64=34804192}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 8, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804216, u64=34804216}}) = 0
epoll_wait(21, [], 1, 1000) = 0
close(21) = 0
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 21
epoll_ctl(21, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804168, u64=34804168}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804192, u64=34804192}}) = 0
epoll_ctl(21, EPOLL_CTL_ADD, 8, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=34804216, u64=34804216}}) = 0
epoll_wait(21, [], 1, 1000) = 0
close(21) = 0
sendto(8, "d\0\0\0&r\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\0\236\230\343\275V\370\0\2"..., 39, MSG_NOSIGNAL, NULL, 0) = 39
recvfrom(8, 0x21f60f0, 16384, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.440"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.440"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.440"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.440"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.441"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.441"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.441"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.441"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.442"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.442"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.442"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
write(2, "\0\0\371\0U\221\2\0T2022-02-03 18:08:21.442"..., 258) = 258
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 167, MSG_NOSIGNAL, NULL, 0) = 167
sendto(9, "<133>Feb 3 18:08:21 postgres[16"..., 184, MSG_NOSIGNAL, NULL, 0) = 184
* Source DB strace
sourcedb=# select * from pg_replication_slots ;
slot_name | plugin | slot_type | datoid | database | temporary | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn | wal_status | safe_wal_size
--------------------------------+----------+-----------+--------+----------+-----------+--------+------------+------+--------------+---------------+---------------------+---
---------+---------------
sub_omx_archive_big_tables_tci | pgoutput | logical | 16452 | sourcedb | f | t | 76213 | | 78517101 | 9E98/E3D68AF8 | 9E98/E3D68B30 | reserved |
sub_omx_archive_tci | pgoutput | logical | 16452 | sourcedb | f | t | 14312 | | 78517101 | 9E98/E3D68AF8 | 9E98/E3D68B30 | reserved |
(2 rows)
postgres@sourcedb:~$ strace -p 14312
strace: Process 14312 attached
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 508) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
close(8) = 0
recvfrom(12, "d", 1, 0, NULL, NULL) = 1
recvfrom(12, "\0\0\0&r\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\0\236\230\343\326\214\210\0\2z"..., 8192, 0, NULL, NULL) = 38
recvfrom(12, 0x7ffd430d1967, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)
epoll_create1(EPOLL_CLOEXEC) = 8
epoll_ctl(8, EPOLL_CTL_ADD, 13, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801248, u64=33801248}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 3, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801272, u64=33801272}}) = 0
epoll_ctl(8, EPOLL_CTL_ADD, 12, {EPOLLIN|EPOLLERR|EPOLLHUP, {u32=33801296, u64=33801296}}) = 0
epoll_wait(8, [{EPOLLIN, {u32=33801296, u64=33801296}}], 1, 2500) = 1
This one always says resource temporarily unavailable and doesn't change.
* FROM Publisher
sourcedb=# select * from pg_replication_slots;
slot_name | plugin | slot_type | datoid | database | temporary | active | active_pid | xmin | catalog_xmin | restart_lsn | confirmed_flush_lsn | wal_status | safe_wal_size
--------------------------------+----------+-----------+--------+----------+-----------+--------+------------+------+--------------+---------------+---------------------+------------+---------------
sub_omx_archive_big_tables_tci | pgoutput | logical | 16452 | sourcedb | f | t | 76213 | | 78517137 | 9E98/E3EAD248 | 9E98/E3EAD280 | reserved |
sub_omx_archive_tci | pgoutput | logical | 16452 | sourcedb | f | t | 14312 | | 78517137 | 9E98/E3EAD248 | 9E98/E3EAD280 | reserved |
(2 rows)
sourcedb=# select * from pg_stat_replication;
pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start | backend_xmin | state | sent_lsn | write_lsn | flush_lsn | replay_lsn | write_lag | flush_lag | replay_lag | sync_priority | sync_state | reply_time
--------+------------+----------+--------------------------------+---------------+-----------------+-------------+-------------------------------+--------------+-----------+---------------+---------------+---------------+---------------+-----------+-----------+------------+---------------+------------+-------------------------------
76213 | 16420 | dba | sub_omx_archive_big_tables_tci | xx.xx.xxx.xxx | | 46048 | 2022-02-03 05:08:37.928898+09 | | streaming | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | | | | 0 | async | 2022-02-03 18:28:32.905235+09
14312 | 16420 | dba | sub_omx_archive_tci | xx.xx.xxx.xxx | | 11760 | 2022-02-03 13:46:25.855458+09 | | streaming | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | 9E98/E3EAD3D8 | | | | 0 | async | 2022-02-03 18:28:32.882874+09
(2 rows)
sourcedb=# select * from pg_stat_activity where application_name like '%sub%';
datid | datname | pid | leader_pid | usesysid | usename | application_name | client_addr | client_hostname | client_port | backend_start| xact_start | query_start | state_change | wait_event_type | wait_event | state | backend_xid | backend_xmin | query | backend_type
-------+---------+-------+------------+----------+----------+--------------------------------+---------------+-----------------+-------------+-------------------------------+------------+-------------------------------+-------------------------------+-----------------+---------------------+--------+-------------+--------------+---------------------------------------------------------+--------------
16452 | sourcedb| 14312 | | 16420 | dba | sub_omx_archive_tci | xx.xx.xxx.xxx | | 11760 | 2022-02-03 13:46:25.855458+09| | 2022-02-03 13:46:25.860942+09 | 2022-02-03 13:46:25.862044+09 | Client | WalSenderWaitForWAL | active | | | SELECT pg_catalog.set_config('search_path', '', false); | walsender
16452 | sourcedb| 76213 | | 16420 | dba | sub_omx_archive_big_tables_tci | xx.xx.xxx.xxx | | 46048 | 2022-02-03 05:08:37.928898+09| | 2022-02-03 05:08:37.937348+09 | 2022-02-03 05:08:37.938772+09 | Client | WalSenderWaitForWAL | active | | | SELECT pg_catalog.set_config('search_path', '', false); | walsender
(2 rows)
postgres@sourcehost:~$ ps -ef | grep postgres | grep wal
postgres 14312 171767 2 13:46 ? 00:06:51 postgres: walsender dba xx.xx.xxx.xxx(11760) idle
postgres 76213 171767 1 05:08 ? 00:12:58 postgres: walsender dba xx.xx.xxx.xxx(46048) idle
* FROM Subscriber
* pg_subscription_rel <- Always in initialize phase
targetdb=# select * from pg_subscription_rel;
srsubid | srrelid | srsubstate | srsublsn
------------+------------+------------+---------------
3615756798 | 3322051793 | r | 9E7E/BF5F82D8
3615756798 | 3322051802 | r | 9E7E/C149BBD8
3616584803 | 3322052690 | i |
3616584803 | 3322052570 | i |
3616584803 | 3322052133 | i |
* pg_stat_subscription
targetdb=# select * from pg_stat_subscription ;
subid | subname | pid | relid | received_lsn | last_msg_send_time | last_msg_receipt_time | latest_end_lsn | latest_end_time
------------+--------------------------------+--------+-------+----------------+-------------------------------+-------------------------------+----------------+-------------------------------
3615756798 | sub_omx_archive_big_tables_tci | 178901 | | 9E98/E3E42DB0 | 2022-02-03 18:23:57.904696+09 | 2022-02-03 18:23:57.904768+09 | 9E98/E3E42DB0 | 2022-02-03 18:23:57.904696+09
3616584803 | sub_omx_archive_tci | 168277 | | 9E98/E3E42DB0 | 2022-02-03 18:23:57.904693+09 | 2022-02-03 18:23:57.904754+09 | 9E98/E3E42DB0 | 2022-02-03 18:23:57.904693+09
Sorry for the long message.
On Thu, Feb 3, 2022 at 5:25 PM Vijaykumar Jain <vijaykumarjain.github@xxxxxxxxx> wrote:
On Thu, 3 Feb 2022 at 12:44, Abhishek Bhola <abhishek.bhola@xxxxxxxxxxxxxxx> wrote:Hi Vijaykumar,I checked the pg_subscription_rel and all the tables in that subscription are in the state - i (initialize).I also tried creating a new publication on the source DB with just one table and tried to subscribe it, it doesn't work either.However, when I try to subscribe it on some other DB than the one mentioned above, it works.By which I am deducing that publication and the source DB are okay, the problem is on the target DB and it's subscription.Maybe I will have to restart the DB as a last resort, but I am not sure if that will solve the problem either.its a very verbose mail, so if it noisy, kindly ignore.else,can you check basic connectivity from the subscriber to publisher using psql and run a simple query ?can you share your "create publication" and "create subscription" commands/statements too please?i am attaching a general logical replication setup on a single server and put some scenarios where replication breaks and how to monitor and how to resume.and how that is monitored.postgres@controller:~$ tail db1/postgresql.conf db2/postgresql.conf==> db1/postgresql.conf <==# Add settings for extensions herewal_level=logicalarchive_mode = onarchive_command = '/bin/true'max_wal_size = 48MBmin_wal_size = 32MBshared_buffers = 32MBport = 5001max_logical_replication_workers = 10==> db2/postgresql.conf <==# Add settings for extensions herewal_level=logicalarchive_mode = onarchive_command = '/bin/true'max_wal_size = 48MBmin_wal_size = 32MBshared_buffers = 32MBport = 5002max_logical_replication_workers = 10postgres@controller:~$ pg_ctl -D db1 -l db1.log startwaiting for server to start.... doneserver startedpostgres@controller:~$ pg_ctl -D db2 -l db2.log startwaiting for server to start.... doneserver startedpostgres@controller:~$ psql -p 5001psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# \xExpanded display is on.postgres=# create table t1(id int primary key);CREATE TABLEpostgres=# create table t2(id int); -- this will throw error on delete, no replica identityCREATE TABLEpostgres=# insert into t1 select x from generate_series(1, 100) x;INSERT 0 100postgres=# insert into t2 select x from generate_series(1, 100) x;INSERT 0 100postgres=# checkpoint;CHECKPOINTpostgres=# \qpostgres@controller:~$ psql -p 5002psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# create table t1(id int primary key);CREATE TABLEpostgres=# create table t2(id int);CREATE TABLEpostgres=# \qpostgres@controller:~$ ps aux | grep -i postgres:postgres 1116 0.0 0.4 113632 8232 ? Ss 13:24 0:00 postgres: checkpointerpostgres 1117 0.0 0.2 113496 5868 ? Ss 13:24 0:00 postgres: background writerpostgres 1118 0.0 0.3 113496 6964 ? Ss 13:24 0:00 postgres: walwriterpostgres 1119 0.0 0.4 114032 8432 ? Ss 13:24 0:00 postgres: autovacuum launcherpostgres 1120 0.0 0.2 113496 4132 ? Ss 13:24 0:00 postgres: archiverpostgres 1121 0.0 0.2 72112 4896 ? Ss 13:24 0:00 postgres: stats collectorpostgres 1122 0.0 0.3 113928 6732 ? Ss 13:24 0:00 postgres: logical replication launcherpostgres 1128 0.0 0.3 113496 5956 ? Ss 13:24 0:00 postgres: checkpointerpostgres 1129 0.0 0.3 113496 5916 ? Ss 13:24 0:00 postgres: background writerpostgres 1130 0.0 0.3 113496 6952 ? Ss 13:24 0:00 postgres: walwriterpostgres 1131 0.0 0.4 114032 8384 ? Ss 13:24 0:00 postgres: autovacuum launcherpostgres 1132 0.0 0.2 113496 4160 ? Ss 13:24 0:00 postgres: archiverpostgres 1133 0.0 0.2 72112 4868 ? Ss 13:24 0:00 postgres: stats collectorpostgres 1134 0.0 0.3 113928 6804 ? Ss 13:24 0:00 postgres: logical replication launcherpostgres 1186 0.0 0.0 8164 724 pts/0 S+ 13:26 0:00 grep -i postgres:postgres@controller:~$ psql -p 5001psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# create publication mypub for all tablespostgres=# create publication mypub for all tables with (publish='insert,update,delete,truncate');CREATE PUBLICATIONpostgres=# \qpostgres@controller:~$ psql -p 5002psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# create subscription mysub connection 'port=5001' publication mypub;NOTICE: created replication slot "mysub" on publisherCREATE SUBSCRIPTIONpostgres=# select count(1) from t1;count-------100(1 row)postgres=# select count(1) from t2;count-------100(1 row)postgres=# \qpostgres@controller:~$ ps aux | grep postgres: # strace the below pids for movementpostgres 1195 0.0 0.7 114800 14744 ? Ss 13:27 0:00 postgres: logical replication worker for subscription 16392postgres 1196 0.0 0.7 114728 14676 ? Ss 13:27 0:00 postgres: walsender postgres [local] START_REPLICATION# logical replication slot is active(=t) with an active pid (which we strace) and has movement of lsn (= healthy)postgres@controller:~$ psql -p 5001psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# \xExpanded display is on.postgres=# select * from pg_replication_slots;-[ RECORD 1 ]-------+----------slot_name | mysubplugin | pgoutputslot_type | logicaldatoid | 13726database | postgrestemporary | factive | tactive_pid | 1196xmin |catalog_xmin | 740restart_lsn | 0/20308F0confirmed_flush_lsn | 0/2030928wal_status | reservedsafe_wal_size |two_phase | fpostgres=# \q#strace publication worker process , check if it is moving fineroot@controller:~# strace -p 1196strace: Process 1196 attachedepoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 29577) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)sendto(9, "d\0\0\0\26k\0\0\0\0\2\3\t(\0\2z\27H\376\271\323\0", 23, 0, NULL, 0) = 23epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 29999) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 30000) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 30000) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)sendto(9, "d\0\0\0\26k\0\0\0\0\2\3\t(\0\2z\27J\311_\352\0", 23, 0, NULL, 0) = 23epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 29999) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 30000) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 30000) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)sendto(9, "d\0\0\0\26k\0\0\0\0\2\3\t(\0\2z\27L\224?s\0", 23, 0, NULL, 0) = 23epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 30000) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\0\0\0\2\3\t(\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)epoll_wait(4, [{EPOLLIN, {u32=654228944, u64=94283776310736}}], 1, 30000) = 1read(10, "\27\0\0\0\0\0\0\0\0\0\0\0^\4\0\0p\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 128recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)pread64(6, "\r\321\5\0\1\0\0\0\0\0\3\2\0\0\0\0\0\3\0\0\0\0\0\0\2365\0\0\210\n\0\0"..., 8192, 196608) = 8192stat("pg_logical/snapshots/0-2030928.snap", 0x7fff98dfb6b0) = -1 ENOENT (No such file or directory)unlink("pg_logical/snapshots/0-2030928.snap.1196.tmp") = -1 ENOENT (No such file or directory)openat(AT_FDCWD, "pg_logical/snapshots/0-2030928.snap.1196.tmp", O_WRONLY|O_CREAT|O_EXCL, 0600) = 7write(7, "\1\340\241Q\346\223#\367\4\0\0\0\200\0\0\0\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 128) = 128fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_logical/snapshots", O_RDONLY) = 7fsync(7) = 0close(7) = 0rename("pg_logical/snapshots/0-2030928.snap.1196.tmp", "pg_logical/snapshots/0-2030928.snap") = 0openat(AT_FDCWD, "pg_logical/snapshots/0-2030928.snap", O_RDWR) = 7fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_logical/snapshots", O_RDONLY) = 7fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_replslot/mysub/state.tmp", O_WRONLY|O_CREAT|O_EXCL, 0600) = 7write(7, "\241\34\5\1W\23\376Y\2\0\0\0\270\0\0\0mysub\0\0\0\0\0\0\0\0\0\0\0"..., 200) = 200fsync(7) = 0close(7) = 0rename("pg_replslot/mysub/state.tmp", "pg_replslot/mysub/state") = 0openat(AT_FDCWD, "pg_replslot/mysub/state", O_RDWR) = 7fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_replslot/mysub", O_RDONLY) = 7fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_replslot", O_RDONLY) = 7fsync(7) = 0close(7) = 0recvfrom(9, 0x7fff98dfc077, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)pread64(6, "\r\321\5\0\1\0\0\0\0\0\3\2\0\0\0\0\0\3\0\0\0\0\0\0\2365\0\0\210\n\0\0"..., 8192, 196608) = 8192sendto(9, "d\0\0\0\26k\0\0\0\0\2\3\t`\0\2z\27M\223\336\351\0", 23, 0, NULL, 0) = 23recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t`\0\0\0\0\2\3\t`\0\0\0\0\2\3\t`\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfc077, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)sendto(9, "d\0\0\0\26k\0\0\0\0\2\3\t\330\0\2z\27M\223\341P\0", 23, 0, NULL, 0) = 23epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 30000) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\t\330\0\0\0\0\2\3\t\330\0\0\0\0\2\3\t\330\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)epoll_wait(4, [{EPOLLIN, {u32=654228944, u64=94283776310736}}], 1, 30000) = 1read(10, "\27\0\0\0\0\0\0\0\0\0\0\0^\4\0\0p\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 128recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)pread64(6, "\r\321\5\0\1\0\0\0\0\0\3\2\0\0\0\0\0\3\0\0\0\0\0\0\2365\0\0\210\n\0\0"..., 8192, 196608) = 8192stat("pg_logical/snapshots/0-20309D8.snap", 0x7fff98dfb6b0) = -1 ENOENT (No such file or directory)unlink("pg_logical/snapshots/0-20309D8.snap.1196.tmp") = -1 ENOENT (No such file or directory)openat(AT_FDCWD, "pg_logical/snapshots/0-20309D8.snap.1196.tmp", O_WRONLY|O_CREAT|O_EXCL, 0600) = 7write(7, "\1\340\241Q\336\33\320]\4\0\0\0\200\0\0\0\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 128) = 128fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_logical/snapshots", O_RDONLY) = 7fsync(7) = 0close(7) = 0rename("pg_logical/snapshots/0-20309D8.snap.1196.tmp", "pg_logical/snapshots/0-20309D8.snap") = 0openat(AT_FDCWD, "pg_logical/snapshots/0-20309D8.snap", O_RDWR) = 7fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_logical/snapshots", O_RDONLY) = 7fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_replslot/mysub/state.tmp", O_WRONLY|O_CREAT|O_EXCL, 0600) = 7write(7, "\241\34\5\1\353A\375\303\2\0\0\0\270\0\0\0mysub\0\0\0\0\0\0\0\0\0\0\0"..., 200) = 200fsync(7) = 0close(7) = 0rename("pg_replslot/mysub/state.tmp", "pg_replslot/mysub/state") = 0openat(AT_FDCWD, "pg_replslot/mysub/state", O_RDWR) = 7fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_replslot/mysub", O_RDONLY) = 7fsync(7) = 0close(7) = 0openat(AT_FDCWD, "pg_replslot", O_RDONLY) = 7fsync(7) = 0close(7) = 0recvfrom(9, 0x7fff98dfc077, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)sendto(9, "d\0\0\0\26k\0\0\0\0\2\3\n\20\0\2z\27M\346W\275\0", 23, 0, NULL, 0) = 23epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 24596) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\n\20\0\0\0\0\2\3\n\20\0\0\0\0\2\3\n\20\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)epoll_wait(4, [{EPOLLIN, {u32=654228944, u64=94283776310736}}], 1, 30000) = 1read(10, "\27\0\0\0\0\0\0\0\0\0\0\0\237\5\0\0p\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 128recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)pread64(6, "\r\321\5\0\1\0\0\0\0\0\3\2\0\0\0\0\0\3\0\0\0\0\0\0\2365\0\0\210\n\0\0"..., 8192, 196608) = 8192pread64(6, "\r\321\5\0\1\0\0\0\0 \3\2\0\0\0\0\21\10\0\0\0\0\0\0005\0\0forei"..., 8192, 204800) = 8192recvfrom(9, 0x7fff98dfc077, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)pread64(6, "\r\321\5\0\1\0\0\0\0@\3\2\0\0\0\0U\7\0\0\0\0\0\0\0\0\0\0\0001\0\33"..., 8192, 212992) = 8192recvfrom(9, 0x7fff98dfc077, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)pread64(6, "\r\321\5\0\1\0\0\0\0`\3\2\0\0\0\0a\16\0\0\0\0\0\0\v\30\0\337\4\0\0\203"..., 8192, 221184) = 8192recvfrom(9, 0x7fff98dfc077, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)pread64(6, "\r\321\5\0\1\0\0\0\0\200\3\2\0\0\0\0\225\5\0\0\0\0\0\0\0\0\0\0\0>\0F"..., 8192, 229376) = 8192sendto(9, "d\0\0\0002w\0\0\0\0\2\3\n\20\0\0\0\0\2\3\n\20\0\2z\27N]\200\371B\0"..., 51, 0, NULL, 0) = 51brk(0x55c0270d1000) = 0x55c0270d1000brk(0x55c0270ca000) = 0x55c0270ca000openat(AT_FDCWD, "base/13726/3455", O_RDWR) = 7pread64(7, "\0\0\0\0h;g\1\0\0\0\0\330\2\360\24\360\37\4 \0\0\0\0\340\237 \0\320\237 \0"..., 8192, 8192) = 8192sendto(9, "d\0\0\0007w\0\0\0\0\2\3\233\370\0\0\0\0\2\3\233\370\0\2z\27N]\205\334C\0"..., 56, 0, NULL, 0) = 56rt_sigprocmask(SIG_SETMASK, [URG], NULL, 8) = 0sendto(8, "\27\0\0\0\220\0\0\0mysub\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 144, 0, NULL, 0) = 144recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\233\370\0\0\0\0\2\3\233\370\0\0\0\0\2\3\233\370\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfc077, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)epoll_wait(4, [{EPOLLIN, {u32=654228920, u64=94283776310712}}], 1, 30000) = 1recvfrom(9, "d", 1, 0, NULL, NULL) = 1recvfrom(9, "\0\0\0&r\0\0\0\0\2\3\233\370\0\0\0\0\2\3\233\370\0\0\0\0\2\3\233\370\0\2z"..., 8192, 0, NULL, NULL) = 38recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)epoll_wait(4, [{EPOLLIN, {u32=654228944, u64=94283776310736}}], 1, 30000) = 1read(10, "\27\0\0\0\0\0\0\0\0\0\0\0^\4\0\0p\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 1024) = 128recvfrom(9, 0x7fff98dfbf07, 1, 0, NULL, NULL) = -1 EAGAIN (Resource temporarily unavailable)pread64(6, "\r\321\5\0\1\0\0\0\0\200\3\2\0\0\0\0\225\5\0\0\0\0\0\0\0\0\0\0\0>\0F"..., 8192, 229376) = 8192stat("pg_logical/snapshots/0-2039BF8.snap", 0x7fff98dfb6b0) = -1 ENOENT (No such file or directory)unlink("pg_logical/snapshots/0-2039BF8.snap.1196.tmp") = -1 ENOENT (No such file or directory)openat(AT_FDCWD, "pg_logical/snapshots/0-2039BF8.snap.1196.tmp", O_WRONLY|O_CREAT|O_EXCL, 0600) = 12write(12, "\1\340\241Q\326\370d\25\4\0\0\0\204\0\0\0\2\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"..., 132) = 132fsync(12) = 0close(12) = 0openat(AT_FDCWD, "pg_logical/snapshots", O_RDONLY) = 12fsync(12) = 0close(12) = 0rename("pg_logical/snapshots/0-2039BF8.snap.1196.tmp", "pg_logical/snapshots/0-2039BF8.snap") = 0openat(AT_FDCWD, "pg_logical/snapshots/0-2039BF8.snap", O_RDWR) = 12fsync(12) = 0close(12) = 0openat(AT_FDCWD, "pg_logical/snapshots", O_RDONLY) = 12fsync(12) = 0close(12) = 0openat(AT_FDCWD, "pg_replslot/mysub/state.tmp", O_WRONLY|O_CREAT|O_EXCL, 0600) = 12write(12, "\241\34\5\1>-\275R\2\0\0\0\270\0\0\0mysub\0\0\0\0\0\0\0\0\0\0\0"..., 200) = 200fsync(12) = 0close(12) = 0rename("pg_replslot/mysub/state.tmp", "pg_replslot/mysub/state") = 0openat(AT_FDCWD, "pg_replslot/mysub/state", O_RDWR) = 12fsync(12) = 0close(12) = 0openat(AT_FDCWD, "pg_replslot/mysub", O_RDONLY) = 12fsync(12) = 0close(12) = 0openat(AT_FDCWD, "pg_replslot", O_RDONLY) = 12fsync(12) = 0close(12) = 0openat(AT_FDCWD, "pg_replslot/mysub/state.tmp", O_WRONLY|O_CREAT|O_EXCL, 0600) = 12write(12, "\241\34\5\1\237\331q|\2\0\0\0\270\0\0\0mysub\0\0\0\0\0\0\0\0\0\0\0"..., 200) = 200fsync(12) = 0close(12) = 0rename("pg_replslot/mysub/state.tmp", "pg_replslot/mysub/state") = 0openat(AT_FDCWD, "pg_replslot/mysub/state", O_RDWR) = 12fsync(12) = 0close(12) = 0openat(AT_FDCWD, "pg_replslot/mysub", O_RDONLY) = 12fsync(12) = 0close(12) = 0openat(AT_FDCWD, "pg_replslot", O_RDONLY) = 12fsync(12) = 0close(12) = 0#check the subscription worker strace similarlystrace -p 1195# check the state og subscriptionpostgres@controller:~$ psql -p 5002psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# \xExpanded display is on.postgres=# select * from pg_subscription_rel;-[ RECORD 1 ]---------srsubid | 16392srrelid | 16389srsubstate | rsrsublsn | 0/20308F0-[ RECORD 2 ]---------srsubid | 16392srrelid | 16384srsubstate | rsrsublsn | 0/2030928postgres=# select * from pg_stat_subscription;-[ RECORD 1 ]---------+---------------------------------subid | 16392subname | mysubpid | 1195relid |received_lsn | 0/2030928last_msg_send_time | 2022-02-03 13:30:10.969066+05:30last_msg_receipt_time | 2022-02-03 13:30:10.971322+05:30latest_end_lsn | 0/2030928latest_end_time | 2022-02-03 13:30:10.969066+05:30postgres=# \qpostgres@controller:~$ psql -p 5001psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# \xExpanded display is on.postgres=# select * from pg_stat_replication;-[ RECORD 1 ]----+---------------------------------pid | 1196usesysid | 10usename | postgresapplication_name | mysubclient_addr |client_hostname |client_port | -1backend_start | 2022-02-03 13:27:40.690802+05:30backend_xmin |state | streamingsent_lsn | 0/2030928write_lsn | 0/2030928flush_lsn | 0/2030928replay_lsn | 0/2030928write_lag |flush_lag |replay_lag |sync_priority | 0sync_state | asyncreply_time | 2022-02-03 13:30:41.041007+05:30#i make ddl change which will break the subscription, as table not in syncpostgres=# alter table t2 add column col2 int default 0;ALTER TABLEpostgres=# select * from pg_stat_replication;-[ RECORD 1 ]----+---------------------------------pid | 1196usesysid | 10usename | postgresapplication_name | mysubclient_addr |client_hostname |client_port | -1backend_start | 2022-02-03 13:27:40.690802+05:30backend_xmin |state | streamingsent_lsn | 0/2039BF8write_lsn | 0/2039BF8flush_lsn | 0/2039BF8replay_lsn | 0/2039BF8write_lag | 00:00:00.001365flush_lag | 00:00:00.001365replay_lag | 00:00:00.001365sync_priority | 0sync_state | asyncreply_time | 2022-02-03 13:31:11.010197+05:30postgres=# \qpostgres@controller:~$ psql -p 5001psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# insert into t2(id,col2) select 0,0 from generate_series(1, 100) x;INSERT 0 100postgres=# \qpostgres@controller:~$ psql -p 5002psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# select count(1) from t2;count-------100 -- note this should be 200, but it did not add, so we need to check for errors(1 row)postgres=# \xExpanded display is on.postgres=# select * from pg_stat_subscription;-[ RECORD 1 ]---------+------subid | 16392subname | mysubpid | ----no pid, subscription is not activerelid |received_lsn |last_msg_send_time |last_msg_receipt_time |latest_end_lsn |latest_end_time |postgres@controller:~$ psql -p 5001psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# \xExpanded display is on.postgres=# select * from pg_stat_replication;(0 rows)postgres=# select * from pg_replication_slots;-[ RECORD 1 ]-------+----------slot_name | mysubplugin | pgoutputslot_type | logicaldatoid | 13726database | postgrestemporary | factive | f -- active =f , publication broken, check logs, (strace would have exited as no process died)active_pid |xmin |catalog_xmin | 741restart_lsn | 0/2039BF8confirmed_flush_lsn | 0/2039C30wal_status | reservedsafe_wal_size |two_phase | fpostgres=# \qpostgres@controller:~$ tail -100 db1.log2022-02-03 13:32:06.873 IST [1456] STATEMENT: START_REPLICATION SLOT "mysub" LOGICAL 0/0 (proto_version '2', publication_names '"mypub"')2022-02-03 13:32:11.890 IST [1458] LOG: starting logical decoding for slot "mysub"2022-02-03 13:32:11.890 IST [1458] DETAIL: Streaming transactions committing after 0/2039C30, reading WAL from 0/2039BF8.2022-02-03 13:32:11.890 IST [1458] STATEMENT: START_REPLICATION SLOT "mysub" LOGICAL 0/0 (proto_version '2', publication_names '"mypub"')2022-02-03 13:32:11.891 IST [1458] LOG: logical decoding found consistent point at 0/2039BF82022-02-03 13:32:11.891 IST [1458] DETAIL: There are no running transactions.2022-02-03 13:32:11.891 IST [1458] STATEMENT: START_REPLICATION SLOT "mysub" LOGICAL 0/0 (proto_version '2', publication_names '"mypub"')2022-02-03 13:32:11.892 IST [1458] LOG: could not send data to client: Broken pipe2022-02-03 13:33:02.074 IST [1482] CONTEXT: slot "mysub", output plugin "pgoutput", in the change callback, associated LSN 0/203AAD82022-02-03 13:33:02.074 IST [1482] STATEMENT: START_REPLICATION SLOT "mysub" LOGICAL 0/0 (proto_version '2', publication_names '"mypub"')postgres@controller:~$ tail -100 db2.log2022-02-03 13:24:42.101 IST [1126] LOG: starting PostgreSQL 14.1 (Ubuntu 14.1-2.pgdg20.04+1) on x86_64-pc-linux-gnu, compiled by gcc (Ubuntu 9.3.0-17ubuntu1~20.04) 9.3.0, 64-bit2022-02-03 13:24:42.101 IST [1126] LOG: listening on IPv6 address "::1", port 50022022-02-03 13:24:42.101 IST [1126] LOG: listening on IPv4 address "127.0.0.1", port 50022022-02-03 13:24:42.104 IST [1126] LOG: listening on Unix socket "/var/run/postgresql/.s.PGSQL.5002"2022-02-03 13:24:42.109 IST [1127] LOG: database system was shut down at 2022-02-03 13:24:25 IST2022-02-03 13:24:42.112 IST [1126] LOG: database system is ready to accept connections2022-02-03 13:27:40.690 IST [1195] LOG: logical replication apply worker for subscription "mysub" has started2022-02-03 13:27:40.694 IST [1197] LOG: logical replication table synchronization worker for subscription "mysub", table "t2" has started2022-02-03 13:27:40.704 IST [1199] LOG: logical replication table synchronization worker for subscription "mysub", table "t1" has started2022-02-03 13:27:40.732 IST [1197] LOG: logical replication table synchronization worker for subscription "mysub", table "t2" has finished2022-02-03 13:27:40.746 IST [1199] LOG: logical replication table synchronization worker for subscription "mysub", table "t1" has finished2022-02-03 13:31:56.830 IST [1195] ERROR: logical replication target relation "public.t2" is missing replicated column: "col2"2022-02-03 13:31:56.831 IST [1126] LOG: background worker "logical replication worker" (PID 1195) exited with exit code 1postgres@controller:~$ psql -p 5002psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# alter table t2 add column col2 int default 0;ALTER TABLEpostgres=# \qpostgres@controller:~$ tail -10 db2.log2022-02-03 13:33:27.156 IST [1495] LOG: logical replication apply worker for subscription "mysub" has started2022-02-03 13:33:27.164 IST [1495] ERROR: logical replication target relation "public.t2" is missing replicated column: "col2"2022-02-03 13:33:27.165 IST [1126] LOG: background worker "logical replication worker" (PID 1495) exited with exit code 12022-02-03 13:33:32.175 IST [1497] LOG: logical replication apply worker for subscription "mysub" has started2022-02-03 13:33:32.182 IST [1497] ERROR: logical replication target relation "public.t2" is missing replicated column: "col2"2022-02-03 13:33:32.183 IST [1126] LOG: background worker "logical replication worker" (PID 1497) exited with exit code 12022-02-03 13:33:37.194 IST [1502] LOG: logical replication apply worker for subscription "mysub" has started2022-02-03 13:33:37.201 IST [1502] ERROR: logical replication target relation "public.t2" is missing replicated column: "col2"2022-02-03 13:33:37.202 IST [1126] LOG: background worker "logical replication worker" (PID 1502) exited with exit code 12022-02-03 13:33:42.212 IST [1504] LOG: logical replication apply worker for subscription "mysub" has startedpostgres@controller:~$ psql -p 5002psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# \xExpanded display is on.postgres=# select * from pg_subscription_rel;-[ RECORD 1 ]---------srsubid | 16392srrelid | 16389srsubstate | rsrsublsn | 0/20308F0-[ RECORD 2 ]---------srsubid | 16392srrelid | 16384srsubstate | rsrsublsn | 0/2030928postgres=# select * from pg_stat_subscription;-[ RECORD 1 ]---------+---------------------------------subid | 16392subname | mysubpid | 1504relid |received_lsn | 0/2040490last_msg_send_time | 2022-02-03 13:33:42.235423+05:30last_msg_receipt_time | 2022-02-03 13:33:42.235432+05:30latest_end_lsn | 0/2040490latest_end_time | 2022-02-03 13:33:42.235423+05:30postgres=# \qpostgres@controller:~$ psql -p 5002psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# select count(1) from t2;count-------200(1 row)postgres=# \qpostgres@controller:~$ ps aux | grep postgres: # we need to strace the below new pids/process , as old ones were terminatedpostgres 1504 0.0 0.7 114904 14972 ? Ss 13:33 0:00 postgres: logical replication worker for subscription 16392postgres 1505 0.0 0.8 114864 15952 ? Ss 13:33 0:00 postgres: walsender postgres [local] START_REPLICATIONpostgres 1513 0.0 0.0 8160 672 pts/0 S+ 13:34 0:00 grep postgres:# check everything should be working.postgres@controller:~$ psql -p 5001psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# delete from t2; -- as said earlier, delete would fail, needs a fixERROR: cannot delete from table "t2" because it does not have a replica identity and publishes deletesHINT: To enable deleting from the table, set REPLICA IDENTITY using ALTER TABLE.postgres=# delete from t1;DELETE 100postgres=# alter table t2 replica identity full; -- since no primary key or unique keyALTER TABLEpostgres=# delete from t2;DELETE 200postgres=# select count(1) from t2;count-------0(1 row)postgres=# select count(1) from t1;count-------0(1 row)postgres=# \qpostgres@controller:~$ psql -p 5002psql (14.1 (Ubuntu 14.1-2.pgdg20.04+1))Type "help" for help.postgres=# select count(1) from t1;count-------0(1 row)postgres=# select count(1) from t2;count-------0(1 row)so in short,share yourfrom publisherselect * from pg_replication_slots;select * from pg_stat_replication;select * from pg_stat_activity;ps aux | grep postgres: # ( publisher)from subscriberselect * from pg_stat_subscription;select * from pg_subscription_rel;ps aux | grep postgres: # ( subscriber)and logs of publisher and subscriber db (for errors, it there are no pids for above, logs should show errors)
This correspondence (including any attachments) is for the intended recipient(s) only. It may contain confidential or privileged information or both. No confidentiality or privilege is waived or lost by any mis-transmission. If you receive this correspondence by mistake, please contact the sender immediately, delete this correspondence (and all attachments) and destroy any hard copies. You must not use, disclose, copy, distribute or rely on any part of this correspondence (including any attachments) if you are not the intended recipient(s).