When I check the status, the workers switch between Active
(with History Crawl) and Faulty (with N/A) every few seconds.
[root@gfs1 ~]# tail -n 100 $(gluster volume
geo-replication gfsvol geo-rep-user@gfs4::gfsvol_rep config
log-file)
[2017-09-29 15:53:29.785386] I
[master(/gfs/brick2/gv0):1860:syncjob] Syncer: Sync Time
Taken duration=0.0357 num_files=1 job=3 return_code=12
[2017-09-29 15:53:29.785615] E
[resource(/gfs/brick2/gv0):208:errlog] Popen: command
returned error cmd=rsync
-aR0 --inplace --files-from=- --super --stats --numeric-ids
--no-implied-dirs --existing --xattrs --acls . -e ssh
-oPasswordAuthentication=no -oStrictHostKeyChecking=no -i
/var/lib/glusterd/geo-replication/secret.pem -p 22
-oControlMaster=auto -S
/tmp/gsyncd-aux-ssh-fdyDHm/78cf8b204207154de59d7ac32eee737f.sock
--compress geo-rep-user@gfs6:/proc/17554/cwd error=12
[2017-09-29 15:53:29.797259] I
[syncdutils(/gfs/brick2/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:29.799386] I
[repce(/gfs/brick2/gv0):92:service_loop] RepceServer:
terminating on reaching EOF.
[2017-09-29 15:53:29.799570] I
[syncdutils(/gfs/brick2/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:30.232007] I
[resource(/gfs/brick1/gv0):1772:connect_remote] SSH:
Initializing SSH connection between master and slave...
[2017-09-29 15:53:30.232738] I
[changelogagent(/gfs/brick1/gv0):73:__init__]
ChangelogAgent: Agent listining...
[2017-09-29 15:53:30.248094] I
[monitor(monitor):363:monitor] Monitor: worker died in
startup phase brick=/gfs/brick2/gv0
[2017-09-29 15:53:30.252793] I
[gsyncdstatus(monitor):242:set_worker_status] GeorepStatus:
Worker Status Change status=Faulty
[2017-09-29 15:53:30.742058] I
[master(/gfs/arbiter/gv0):1515:register] _GMaster: Working
dir path=/var/lib/misc/glusterfsd/gfsvol/ssh%3A%2F%2Fgeo-rep-user%4010.1.1.104%3Agluster%3A%2F%2F127.0.0.1%3Agfsvol_rep/40efd54bad1d5828a1221dd560de376f
[2017-09-29 15:53:30.742360] I
[resource(/gfs/arbiter/gv0):1654:service_loop] GLUSTER:
Register time time=1506700410
[2017-09-29 15:53:30.754738] I
[gsyncdstatus(/gfs/arbiter/gv0):275:set_active]
GeorepStatus: Worker Status Change status=Active
[2017-09-29 15:53:30.756040] I
[gsyncdstatus(/gfs/arbiter/gv0):247:set_worker_crawl_status]
GeorepStatus: Crawl Status Change status=History
Crawl
[2017-09-29 15:53:30.756280] I
[master(/gfs/arbiter/gv0):1429:crawl] _GMaster: starting
history crawl turns=1 stime=(1506637819,
0) entry_stime=None etime=1506700410
[2017-09-29 15:53:31.758335] I
[master(/gfs/arbiter/gv0):1458:crawl] _GMaster: slave's time stime=(1506637819,
0)
[2017-09-29 15:53:31.939471] I
[resource(/gfs/brick1/gv0):1779:connect_remote] SSH: SSH
connection between master and slave established. duration=1.7073
[2017-09-29 15:53:31.939665] I
[resource(/gfs/brick1/gv0):1494:connect] GLUSTER: Mounting
gluster volume locally...
[2017-09-29 15:53:32.284754] I
[master(/gfs/arbiter/gv0):1860:syncjob] Syncer: Sync Time
Taken duration=0.0372 num_files=1 job=3 return_code=12
[2017-09-29 15:53:32.284996] E
[resource(/gfs/arbiter/gv0):208:errlog] Popen: command
returned error cmd=rsync
-aR0 --inplace --files-from=- --super --stats --numeric-ids
--no-implied-dirs --existing --xattrs --acls . -e ssh
-oPasswordAuthentication=no -oStrictHostKeyChecking=no -i
/var/lib/glusterd/geo-replication/secret.pem -p 22
-oControlMaster=auto -S
/tmp/gsyncd-aux-ssh-i_wIMu/5f1d38555e12d0018fb6ed1e6bd63023.sock
--compress geo-rep-user@gfs5:/proc/8334/cwd error=12
[2017-09-29 15:53:32.300786] I
[syncdutils(/gfs/arbiter/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:32.303261] I
[repce(/gfs/arbiter/gv0):92:service_loop] RepceServer:
terminating on reaching EOF.
[2017-09-29 15:53:32.303452] I
[syncdutils(/gfs/arbiter/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:32.732858] I
[monitor(monitor):363:monitor] Monitor: worker died in
startup phase brick=/gfs/arbiter/gv0
[2017-09-29 15:53:32.736538] I
[gsyncdstatus(monitor):242:set_worker_status] GeorepStatus:
Worker Status Change status=Faulty
[2017-09-29 15:53:33.35219] I
[resource(/gfs/brick1/gv0):1507:connect] GLUSTER: Mounted
gluster volume duration=1.0954
[2017-09-29 15:53:33.35403] I
[gsyncd(/gfs/brick1/gv0):799:main_i] <top>: Closing
feedback fd, waking up the monitor
[2017-09-29 15:53:35.50920] I
[master(/gfs/brick1/gv0):1515:register] _GMaster: Working
dir path=/var/lib/misc/glusterfsd/gfsvol/ssh%3A%2F%2Fgeo-rep-user%4010.1.1.104%3Agluster%3A%2F%2F127.0.0.1%3Agfsvol_rep/f0393acbf9a1583960edbbd2f1dfb6b4
[2017-09-29 15:53:35.51227] I
[resource(/gfs/brick1/gv0):1654:service_loop] GLUSTER:
Register time time=1506700415
[2017-09-29 15:53:35.64343] I
[gsyncdstatus(/gfs/brick1/gv0):275:set_active] GeorepStatus:
Worker Status Change status=Active
[2017-09-29 15:53:35.65696] I
[gsyncdstatus(/gfs/brick1/gv0):247:set_worker_crawl_status]
GeorepStatus: Crawl Status Change status=History
Crawl
[2017-09-29 15:53:35.65915] I
[master(/gfs/brick1/gv0):1429:crawl] _GMaster: starting
history crawl turns=1 stime=(1506637819,
0) entry_stime=None etime=1506700415
[2017-09-29 15:53:36.68135] I
[master(/gfs/brick1/gv0):1458:crawl] _GMaster: slave's time stime=(1506637819,
0)
[2017-09-29 15:53:36.578717] I
[master(/gfs/brick1/gv0):1860:syncjob] Syncer: Sync Time
Taken duration=0.0376 num_files=1 job=1 return_code=12
[2017-09-29 15:53:36.578946] E
[resource(/gfs/brick1/gv0):208:errlog] Popen: command
returned error cmd=rsync
-aR0 --inplace --files-from=- --super --stats --numeric-ids
--no-implied-dirs --existing --xattrs --acls . -e ssh
-oPasswordAuthentication=no -oStrictHostKeyChecking=no -i
/var/lib/glusterd/geo-replication/secret.pem -p 22
-oControlMaster=auto -S
/tmp/gsyncd-aux-ssh-2pGnVA/78cf8b204207154de59d7ac32eee737f.sock
--compress geo-rep-user@gfs6:/proc/17648/cwd error=12
[2017-09-29 15:53:36.590887] I
[syncdutils(/gfs/brick1/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:36.596421] I
[repce(/gfs/brick1/gv0):92:service_loop] RepceServer:
terminating on reaching EOF.
[2017-09-29 15:53:36.596635] I
[syncdutils(/gfs/brick1/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:37.41075] I
[monitor(monitor):363:monitor] Monitor: worker died in
startup phase brick=/gfs/brick1/gv0
[2017-09-29 15:53:37.44637] I
[gsyncdstatus(monitor):242:set_worker_status] GeorepStatus:
Worker Status Change status=Faulty
[2017-09-29 15:53:40.484637] I
[resource(/gfs/brick2/gv0):1772:connect_remote] SSH:
Initializing SSH connection between master and slave...
[2017-09-29 15:53:40.497215] I
[changelogagent(/gfs/brick2/gv0):73:__init__]
ChangelogAgent: Agent listining...
[2017-09-29 15:53:42.278539] I
[resource(/gfs/brick2/gv0):1779:connect_remote] SSH: SSH
connection between master and slave established. duration=1.7936
[2017-09-29 15:53:42.278747] I
[resource(/gfs/brick2/gv0):1494:connect] GLUSTER: Mounting
gluster volume locally...
[2017-09-29 15:53:42.985567] I
[resource(/gfs/arbiter/gv0):1772:connect_remote] SSH:
Initializing SSH connection between master and slave...
[2017-09-29 15:53:42.986390] I
[changelogagent(/gfs/arbiter/gv0):73:__init__]
ChangelogAgent: Agent listining...
[2017-09-29 15:53:43.377480] I
[resource(/gfs/brick2/gv0):1507:connect] GLUSTER: Mounted
gluster volume duration=1.0986
[2017-09-29 15:53:43.377681] I
[gsyncd(/gfs/brick2/gv0):799:main_i] <top>: Closing
feedback fd, waking up the monitor
[2017-09-29 15:53:44.767873] I
[resource(/gfs/arbiter/gv0):1779:connect_remote] SSH: SSH
connection between master and slave established. duration=1.7821
[2017-09-29 15:53:44.768059] I
[resource(/gfs/arbiter/gv0):1494:connect] GLUSTER: Mounting
gluster volume locally...
[2017-09-29 15:53:45.393150] I
[master(/gfs/brick2/gv0):1515:register] _GMaster: Working
dir path=/var/lib/misc/glusterfsd/gfsvol/ssh%3A%2F%2Fgeo-rep-user%4010.1.1.104%3Agluster%3A%2F%2F127.0.0.1%3Agfsvol_rep/1eb15856c627f181513bf23f8bf2f9d0
[2017-09-29 15:53:45.393373] I
[resource(/gfs/brick2/gv0):1654:service_loop] GLUSTER:
Register time time=1506700425
[2017-09-29 15:53:45.404992] I
[gsyncdstatus(/gfs/brick2/gv0):275:set_active] GeorepStatus:
Worker Status Change status=Active
[2017-09-29 15:53:45.406404] I
[gsyncdstatus(/gfs/brick2/gv0):247:set_worker_crawl_status]
GeorepStatus: Crawl Status Change status=History
Crawl
[2017-09-29 15:53:45.406660] I
[master(/gfs/brick2/gv0):1429:crawl] _GMaster: starting
history crawl turns=1 stime=(1506637819,
0) entry_stime=None etime=1506700425
[2017-09-29 15:53:45.863256] I
[resource(/gfs/arbiter/gv0):1507:connect] GLUSTER: Mounted
gluster volume duration=1.0950
[2017-09-29 15:53:45.863430] I
[gsyncd(/gfs/arbiter/gv0):799:main_i] <top>: Closing
feedback fd, waking up the monitor
[2017-09-29 15:53:46.408814] I
[master(/gfs/brick2/gv0):1458:crawl] _GMaster: slave's time stime=(1506637819,
0)
[2017-09-29 15:53:46.920937] I
[master(/gfs/brick2/gv0):1860:syncjob] Syncer: Sync Time
Taken duration=0.0363 num_files=1 job=3 return_code=12
[2017-09-29 15:53:46.921140] E
[resource(/gfs/brick2/gv0):208:errlog] Popen: command
returned error cmd=rsync
-aR0 --inplace --files-from=- --super --stats --numeric-ids
--no-implied-dirs --existing --xattrs --acls . -e ssh
-oPasswordAuthentication=no -oStrictHostKeyChecking=no -i
/var/lib/glusterd/geo-replication/secret.pem -p 22
-oControlMaster=auto -S
/tmp/gsyncd-aux-ssh-DCruqU/78cf8b204207154de59d7ac32eee737f.sock
--compress geo-rep-user@gfs6:/proc/17747/cwd error=12
[2017-09-29 15:53:46.937288] I
[syncdutils(/gfs/brick2/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:46.940479] I
[repce(/gfs/brick2/gv0):92:service_loop] RepceServer:
terminating on reaching EOF.
[2017-09-29 15:53:46.940772] I
[syncdutils(/gfs/brick2/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:47.303791] I
[resource(/gfs/brick1/gv0):1772:connect_remote] SSH:
Initializing SSH connection between master and slave...
[2017-09-29 15:53:47.316878] I
[changelogagent(/gfs/brick1/gv0):73:__init__]
ChangelogAgent: Agent listining...
[2017-09-29 15:53:47.382605] I
[monitor(monitor):363:monitor] Monitor: worker died in
startup phase brick=/gfs/brick2/gv0
[2017-09-29 15:53:47.387926] I
[gsyncdstatus(monitor):242:set_worker_status] GeorepStatus:
Worker Status Change status=Faulty
[2017-09-29 15:53:47.876825] I
[master(/gfs/arbiter/gv0):1515:register] _GMaster: Working
dir path=/var/lib/misc/glusterfsd/gfsvol/ssh%3A%2F%2Fgeo-rep-user%4010.1.1.104%3Agluster%3A%2F%2F127.0.0.1%3Agfsvol_rep/40efd54bad1d5828a1221dd560de376f
[2017-09-29 15:53:47.877044] I
[resource(/gfs/arbiter/gv0):1654:service_loop] GLUSTER:
Register time time=1506700427
[2017-09-29 15:53:47.888930] I
[gsyncdstatus(/gfs/arbiter/gv0):275:set_active]
GeorepStatus: Worker Status Change status=Active
[2017-09-29 15:53:47.890043] I
[gsyncdstatus(/gfs/arbiter/gv0):247:set_worker_crawl_status]
GeorepStatus: Crawl Status Change status=History
Crawl
[2017-09-29 15:53:47.890285] I
[master(/gfs/arbiter/gv0):1429:crawl] _GMaster: starting
history crawl turns=1 stime=(1506637819,
0) entry_stime=None etime=1506700427
[2017-09-29 15:53:48.891966] I
[master(/gfs/arbiter/gv0):1458:crawl] _GMaster: slave's time stime=(1506637819,
0)
[2017-09-29 15:53:48.998140] I
[resource(/gfs/brick1/gv0):1779:connect_remote] SSH: SSH
connection between master and slave established. duration=1.6942
[2017-09-29 15:53:48.998330] I
[resource(/gfs/brick1/gv0):1494:connect] GLUSTER: Mounting
gluster volume locally...
[2017-09-29 15:53:49.406749] I
[master(/gfs/arbiter/gv0):1860:syncjob] Syncer: Sync Time
Taken duration=0.0383 num_files=1 job=2 return_code=12
[2017-09-29 15:53:49.406999] E
[resource(/gfs/arbiter/gv0):208:errlog] Popen: command
returned error cmd=rsync
-aR0 --inplace --files-from=- --super --stats --numeric-ids
--no-implied-dirs --existing --xattrs --acls . -e ssh
-oPasswordAuthentication=no -oStrictHostKeyChecking=no -i
/var/lib/glusterd/geo-replication/secret.pem -p 22
-oControlMaster=auto -S
/tmp/gsyncd-aux-ssh-5VeNKp/5f1d38555e12d0018fb6ed1e6bd63023.sock
--compress geo-rep-user@gfs5:/proc/8448/cwd error=12
[2017-09-29 15:53:49.426301] I
[syncdutils(/gfs/arbiter/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:49.428428] I
[repce(/gfs/arbiter/gv0):92:service_loop] RepceServer:
terminating on reaching EOF.
[2017-09-29 15:53:49.428618] I
[syncdutils(/gfs/arbiter/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:49.868974] I
[monitor(monitor):363:monitor] Monitor: worker died in
startup phase brick=/gfs/arbiter/gv0
[2017-09-29 15:53:49.872705] I
[gsyncdstatus(monitor):242:set_worker_status] GeorepStatus:
Worker Status Change status=Faulty
[2017-09-29 15:53:50.78377] I
[resource(/gfs/brick1/gv0):1507:connect] GLUSTER: Mounted
gluster volume duration=1.0799
[2017-09-29 15:53:50.78643] I
[gsyncd(/gfs/brick1/gv0):799:main_i] <top>: Closing
feedback fd, waking up the monitor
[2017-09-29 15:53:52.93027] I
[master(/gfs/brick1/gv0):1515:register] _GMaster: Working
dir path=/var/lib/misc/glusterfsd/gfsvol/ssh%3A%2F%2Fgeo-rep-user%4010.1.1.104%3Agluster%3A%2F%2F127.0.0.1%3Agfsvol_rep/f0393acbf9a1583960edbbd2f1dfb6b4
[2017-09-29 15:53:52.93331] I
[resource(/gfs/brick1/gv0):1654:service_loop] GLUSTER:
Register time time=1506700432
[2017-09-29 15:53:52.107558] I
[gsyncdstatus(/gfs/brick1/gv0):275:set_active] GeorepStatus:
Worker Status Change status=Active
[2017-09-29 15:53:52.108943] I
[gsyncdstatus(/gfs/brick1/gv0):247:set_worker_crawl_status]
GeorepStatus: Crawl Status Change status=History
Crawl
[2017-09-29 15:53:52.109178] I
[master(/gfs/brick1/gv0):1429:crawl] _GMaster: starting
history crawl turns=1 stime=(1506637819,
0) entry_stime=None etime=1506700432
[2017-09-29 15:53:53.111017] I
[master(/gfs/brick1/gv0):1458:crawl] _GMaster: slave's time stime=(1506637819,
0)
[2017-09-29 15:53:53.622422] I
[master(/gfs/brick1/gv0):1860:syncjob] Syncer: Sync Time
Taken duration=0.0369 num_files=1 job=2 return_code=12
[2017-09-29 15:53:53.622683] E
[resource(/gfs/brick1/gv0):208:errlog] Popen: command
returned error cmd=rsync
-aR0 --inplace --files-from=- --super --stats --numeric-ids
--no-implied-dirs --existing --xattrs --acls . -e ssh
-oPasswordAuthentication=no -oStrictHostKeyChecking=no -i
/var/lib/glusterd/geo-replication/secret.pem -p 22
-oControlMaster=auto -S
/tmp/gsyncd-aux-ssh-DBB9pL/78cf8b204207154de59d7ac32eee737f.sock
--compress geo-rep-user@gfs6:/proc/17837/cwd error=12
[2017-09-29 15:53:53.635057] I
[syncdutils(/gfs/brick1/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:53.639909] I
[repce(/gfs/brick1/gv0):92:service_loop] RepceServer:
terminating on reaching EOF.
[2017-09-29 15:53:53.640172] I
[syncdutils(/gfs/brick1/gv0):271:finalize] <top>:
exiting.
[2017-09-29 15:53:54.85591] I
[monitor(monitor):363:monitor] Monitor: worker died in
startup phase brick=/gfs/brick1/gv0
[2017-09-29 15:53:54.89509] I
[gsyncdstatus(monitor):242:set_worker_status] GeorepStatus:
Worker Status Change status=Faulty
rsync -aR0 --inplace --files-from=- --super --stats
--numeric-ids --no-implied-dirs --existing --xattrs --acls .
-e ssh -oPasswordAuthentication=no -oStrictHostKeyChecking=no
-i /var/lib/glusterd/geo-replication/secret.pem -p 22
-oControlMaster=auto -S
/tmp/gsyncd-aux-ssh-DBB9pL/78cf8b204207154de59d7ac32eee737f.sock
--compress geo-rep-user@gfs6:/proc/17837/cwd
especially the ssh part, since I notice a lot of failed login
attempts when geo-replication is running.