Hi,
Good day!
I see your change
I292f1e58d6971e111725e1baea1fe98b890b43e2 in glusterfs code.
However from my test glusterfs 3.12.3 (already contain that modification) still has something wrong,
gluster volume status mstate
Status of volume: mstate
Gluster process TCP Port RDMA Port Online Pid
------------------------------------------------------------------------------
Brick sn-0.local:/mnt/bricks/mstate/brick
49157 0 Y 1155
Brick sn-1.local:/mnt/bricks/mstate/brick 49154 0 Y 1201
Self-heal Daemon on localhost N/A N/A Y 1188
Self-heal Daemon on sn-2.local N/A N/A Y 27109
Self-heal Daemon on sn-1.local N/A N/A Y 4836
Task Status of Volume mstate
------------------------------------------------------------------------------
There are no active volume tasks
But ps shows
ps aux | grep gluster
root 380 0.0 0.6 377892 13760 ? Ss 20:07 0:00 python /usr/sbin/glustereventsd --pid-file /var/run/glustereventsd.pid
root 409 0.0 0.4 82964 9148 ? S 20:07 0:00 python /usr/sbin/glustereventsd --pid-file /var/run/glustereventsd.pid
root 811 0.5 1.0 528720 21636 ? Ssl 20:07 0:01 /usr/sbin/glusterd --volfile=/opt/nokia/libexec/StorageUtils/etc/glusterd/glusterd.vol -p /run/glusterd.pid
root 1088 0.1 0.6 1036648 13996 ? Ssl 20:07 0:00 /usr/sbin/glusterfsd -s sn-0.local --volfile-id export.sn-0.local.mnt-bricks-export-brick -p /var/run/gluster/vols/export/sn-0.local-mnt-bricks-export-brick.pid
-S /var/run/gluster/44f098f14fe45b74b61c5a1d98993d15.socket --brick-name /mnt/bricks/export/brick -l /var/log/glusterfs/bricks/mnt-bricks-export-brick.log --xlator-option *-posix.glusterd-uuid=f830d809-c935-451f-853a-1364f76a4160 --brick-port 49152 --xlator-option
export-server.listen-port=49152 --xlator-option transport.socket.bind-address=sn-0.local
root 1153 1.8 0.7 1234036 14664 ? Ssl 20:07 0:06 /usr/sbin/glusterfsd -s sn-0.local --volfile-id log.sn-0.local.mnt-bricks-log-brick -p /var/run/gluster/vols/log/sn-0.local-mnt-bricks-log-brick.pid
-S /var/run/gluster/0af76d9541921cccad34e5fd3041a44d.socket --brick-name /mnt/bricks/log/brick -l /var/log/glusterfs/bricks/mnt-bricks-log-brick.log --xlator-option *-posix.glusterd-uuid=f830d809-c935-451f-853a-1364f76a4160 --brick-port 49156 --xlator-option
log-server.listen-port=49156 --xlator-option transport.socket.bind-address=sn-0.local
root 1155 0.0 0.5 772996 10580 ? Ssl 20:07 0:00 /usr/sbin/glusterfsd -s sn-0.local --volfile-id mstate.sn-0.local.mnt-bricks-mstate-brick -p /var/run/gluster/vols/mstate/sn-0.local-mnt-bricks-mstate-brick.pid
-S /var/run/gluster/79d00394b1085adcf7dc1fd4c57f71bc.socket --brick-name /mnt/bricks/mstate/brick -l /var/log/glusterfs/bricks/mnt-bricks-mstate-brick.log --xlator-option *-posix.glusterd-uuid=f830d809-c935-451f-853a-1364f76a4160 --brick-port 49154 --xlator-option
mstate-server.listen-port=49154 --xlator-option transport.socket.bind-address=sn-0.local
root 1167 0.4 0.7 1235512 15488 ? Ssl 20:07 0:01 /usr/sbin/glusterfsd -s sn-0.local --volfile-id services.sn-0.local.mnt-bricks-services-brick -p /var/run/gluster/vols/services/sn-0.local-mnt-bricks-services-brick.pid
-S /var/run/gluster/fc16d9fe6fd0695555a5592343da7ba3.socket --brick-name /mnt/bricks/services/brick -l /var/log/glusterfs/bricks/mnt-bricks-services-brick.log --xlator-option *-posix.glusterd-uuid=f830d809-c935-451f-853a-1364f76a4160 --brick-port 49155 --xlator-option
services-server.listen-port=49155 --xlator-option transport.socket.bind-address=sn-0.local
root 1188 0.0 0.5 644916 11896 ? Ssl 20:07 0:00 /usr/sbin/glusterfs -s sn-0.local --volfile-id gluster/glustershd -p /var/run/gluster/glustershd/glustershd.pid -l
/var/log/glusterfs/glustershd.log -S /var/run/gluster/c405d28a3a79d88436a826d75cf7153a.socket --xlator-option *replicate*.node-uuid=f830d809-c935-451f-853a-1364f76a4160
root 2324 0.0 0.6 629784 13932 ? Ssl 20:07 0:00 /usr/sbin/glusterfs --acl --volfile-server=sn-0.local --volfile-server=sn-1.local --volfile-id=/services /mnt/services
root 2422 0.0 0.5 431036 10604 ? Ssl 20:07 0:00 /usr/sbin/glusterfs --acl --volfile-server=sn-0.local --volfile-server=sn-1.local --volfile-id=/export /mnt/export
root 2520 0.0 0.5 498712 11024 ? Ssl 20:07 0:00 /usr/sbin/glusterfs --acl --volfile-server=sn-0.local --volfile-server=sn-1.local --volfile-id=/log /mnt/log
root 2618 0.0 0.5 431036 10476 ? Ssl 20:07 0:00 /usr/sbin/glusterfs --acl --volfile-server=sn-0.local --volfile-server=sn-1.local --volfile-id=/mstate /mnt/mstate
root 8136 0.0 0.0 8224 784 pts/0 S+ 20:13 0:00 grep gluster
[root@sn-0:/root]
So glustershd can not connect with mstate brick on sn-0:
gluster volume heal mstate info
Brick sn-0.local:/mnt/bricks/mstate/brick
Status: Transport endpoint is not connected
Number of entries: -
Brick sn-1.local:/mnt/bricks/mstate/brick
Status: Connected
Number of entries: 0
From sn-0 glusterd log I find:
[2017-12-09 18:07:34.366851] I [glusterd-utils.c:5926:glusterd_brick_start] 0-management: starting a fresh brick process for brick /mnt/bricks/export/brick
[2017-12-09 18:07:34.368484] I [rpc-clnt.c:1044:rpc_clnt_connection_init] 0-management: setting frame-timeout to 600
[2017-12-09 18:07:34.406488] I [glusterd-utils.c:5926:glusterd_brick_start] 0-management: starting a fresh brick process for brick /mnt/bricks/log/brick
[2017-12-09 18:07:34.410384] I [rpc-clnt.c:1044:rpc_clnt_connection_init] 0-management: setting frame-timeout to 600
[2017-12-09 18:07:34.410530] I [glusterd-utils.c:5926:glusterd_brick_start] 0-management: starting a fresh brick process for brick /mnt/bricks/mstate/brick
[2017-12-09 18:07:34.413819] I [rpc-clnt.c:1044:rpc_clnt_connection_init] 0-management: setting frame-timeout to 600
[2017-12-09 18:07:34.413926] C [MSGID: 106003] [glusterd-server-quorum.c:352:glusterd_do_volume_quorum_action] 0-management: Server quorum regained for volume services. Starting local
bricks.
[2017-12-09 18:07:34.414031] I [glusterd-utils.c:5926:glusterd_brick_start] 0-management: starting a fresh brick process for brick /mnt/bricks/services/brick
[2017-12-09 18:07:34.417573] I [rpc-clnt.c:1044:rpc_clnt_connection_init] 0-management: setting frame-timeout to 600
[2017-12-09 18:07:34.462163] I [MSGID: 106492] [glusterd-handler.c:2718:__glusterd_handle_friend_update] 0-glusterd: Received friend update from uuid: e4ea0550-873b-410c-a29e-3901c4737cd8
[2017-12-09 18:07:34.470228] I [MSGID: 106502] [glusterd-handler.c:2763:__glusterd_handle_friend_update] 0-management: Received my uuid as Friend
[2017-12-09 18:07:34.470432] I [MSGID: 106492] [glusterd-handler.c:2718:__glusterd_handle_friend_update] 0-glusterd: Received friend update from uuid: feddd74c-c768-4157-b152-31d5dd488dcc
[2017-12-09 18:07:34.470493] I [MSGID: 106502] [glusterd-handler.c:2763:__glusterd_handle_friend_update] 0-management: Received my uuid as Friend
[2017-12-09 18:07:34.479540] I [MSGID: 106163] [glusterd-handshake.c:1316:__glusterd_mgmt_hndsk_versions_ack] 0-management: using the op-version 31202
[2017-12-09 18:07:34.488767] I [socket.c:2478:socket_event_handler] 0-transport: EPOLLERR - disconnecting now
[2017-12-09 18:07:34.489020] I [MSGID: 106005] [glusterd-handler.c:6071:__glusterd_brick_rpc_notify] 0-management: Brick sn-0.local:/mnt/bricks/export/brick has disconnected from glusterd.
[2017-12-09 18:07:34.489450] I [socket.c:2478:socket_event_handler] 0-transport: EPOLLERR - disconnecting now
[2017-12-09 18:07:34.489717] I [MSGID: 106005] [glusterd-handler.c:6071:__glusterd_brick_rpc_notify] 0-management: Brick sn-0.local:/mnt/bricks/log/brick has disconnected from glusterd.
[2017-12-09 18:07:34.490114] I [socket.c:2478:socket_event_handler] 0-transport: EPOLLERR - disconnecting now
[2017-12-09 18:07:34.490398] I [MSGID: 106005] [glusterd-handler.c:6071:__glusterd_brick_rpc_notify] 0-management: Brick sn-0.local:/mnt/bricks/mstate/brick has disconnected from glusterd.
[2017-12-09 18:07:34.490785] I [socket.c:2478:socket_event_handler] 0-transport: EPOLLERR - disconnecting now
[2017-12-09 18:07:34.491049] I [MSGID: 106005] [glusterd-handler.c:6071:__glusterd_brick_rpc_notify] 0-management: Brick sn-0.local:/mnt/bricks/services/brick has disconnected from
glusterd.
[2017-12-09 18:07:34.491514] I [MSGID: 106493] [glusterd-rpc-ops.c:486:__glusterd_friend_add_cbk] 0-glusterd: Received ACC from uuid: feddd74c-c768-4157-b152-31d5dd488dcc, host: sn-1.local,
port: 0
[2017-12-09 18:07:34.501902] I [glusterd-utils.c:5926:glusterd_brick_start] 0-management: starting a fresh brick process for brick /mnt/bricks/log/brick
[2017-12-09 18:07:34.503376] I [rpc-clnt.c:1044:rpc_clnt_connection_init] 0-management: setting frame-timeout to 600
[2017-12-09 18:07:34.503502] I [glusterd-utils.c:5926:glusterd_brick_start] 0-management: starting a fresh brick process for brick /mnt/bricks/mstate/brick
[2017-12-09 18:07:34.506655] I [rpc-clnt.c:1044:rpc_clnt_connection_init] 0-management: setting frame-timeout to 600
[2017-12-09 18:07:34.507887] I [socket.c:2478:socket_event_handler] 0-transport: EPOLLERR - disconnecting now
[2017-12-09 18:07:34.508188] I [MSGID: 106005] [glusterd-handler.c:6071:__glusterd_brick_rpc_notify] 0-management: Brick sn-0.local:/mnt/bricks/log/brick has disconnected from glusterd.
[2017-12-09 18:07:34.509479] I [socket.c:2478:socket_event_handler] 0-transport: EPOLLERR - disconnecting now
……
[2017-12-09 18:07:36.023087] I [glusterd-utils.c:5834:glusterd_brick_start] 0-management: discovered already-running brick /mnt/bricks/export/brick
[2017-12-09 18:07:36.023106] I [MSGID: 106143] [glusterd-pmap.c:280:pmap_registry_bind] 0-pmap: adding brick /mnt/bricks/export/brick on port 49152
[2017-12-09 18:07:36.060419] I [glusterd-utils.c:5834:glusterd_brick_start] 0-management: discovered already-running brick /mnt/bricks/log/brick
[2017-12-09 18:07:36.060442] I [MSGID: 106143] [glusterd-pmap.c:280:pmap_registry_bind] 0-pmap: adding brick /mnt/bricks/log/brick on port 49156
[2017-12-09 18:07:36.094987] I [glusterd-utils.c:5834:glusterd_brick_start] 0-management: discovered already-running brick /mnt/bricks/mstate/brick
[2017-12-09 18:07:36.095008] I [MSGID: 106143] [glusterd-pmap.c:280:pmap_registry_bind] 0-pmap: adding brick /mnt/bricks/mstate/brick on port
49157
[2017-12-09 18:07:36.129059] I [glusterd-utils.c:5834:glusterd_brick_start] 0-management: discovered already-running brick /mnt/bricks/services/brick
[2017-12-09 18:07:36.129108] I [MSGID: 106143] [glusterd-pmap.c:280:pmap_registry_bind] 0-pmap: adding brick /mnt/bricks/services/brick on port 49155
[2017-12-09 18:07:36.159408] I [rpc-clnt.c:1044:rpc_clnt_connection_init] 0-snapd: setting frame-timeout to 600
……
[2017-12-09 18:07:36.919192] I [MSGID: 106493] [glusterd-rpc-ops.c:701:__glusterd_friend_update_cbk] 0-management: Received ACC from uuid: e4ea0550-873b-410c-a29e-3901c4737cd8
[2017-12-09 18:07:36.920269] I [MSGID: 106492] [glusterd-handler.c:2718:__glusterd_handle_friend_update] 0-glusterd: Received friend update from uuid: e4ea0550-873b-410c-a29e-3901c4737cd8
[2017-12-09 18:07:36.927413] I [MSGID: 106502] [glusterd-handler.c:2763:__glusterd_handle_friend_update] 0-management: Received my uuid as Friend
[2017-12-09 18:07:36.949762] I [MSGID: 106493] [glusterd-rpc-ops.c:701:__glusterd_friend_update_cbk] 0-management: Received ACC from uuid: feddd74c-c768-4157-b152-31d5dd488dcc
[2017-12-09 18:07:36.949817] I [MSGID: 106492] [glusterd-handler.c:2718:__glusterd_handle_friend_update] 0-glusterd: Received friend update from uuid: feddd74c-c768-4157-b152-31d5dd488dcc
[2017-12-09 18:07:36.949882] I [MSGID: 106502] [glusterd-handler.c:2763:__glusterd_handle_friend_update] 0-management: Received my uuid as Friend
[2017-12-09 18:07:37.768326] W [MSGID: 101095] [xlator.c:213:xlator_dynload] 0-xlator: /usr/lib64/glusterfs/3.12.3/xlator/nfs/server.so: cannot open shared object file: No such file
or directory
[2017-12-09 18:07:37.783811] I [MSGID: 106143] [glusterd-pmap.c:280:pmap_registry_bind] 0-pmap: adding brick /mnt/bricks/services/brick on port 49155
[2017-12-09 18:07:37.783901] I [MSGID: 106143] [glusterd-pmap.c:280:pmap_registry_bind] 0-pmap: adding brick /mnt/bricks/mstate/brick on port
49154
[2017-12-09 18:07:37.783966] I [MSGID: 106143] [glusterd-pmap.c:280:pmap_registry_bind] 0-pmap: adding brick /mnt/bricks/log/brick on port 49156
[2017-12-09 18:07:37.784032] I [MSGID: 106143] [glusterd-pmap.c:280:pmap_registry_bind] 0-pmap: adding brick /mnt/bricks/export/brick on port 49152
[2017-12-09 18:07:37.810305] W [MSGID: 101095] [xlator.c:162:xlator_volopt_dynload] 0-xlator: /usr/lib64/glusterfs/3.12.3/xlator/nfs/server.so: cannot open shared object file: No such
file or directory
And the mstate brick process on sn-0 shows:
[2017-12-09 18:07:02.743046] W [glusterfsd.c:1375:cleanup_and_exit] (-->/lib64/libpthread.so.0(+0x7454) [0x7f5df4db6454] -->/usr/sbin/glusterfsd(glusterfs_sigwaiter+0xdc) [0x40a50c]
-->/usr/sbin/glusterfsd(cleanup_and_exit+0x77) [0x4089ec] ) 0-: received signum (15), shutting down
[2017-12-09 18:07:34.416169] I [MSGID: 100030] [glusterfsd.c:2524:main] 0-/usr/sbin/glusterfsd: Started running /usr/sbin/glusterfsd version 3.12.3 (args: /usr/sbin/glusterfsd -s sn-0.local
--volfile-id mstate.sn-0.local.mnt-bricks-mstate-brick -p /var/run/gluster/vols/mstate/sn-0.local-mnt-bricks-mstate-brick.pid -S /var/run/gluster/79d00394b1085adcf7dc1fd4c57f71bc.socket --brick-name /mnt/bricks/mstate/brick -l /var/log/glusterfs/bricks/mnt-bricks-mstate-brick.log
--xlator-option *-posix.glusterd-uuid=f830d809-c935-451f-853a-1364f76a4160 --brick-port
49154 --xlator-option mstate-server.listen-port=49154 --xlator-option transport.socket.bind-address=sn-0.local)
[2017-12-09 18:07:34.511598] I [MSGID: 100030] [glusterfsd.c:2524:main] 0-/usr/sbin/glusterfsd: Started running /usr/sbin/glusterfsd version 3.12.3 (args: /usr/sbin/glusterfsd -s sn-0.local
--volfile-id mstate.sn-0.local.mnt-bricks-mstate-brick -p /var/run/gluster/vols/mstate/sn-0.local-mnt-bricks-mstate-brick.pid -S /var/run/gluster/79d00394b1085adcf7dc1fd4c57f71bc.socket --brick-name /mnt/bricks/mstate/brick -l /var/log/glusterfs/bricks/mnt-bricks-mstate-brick.log
--xlator-option *-posix.glusterd-uuid=f830d809-c935-451f-853a-1364f76a4160 --brick-port
49157 --xlator-option mstate-server.listen-port=49157 --xlator-option transport.socket.bind-address=sn-0.local)
[2017-12-09 18:07:34.542268] E [MSGID: 100018] [glusterfsd.c:2109:glusterfs_pidfile_update] 0-glusterfsd: pidfile /var/run/gluster/vols/mstate/sn-0.local-mnt-bricks-mstate-brick.pid
lock failed [Resource temporarily unavailable]
[2017-12-09 18:07:34.549242] W [MSGID: 101002] [options.c:995:xl_opt_validate] 0-glusterfs: option 'address-family' is deprecated, preferred is 'transport.address-family', continuing
with correction
[2017-12-09 18:07:34.550119] I [MSGID: 101190] [event-epoll.c:613:event_dispatch_epoll_worker] 0-epoll: Started thread with index 1
[2017-12-09 18:07:37.690037] I [rpcsvc.c:2322:rpcsvc_set_outstanding_rpc_limit] 0-rpc-service: Configured rpc.outstanding-rpc-limit with value 64
[2017-12-09 18:07:37.690125] W [MSGID: 101002] [options.c:995:xl_opt_validate] 0-mstate-server: option 'listen-port' is deprecated, preferred is 'transport.socket.listen-port', continuing
with correction
……
Final graph:
+------------------------------------------------------------------------------+
1: volume mstate-posix
2: type storage/posix
3: option glusterd-uuid f830d809-c935-451f-853a-1364f76a4160
4: option directory /mnt/bricks/mstate/brick
5: option volume-id 5fc29728-b5de-4b5f-8f3c-9654acda2577
6: option shared-brick-count 1
7: end-volume
8:
9: volume mstate-trash
10: type features/trash
11: option trash-dir .trashcan
12: option brick-path /mnt/bricks/mstate/brick
13: option trash-internal-op off
14: subvolumes mstate-posix
15: end-volume
16:
17: volume mstate-changetimerecorder
18: type features/changetimerecorder
19: option db-type sqlite3
20: option hot-brick off
21: option db-name brick.db
22: option db-path /mnt/bricks/mstate/brick/.glusterfs/
23: option record-exit off
24: option ctr_link_consistency off
25: option ctr_lookupheal_link_timeout 300
26: option ctr_lookupheal_inode_timeout 300
27: option record-entry on
28: option ctr-enabled off
29: option record-counters off
30: option ctr-record-metadata-heat off
31: option sql-db-cachesize 12500
32: option sql-db-wal-autocheckpoint 25000
33: subvolumes mstate-trash
34: end-volume
35:
36: volume mstate-changelog
37: type features/changelog
38: option changelog-brick /mnt/bricks/mstate/brick
39: option changelog-dir /mnt/bricks/mstate/brick/.glusterfs/changelogs
40: option changelog-barrier-timeout 120
41: subvolumes mstate-changetimerecorder
42: end-volume
43:
44: volume mstate-bitrot-stub
45: type features/bitrot-stub
46: option export /mnt/bricks/mstate/brick
47: option bitrot disable
48: subvolumes mstate-changelog
49: end-volume
50:
51: volume mstate-access-control
52: type features/access-control
53: subvolumes mstate-bitrot-stub
54: end-volume
55:
56: volume mstate-locks
57: type features/locks
58: subvolumes mstate-access-control
59: end-volume
60:
61: volume mstate-worm
62: type features/worm
63: option worm off
64: option worm-file-level off
65: subvolumes mstate-locks
66: end-volume
67:
68: volume mstate-read-only
69: type features/read-only
70: option read-only off
71: subvolumes mstate-worm
72: end-volume
73:
74: volume mstate-leases
75: type features/leases
76: option leases off
77: subvolumes mstate-read-only
78: end-volume
79:
80: volume mstate-upcall
81: type features/upcall
82: option cache-invalidation off
83: subvolumes mstate-leases
84: end-volume
85:
86: volume mstate-io-threads
87: type performance/io-threads
88: subvolumes mstate-upcall
89: end-volume
90:
91: volume mstate-selinux
92: type features/selinux
93: option selinux on
94: subvolumes mstate-io-threads
95: end-volume<