Re: server-side-replication work correct!?

Raghavendra G <raghavendra@xxxxxxxxxxx> · Mon, 21 Dec 2009 23:46:35 +0400

Hi Roland,

Please find my comments inline.

On Fri, Dec 18, 2009 at 3:16 PM, Roland Fischer <roland.fischer@xxxxxxxxxx> wrote:

hi all,

glusterfs version 3.0.0

replication: server-side-replication

We have two glusterfs servers running glusterfs v3.0.0 and one client host, also running glusterfs v3.0.0. We have one partition which contains office files and short videos.

We simulate a server failure on gfs-01-01 (rcnetwork stop; sleep 300; rcnetwork start). After rcnetwork stop on gfs-01-01, we see the following on the client host at the mountpoint:

/GFS/office-data02': Transport endpoint is not connected   => we can't do anything on this partition. Once gfs-01-01 comes back online, the partition is accessible again. We see the same situation with gfs-01-02,

but in that case, if we run df, the client host does nothing. Once gfs-01-02 is back online, the partition is accessible again.

clientlog: (rcnetwork down on gfs-01-01)

[2009-12-18 12:06:51] W [fuse-bridge.c:722:fuse_attr_cbk] glusterfs-fuse: 41: STAT() /funny => -1 (Transport endpoint is not connected)

[2009-12-18 12:06:55] W [fuse-bridge.c:722:fuse_attr_cbk] glusterfs-fuse: 42: STAT() /funny => -1 (Transport endpoint is not connected)

[2009-12-18 12:06:55] W [fuse-bridge.c:2342:fuse_statfs_cbk] glusterfs-fuse: 43: ERR => -1 (Transport endpoint is not connected)

clientlog: (rcnetwork down on gfs-01-02)

[2009-12-18 12:11:48] E [client-protocol.c:415:client_ping_timer_expired] office-data02-rr: Server 192.168.11.82:7000 has not responded in the last 42 seconds, disconnecting.

[2009-12-18 12:11:48] E [saved-frames.c:165:saved_frames_unwind] office-data02-rr: forced unwinding frame type(1) op(STAT)

[2009-12-18 12:11:48] W [fuse-bridge.c:722:fuse_attr_cbk] glusterfs-fuse: 58: STAT() /funny => -1 (Transport endpoint is not connected)

[2009-12-18 12:11:48] E [saved-frames.c:165:saved_frames_unwind] office-data02-rr: forced unwinding frame type(1) op(STATFS)

[2009-12-18 12:11:48] W [fuse-bridge.c:2342:fuse_statfs_cbk] glusterfs-fuse: 59: ERR => -1 (Transport endpoint is not connected)

[2009-12-18 12:11:48] E [saved-frames.c:165:saved_frames_unwind] office-data02-rr: forced unwinding frame type(2) op(PING)

[2009-12-18 12:11:48] N [client-protocol.c:6972:notify] office-data02-rr: disconnected

[2009-12-18 12:11:48] N [client-protocol.c:6224:client_setvolume_cbk] office-data02-rr: Connected to 192.168.11.11:7000, attached to remote volume 'office-data02'.

Did we do anything wrong? Are the volfiles wrong? This can't be normal behavior, can it?

gfs-01-01: /etc/glusterfs/export-office-data02-server_repl_gfs-01-01.vol

# export-web-data-server_repl

# gfs-01-01 /GFS/web-data

# gfs-01-02 /GFS/web-data

volume posix

  type storage/posix

  option directory /GFS/office-data02

end-volume

volume locks

  type features/locks

  subvolumes posix

end-volume

volume posix-remote

  type protocol/client

  option transport-type tcp

  option ping-timeout 5

  option remote-host gfs-01-02

  option remote-port 7000

  option remote-subvolume locks

end-volume

volume gfs-replicate

    type cluster/replicate

    subvolumes posix-remote

subvolumes should be:
    subvolumes posix-remote posix

end-volume

volume writebehind

  type performance/write-behind

  option cache-size 2MB

  option flush-behind on

  subvolumes gfs-replicate

end-volume

volume office-data02

  type performance/io-threads

  option thread-count 32 # default is 16

  subvolumes writebehind

end-volume

volume quickread

    type performance/quick-read

    option cache-timeout 1

    option max-file-size 512kB

#    subvolumes web-data

    subvolumes office-data02

end-volume

volume server

  type protocol/server

  option transport-type tcp

  option transport.socket.listen-port 7000

  option auth.addr.office-data02.allow 192.xxxx.xxx.*

  option auth.addr.locks.allow 192.xxxx.xxx.*

  subvolumes office-data02 locks

end-volume

gfs-01-02: /etc/glusterfs/export-office-data02-server_repl_gfs-01-02.vol

# export-office-data02-server_repl

# gfs-01-01 /GFS/office-data02

# gfs-01-02 /GFS/office-data02

volume posix

  type storage/posix

  option directory /GFS/office-data02

end-volume

volume locks

  type features/locks

  subvolumes posix

end-volume

volume posix-remote

  type protocol/client

  option transport-type tcp

  option ping-timeout 5

  option remote-host gfs-01-01

  option remote-port 7000

  option remote-subvolume locks

end-volume

volume gfs-replicate

    type cluster/replicate

    subvolumes posix-remote

subvolumes should be:
      subvolumes posix posix-remote

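Note that the first entry here is posix, not posix-remote. This order must be maintained, because replicate uses its children as lock servers in the same order in which they are listed in the subvolumes option. On gfs-01-02, posix is the local brick, whereas on gfs-01-01 the same brick is reached through posix-remote, so both servers must list gfs-01-02's brick first. If the order differs, the replicate instances on server1 and server2 use different lock servers, which causes race conditions.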

end-volume

volume writebehind

  type performance/write-behind

  option cache-size 2MB

  option flush-behind on

  subvolumes gfs-replicate

end-volume

volume office-data02

  type performance/io-threads

  option thread-count 32 # default is 16

  subvolumes writebehind

end-volume

volume quickread

    type performance/quick-read

    option cache-timeout 1

    option max-file-size 512kB

#    subvolumes web-data

    subvolumes office-data02

end-volume

volume server

  type protocol/server

  option transport-type tcp

  option transport.socket.listen-port 7000

  option auth.addr.office-data02.allow 192.xxx.xxx.*

  option auth.addr.locks.allow 192.xxx.xxx.*

  subvolumes office-data02 locks

end-volume

client volfile:

volume office-data02-rr

  type protocol/client

  option transport-type tcp

  option remote-host cluster-gfs.xxxx.xxxxx # RRDNS

  option remote-port 7000

  option remote-subvolume office-data02

end-volume

volume readahead

  type performance/read-ahead

  option page-count 16             # cache per file  = (page-count x page-size)

  option force-atime-update off # default is off

  subvolumes office-data02-rr

end-volume

volume writebehind

  type performance/write-behind

  option cache-size 512MB # default is equal to aggregate-size

  option flush-behind on # default is 'off'

  subvolumes readahead

end-volume

volume iocache

  type performance/io-cache

  option cache-size 64MB             # default is 32MB

#  option priority *.h:3,*.html:2,*:1 # default is '*:0'

  option cache-timeout 3             # default is 1 second

  subvolumes writebehind

end-volume

volume quickread

 type performance/quick-read

  option cache-timeout 1

  option max-file-size 512

  subvolumes iocache

end-volume

volume stat-prefetch

  type performance/stat-prefetch

  subvolumes quickread

end-volume

Roland Fischer

_______________________________________________

Gluster-devel mailing list

Gluster-devel@xxxxxxxxxx

http://lists.nongnu.org/mailman/listinfo/gluster-devel

regards,
-- 
Raghavendra G