Re: Re: afr :2 HA setup question

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Amar,

Yes, I'm using afr and do a unify over afr.

Here are my configs.


---snip---
Serverside: gluster3 example (configs from gluster1-2-4 are similar)

### file: server-volume.spec
# Namespace brick
volume brick
     type storage/posix                     # POSIX FS translator
     option directory /var/tmp         # Export this directory
end-volume

volume server
       type protocol/server
       option transport-type tcp/server
       subvolumes brick
       option auth.ip.brick.allow *
end-volume

#
# local storage bricks
#

volume local-hdb1
       type storage/posix
       option directory /export/hdb1
end-volume

volume local-sda1
       type storage/posix
       option directory /export/sda1
end-volume

volume local-sdb1
       type storage/posix
       option directory /export/sdb1
end-volume

volume local-lvm
       type storage/posix
       option directory /export/lvm
end-volume

#
# performance translators
#

volume hdb1
       type performance/io-threads
       option thread-count 8
       subvolumes local-hdb1
end-volume

volume sda1
       type performance/io-threads
       option thread-count 8
       subvolumes local-sda1
end-volume

volume sdb1
       type performance/io-threads
       option thread-count 8
       subvolumes local-sdb1
end-volume

volume lvm
       type performance/io-threads
       option thread-count 8
       subvolumes local-lvm
end-volume

volume server
       type protocol/server
       option transport-type tcp/server     # For TCP/IP transport
       option listen-port 6997                   # Default is 6996
        subvolumes hdb1 sda1 sdb1
       option auth.ip.hdb1.allow * # Allow access to "hdb1" volume
       option auth.ip.sda1.allow * # Allow access to "sda1" volume
       option auth.ip.sdb1.allow * # Allow access to "sdb1" volume
       option auth.ip.lvm.allow * # Allow access to "lvm" volume
end-volume
---snap---

---snip---
Clientside

#
# glusterfs Client Configuration
#

#
# Namespace Volume
#

volume brick
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster1     # IP address of the remote brick
 option remote-port 6996
 option remote-subvolume brick        # name of the remote volume
end-volume


# Remote Volumes from gluster1 - gluster4

#
# gluster1
#

volume gluster1-sdb1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster1     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume sdb1        # name of the remote volume
end-volume

volume gluster1-sdc1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster1     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume sdc1        # name of the remote volume
end-volume

volume gluster1-sdd1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster1     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume sdd1        # name of the remote volume
end-volume

volume gluster1-sde1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster1     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume sde1        # name of the remote volume
end-volume

volume gluster1-sdf1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster1     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume sdf1        # name of the remote volume
end-volume

#
# gluster2
#

volume gluster2-hdb1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster2     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume hdb1        # name of the remote volume
end-volume

volume gluster2-hdc1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster2     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume hdc1        # name of the remote volume
end-volume

#
# gluster3
#

volume gluster3-hdb1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster3     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume hdb1        # name of the remote volume
end-volume

volume gluster3-sda1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster3     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume sda1        # name of the remote volume
end-volume

volume gluster3-sdb1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster3     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume sdb1        # name of the remote volume
end-volume

volume gluster3-lvm
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster3     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume lvm        # name of the remote volume
end-volume


#
# gluster4
#

volume gluster4-hdc1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster4     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume hdc1        # name of the remote volume
end-volume

volume gluster4-hdb1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster4     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume hdb1        # name of the remote volume
end-volume

volume gluster4-sda1
 type protocol/client
 option transport-type tcp/client     # for TCP/IP transport
 option remote-host gluster4     # IP address of the remote brick
 option remote-port 6997
 option remote-subvolume sda1        # name of the remote volume
end-volume


#
# Replication Translators
# AFR (Automatic File Replication )
#

volume afr1
 type cluster/afr
 subvolumes gluster3-hdb1 gluster4-hdc1
 option replicate *:2
end-volume

volume afr2
 type cluster/afr
 subvolumes gluster2-hdc1 gluster3-lvm
 option replicate *:2
end-volume

volume afr3
 type cluster/afr
 subvolumes gluster1-sde1 gluster1-sdf1
 option replicate *:2
end-volume

volume afr4
 type cluster/afr
 subvolumes gluster3-sda1 gluster3-sdb1
 option replicate *:2
end-volume

#
# Unify all gluster servers together to ONE share
#

volume cluster
 type cluster/unify
subvolumes afr1 afr2 afr3 afr4 gluster2-hdb1 gluster4-hdb1 gluster4-sda1 gluster4-hdc1
 option scheduler alu   # use the ALU scheduler
 option alu.limits.min-free-disk 6GB # Don't create files on a volume with less than 6GB free diskspace
 option alu.limits.max-open-files 10000 # Don't create files on a volume with more than 10000 files open
 option namespace brick
 option alu.order disk-usage:read-usage:write-usage:open-files-usage:disk-speed-usage
 option alu.disk-usage.entry-threshold 100GB # Kick in if the discrepancy in disk-usage between volumes is more than 100GB
 option alu.disk-usage.exit-threshold 60MB # Don't stop until you've written at least 60MB to the least-used volume
 option alu.open-files-usage.entry-threshold 1024 # Kick in if the discrepancy in open files is 1024
 option alu.open-files-usage.exit-threshold 32 # Don't stop until you've written at least 32 files to the least-used volume
 option alu.stat-refresh.interval 10sec # Refresh the statistics used for decision-making every 10 seconds

end-volume

#
# Performance Translator
# writebehind improves write performance a lot
#

volume writebehind
 type performance/write-behind
 option aggregate-size 131072 # unit in bytes
 option flush-behind off
 subvolumes cluster
end-volume

#
# Add readahead feature
#

volume readahead
 type performance/read-ahead
 option page-size 1MB     # unit in bytes
 option page-count 2       # cache per file  = (page-count x page-size)
 subvolumes writebehind
end-volume


volume io-perf
 type performance/io-cache
 option page-size 128KB
 option page-count 128
 subvolumes readahead
end-volume

---snap---

gluster4 had the hardware problems ... oops ... oh :-). I see that the unify subvolumes on my client side include not only the afr volumes but also single volumes from gluster4. That means that if gluster4 isn't responding (e.g. it has no network connection, or whatever) and any client is writing to, e.g., gluster4-sda1 or gluster4-hdc1, as you can see in my example, it is absolutely normal that my client will hang, because that write has nothing to do with afr1, afr2 or afr3...

So in my case it is a configuration problem or an error in reasoning.

Or am I wrong?

Regards,

 Matthias





Amar S. Tumballi schrieb:
Hi Matthias,
Can I have a look at your spec file? Btw, are you using AFR? unify? or unify over afr? because if the node which went down had unify's namespace and if it was not afr'd, then there is a high chance it can happen.

Anyways, posting your spec files may help us to solve your problem.

Regards,
Amar

On 9/11/07, *Matthias Albert* < gluster@xxxxxxxxxxxxxxxx <mailto:gluster@xxxxxxxxxxxxxxxx>> wrote:

    Hi August,

    I can confirm your problem with your setup. I have a 4-server
    glusterfsd setup, also running 1.3.1, and some glusterfs clients
    with fuse glfs3.

    One of these 4 servers had a hardware failure and was no longer
    reachable -> so the side effect was that none of my glusterfs clients
    could write anything to the mounted glusterfs share. I've built a new
    test machine and replaced the old one with it. Probably this week
    I will have more time for playing and testing with glusterfs (also
    with some performance translators).

    I will test the "option transport-timeout X" and will see what
    happens if I take one of them off the net.

    Regards,

       Matthias

    August R. Wohlt schrieb:
    > Hi all -
    >
    > After combing through the archives, I found the transport-timeout
    > option mentioned by avati. Is this described in the wiki docs
    > anywhere? I thought I had read through every page, but don't recall
    > seeing it. The e-mail from avati mentioned that it was described in
    > "doc/translator-options.txt" but this file does not appear in my
    > glusterfs-1.3.1 tarball.
    >
    > In any case, for those who have similar issues, making transport
    > timeout much smaller is your friend :-)
    >
    > Many Thanks!!
    > :august
    >
    > On 9/10/07, August R. Wohlt <glusterfs@xxxxxxxxxxx
    <mailto:glusterfs@xxxxxxxxxxx>> wrote:
    >
    >> Hi devs et al,
    >>
    >> After many hours of sublimation, I was able to condense my
    previous hanging
    >> issue down to this simplest case.
    >>
    >> To summarize: I have two physical machines, each afr'ing a
    directory to the
    >> other. both are glusterfs(d) 1.3.1 with glfs3 fuse. iptables is
    suspended
    >> during these tests. Spec files are below.
    >>
    >> The four situations:
    >>
    >> 1) If I start up both machines and start up glusterfsd on both
    machines, I
    >> can mount either one from the other and view its files as expected.
    >>
    >> 2) If I start up only one machine and glusterfsd, I can mount that
    >> glusterfsd brick from the same machine and use it (ie edit the
    files) while
    >> it tries to connect to the 2nd machine in the background. When
    I bring up
    >> the 2nd machine, it connects and afrs as expected. Compare this
    to #4).
    >>
    >> 3) If I start up both machines and glusterfsd on both, mount
    each others'
    >> bricks, verify I can see the files and then kill glusterfsd on
    one of them,
    >> I can still use and view files on the other one while it tries
    to reconnect
    >> in the background to the glusterfsd that was killed. When it
    comes back up
    >> everything continues as expected.
    >>
    >> 4) But, if I startup both machines with glusterfsd on both,
    mount either
    >> brick and view the files and then bring down the other machine
    (ie not kill
    >> glusterfsd, but bring down the whole machine suddenly, or pull
    the ethernet
    >> cable) , I can no longer see any files on the remaining
    machine. It just
    >> hangs until the machine that is down comes back up and then it
    continues on
    >> its merry way.
    >>
    >> This is presumably not the expected behavior since it is not
    the behavior in
    >> 2) and 3). It is only after the machines have both started up
    and then one
    >> of them goes away that I see this problem. Obviously, however
    this is the
    >> very situation that calls for an HA setup in the real world.
    When one server
    >> goes offline suddenly, you want to be able to keep on using the
    first.
    >>
    >> Here is the simplest spec file configuration that exhibits this
    problem:
    >>
    >> Simple server configuration:
    >>
    >> volume brick-ds
    >>     type storage/posix
    >>     option directory /.brick-ds
    >> end-volume
    >>
    >>  volume brick-ds-afr
    >>     type storage/posix
    >>     option directory /.brick-ds-afr
    >> end-volume
    >>
    >> volume server
    >>     type protocol/server
    >>     option transport-type tcp/server
    >>     option bind-address 192.168.16.128 # 192.168.16.1 on the other server
    >>     subvolumes brick-ds brick-ds-afr
    >>     option auth.ip.brick-ds.allow 192.168.16.*
    >>     option auth.ip.brick-ds-afr.allow 192.168.16.*
    >> end-volume
    >>
    >>
    >> Client Configuration :
    >>
    >>    volume brick-ds-local
    >>      type protocol/client
    >>      option transport-type tcp/client
    >>      option remote-host 192.168.16.128 # 192.168.16.1 on the other machine
    >>      option remote-subvolume brick-ds
    >>    end-volume
    >>
    >>    volume brick-ds-remote
    >>       type protocol/client
    >>       option transport-type tcp/client
    >>       option remote-host 192.168.16.1 # 192.168.16.128 on the other machine
    >>       option remote-subvolume brick-ds-afr
    >>     end-volume
    >>
    >>      volume brick-ds-afr
    >>       type cluster/afr
    >>       subvolumes brick-ds-local brick-ds-remote
    >>       option replicate *:2
    >>     end-volume
    >>
    >> These are both stock CentOS/RHEL 5 machines. You can
    demonstrate the
    >> behavior by rebooting one machine, pulling out the ethernet
    cable, or
    >> sending the route out into space (ie route add -host
    192.168.16.1 <http://192.168.16.1>
    >> some_disconnected_device). Everything will be frozen until the
    connection
    >> returns and then when it comes back up, things keep working
    again after
    >> that.
    >>
    >> Because of this problem, any kind of  HA / unify setup will not
    work for me
    >> when one of the nodes fails.
    >>
    >> Can someone else verify this behavior? If there is some part of
    the logs /
    >> strace / gdb output you'd like to see , just let me know. I'd
    really like to
    >> use glusterfs in an HA setup, but don't see how with this behavior.
    >>
    >> Thanks in advance!!
    >> :august
    >>
    >>
    >> On 9/7/07, August R. Wohlt < glusterfs@xxxxxxxxxxx
    <mailto:glusterfs@xxxxxxxxxxx>> wrote:
    >>
    >>> Hi all -
    >>>
    >>> I have a setup based on this :
    >>>
    >>>
    >>  http://www.gluster.org/docs/index.php/GlusterFS_High_Availability_Storage_with_GlusterFS
    <http://www.gluster.org/docs/index.php/GlusterFS_High_Availability_Storage_with_GlusterFS>
    >>
    >>> but with only 2 machines. Effectively just a mirror (glusterfsd
    >>>
    >> configuration below). 1.3.1 client and server.
    >>
    >>>
    >>
    >
    >
    > _______________________________________________
    > Gluster-devel mailing list
    > Gluster-devel@xxxxxxxxxx <mailto:Gluster-devel@xxxxxxxxxx>
    > http://lists.nongnu.org/mailman/listinfo/gluster-devel
    >



    _______________________________________________
    Gluster-devel mailing list
    Gluster-devel@xxxxxxxxxx <mailto:Gluster-devel@xxxxxxxxxx>
    http://lists.nongnu.org/mailman/listinfo/gluster-devel




--
Amar Tumballi
Engineer - Gluster Core Team
[bulde on #gluster/irc.gnu.org]
http://www.zresearch.com - Commoditizing Supercomputing and Superstorage!





[Index of Archives]     [Gluster Users]     [Ceph Users]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux