They are separate; I meant to imply that there is a Storage-01ns ->
Storage-02ns -> Storage-03ns.
The only thing I'm not doing is double-mirroring the afr volumes.
(i.e. there is no Storage-01 -> Storage-02afr unified with Storage-01afr ->
Storage-02)
I never really understood the reason for doing this in the examples, but
assumed it would help throughput.
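For clarity, here is roughly what I take that cross-mirrored example layout to be (a sketch only; the volume and brick names below are placeholders, not taken from my config):
volume mirror-a
type cluster/afr
subvolumes storage-01 storage-02afr
end-volume
volume mirror-b
type cluster/afr
subvolumes storage-01afr storage-02
end-volume
volume unified
type cluster/unify
option namespace some-ns # a separately exported namespace volume, defined elsewhere
option scheduler rr # round-robin is enough for the sketch
subvolumes mirror-a mirror-b
end-volume
i.e. each server would export two directories and each AFR pair would span both servers in opposite order, which is presumably where the throughput gain comes from.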
My spec is included below.
####gluster-syster.vol#####
volume main1
type protocol/client
option transport-type ib-verbs/client
option remote-host RTPST201
option remote-subvolume system
end-volume
volume main2
type protocol/client
option transport-type ib-verbs/client
option remote-host RTPST202
option remote-subvolume system
end-volume
volume main3
type protocol/client
option transport-type ib-verbs/client
option remote-host RTPST203
option remote-subvolume system
end-volume
volume main4
type protocol/client
option transport-type ib-verbs/client
option remote-host RTPST204
option remote-subvolume system
end-volume
volume main5
type protocol/client
option transport-type ib-verbs/client
option remote-host RTPST205
option remote-subvolume system
end-volume
volume main6
type protocol/client
option transport-type ib-verbs/client
option remote-host RTPST206
option remote-subvolume system
end-volume
volume main1-2
type cluster/afr
subvolumes main1 main2
# option replicate *:2
end-volume
volume main3-4
type cluster/afr
subvolumes main3 main4
# option replicate *:2
end-volume
volume main5-6
type cluster/afr
subvolumes main5 main6
# option replicate *:2
end-volume
volume main-ns-1
type protocol/client
option transport-type ib-verbs/client
option remote-host RTPST201
option remote-subvolume system-ns
end-volume
volume main-ns-2
type protocol/client
option transport-type ib-verbs/client
option remote-host RTPST202
option remote-subvolume system-ns
end-volume
volume main-ns-3
type protocol/client
option transport-type ib-verbs/client
option remote-host RTPST203
option remote-subvolume system-ns
end-volume
volume main-ns
type cluster/afr
subvolumes main-ns-1 main-ns-2 main-ns-3
# option replicate *:3
end-volume
volume main
type cluster/unify
option namespace main-ns
subvolumes main1-2 main3-4 main5-6
option scheduler alu # use the ALU scheduler
# option alu.limits.min-free-disk 10GB # Don't create files on a volume with less than 10GB free disk space
# option alu.limits.max-open-files 10000 # Don't create files on a volume with more than 10000 files open
# When deciding where to place a file, first look at the disk-usage, then at read-usage, write-usage, open files, and finally the disk-speed-usage.
option alu.order disk-usage:read-usage:write-usage:open-files-usage:disk-speed-usage
# option alu.disk-usage.entry-threshold 2GB # Kick in if the discrepancy in disk-usage between volumes is 2GB
# option alu.disk-usage.exit-threshold 60MB # Don't stop until you've written at least 60MB to the least-used volume
# option alu.open-files-usage.entry-threshold 1024 # Kick in if the discrepancy in open files is 1024
# option alu.open-files-usage.exit-threshold 32 # Don't stop until you've written at least 32 files to the least-used volume
# option alu.read-usage.entry-threshold 20% # Kick in when the read-usage discrepancy is 20%
# option alu.read-usage.exit-threshold 4% # Don't stop until the discrepancy has been reduced to 4%
# option alu.write-usage.entry-threshold 20% # Kick in when the write-usage discrepancy is 20%
# option alu.write-usage.exit-threshold 4% # Don't stop until the discrepancy has been reduced to 4%
option alu.stat-refresh.interval 60sec # Refresh the statistics used for decision-making every 60 seconds
# option alu.stat-refresh.num-file-create 10 # Refresh the statistics used for decision-making after creating 10 files
end-volume
volume writebehind
type performance/write-behind
subvolumes main
end-volume
volume readahead
type performance/read-ahead
subvolumes writebehind
end-volume
volume io-cache
type performance/io-cache
subvolumes readahead
end-volume
### If you are not concerned about performance of interactive commands
### like "ls -l", you wouldn't need this translator.
#volume statprefetch
# type performance/stat-prefetch
# option cache-seconds 2 # cache expires in 2 seconds
# subvolumes readahead # add "stat-prefetch" feature to "readahead" volume
#end-volume
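For reference, here is a minimal sketch of the matching server-side export on the bricks that carry the namespace (RTPST201-203). The directory paths are placeholders, not my actual server config; the point is just that the storage volume ("system") and the namespace volume ("system-ns") are exported from separate directories, per Gowda's note below:
volume system
type storage/posix
option directory /export/storage # data directory for the storage bricks
end-volume
volume system-ns
type storage/posix
option directory /export/namespace # separate directory used only for the namespace
end-volume
volume server
type protocol/server
option transport-type ib-verbs/server
option auth.ip.system.allow * # placeholder; restrict to the cluster subnet in practice
option auth.ip.system-ns.allow *
subvolumes system system-ns
end-volume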
Basavanagowda Kanur wrote:
Mickey,
You cannot re-use the namespace as a storage volume.
Make sure you have separate namespaces, other than the ones used for
storage, for glusterfs to work properly.
--
Gowda
On Mon, Mar 17, 2008 at 10:10 PM, Mickey Mazarick
<mic@xxxxxxxxxxxxxxxxxx> wrote:
I'm getting a lot of errors on an AFR/unify setup with 6 storage bricks
using ib-verbs, and I just want some help understanding what is critical.
For some reason this setup is very unstable, and we want to know how to
make it as robust as the architecture suggests it should be.
The problem is that when we copy any files we get hundreds of the
following three errors in the client:
2008-03-17 12:31:00 E [fuse-bridge.c:699:fuse_fd_cbk] glusterfs-fuse:
38: /tftpboot/node_root/lib/modules/2.6.24.1/modules.symbols => -1 (5)
2008-03-17 12:31:00 E [unify.c:850:unify_open] main:
/tftpboot/node_root/lib/modules/2.6.24.1/kernel/arch/x86/kernel/cpuid.ko:
entry_count is 3
2008-03-17 12:31:00 E [unify.c:853:unify_open] main:
/tftpboot/node_root/lib/modules/2.6.24.1/kernel/arch/x86/kernel/cpuid.ko:
found on main-ns
Files still copy with these errors but very slowly.
Additionally we are unable to lose even one storage brick without the
cluster freezing.
We have the pretty common afr/unify setup with 6 storage bricks.
namespace:
Storage_01 <- AFR -> RTPST202 <- AFR -> Storage_03
storage:
Storage_01 <- AFR -> Storage_02
Storage_03 <- AFR -> Storage_04
Storage_05 <- AFR -> Storage_06
All this is running on TLA ver 703 with the latest patched fuse
module.
Any suggestions would be appreciated!
Thanks!
-Mickey Mazarick