Problems with files not being copied into GlusterFS

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I have a very serious problem that I need the community's response to.

My web application receives uploaded files and copies them into the
GlusterFS distributed file system. Recently it looks like files aren't
being copied into GlusterFS, and there are no error messages in the
GlusterFS client or server logs. As far as my web application is concerned,
everything is OK during the copy, but the files do not exist in GlusterFS
afterwards. I have spent a lot of time debugging my web-app without finding
an issue with it.

I have tried setting log-level=DEBUG, but no errors are printed in the logs.
I am running GlusterFS 3.0.5 on Ubuntu 9.04. My GlusterFS config is
distributed and replicated.

I am using two separate Gluster file systems. FS1 consists of two servers
with 36 disks each. Each disk is exposed to GlusterFS individually. I have
feedback from the community that this is not an optimal setup, and that I
should use RAID or LVM to join these disks before I expose them to GlusterFS.
The servers' CPUs are spiking at 600-700%.

In FS2 I have 4 servers with the same configuration that do not show this
problem. These servers have 12 disks each. When I use FS2, all copies are
completed successfully without problems.

So to me it looks like GlusterFS on FS1 is wrongly reporting copy errors as
successful copies. The same web-app has no problems when it uses FS2.

So, to my questions:
1. What is the best way to debug this problem?
2. What tests can I run on my GlusterFS system during production to verify
that everything is working under load?
3. Is this a known problem?

Here is an example of my client config file for FS1:

# The remote servers
# DN-004
volume dn-004-01
    type protocol/client
    option transport-type tcp
    option transport.socket.nodelay on
    option remote-host dn-004
    option remote-subvolume brick-01
end-volume

volume dn-004-02
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-02
end-volume

volume dn-004-03
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-03
end-volume

volume dn-004-04
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-04
end-volume

volume dn-004-05
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-05
end-volume

volume dn-004-06
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-06
end-volume

volume dn-004-07
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-07
end-volume

volume dn-004-08
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-08
end-volume

volume dn-004-09
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-09
end-volume

volume dn-004-10
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-10
end-volume

volume dn-004-11
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-11
end-volume

volume dn-004-12
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-12
end-volume

volume dn-004-13
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-13
end-volume

volume dn-004-14
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-14
end-volume

volume dn-004-15
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-15
end-volume

volume dn-004-16
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-16
end-volume

volume dn-004-17
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-17
end-volume

volume dn-004-18
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-18
end-volume

volume dn-004-19
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-19
end-volume

volume dn-004-20
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-20
end-volume

volume dn-004-21
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-21
end-volume

volume dn-004-22
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-22
end-volume

volume dn-004-23
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-23
end-volume

volume dn-004-24
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-24
end-volume

volume dn-004-25
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-25
end-volume

volume dn-004-26
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-26
end-volume

volume dn-004-27
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-27
end-volume

volume dn-004-28
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-28
end-volume

volume dn-004-29
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-29
end-volume

volume dn-004-30
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-30
end-volume

volume dn-004-31
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-31
end-volume

volume dn-004-32
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-32
end-volume

volume dn-004-33
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-33
end-volume

volume dn-004-34
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-34
end-volume

volume dn-004-35
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-35
end-volume

volume dn-004-36
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-004
        option remote-subvolume brick-36
end-volume


# DN-005
volume dn-005-01
    type protocol/client
    option transport-type tcp
    option transport.socket.nodelay on
    option remote-host dn-005
    option remote-subvolume brick-01
end-volume

volume dn-005-02
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-02
end-volume

volume dn-005-03
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-03
end-volume

volume dn-005-04
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-04
end-volume

volume dn-005-05
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-05
end-volume

volume dn-005-06
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-06
end-volume

volume dn-005-07
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-07
end-volume

volume dn-005-08
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-08
end-volume

volume dn-005-09
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-09
end-volume

volume dn-005-10
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-10
end-volume

volume dn-005-11
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-11
end-volume

volume dn-005-12
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-12
end-volume

volume dn-005-13
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-13
end-volume

volume dn-005-14
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-14
end-volume

volume dn-005-15
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-15
end-volume

volume dn-005-16
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-16
end-volume

volume dn-005-17
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-17
end-volume

volume dn-005-18
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-18
end-volume

volume dn-005-19
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-19
end-volume

volume dn-005-20
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-20
end-volume

volume dn-005-21
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-21
end-volume

volume dn-005-22
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-22
end-volume

volume dn-005-23
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-23
end-volume

volume dn-005-24
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-24
end-volume

volume dn-005-25
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-25
end-volume

volume dn-005-26
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-26
end-volume

volume dn-005-27
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-27
end-volume

volume dn-005-28
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-28
end-volume

volume dn-005-29
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-29
end-volume

volume dn-005-30
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-30
end-volume

volume dn-005-31
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-31
end-volume

volume dn-005-32
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-32
end-volume

volume dn-005-33
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-33
end-volume

volume dn-005-34
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-34
end-volume

volume dn-005-35
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-35
end-volume

volume dn-005-36
        type protocol/client
        option transport-type tcp
    option transport.socket.nodelay on
        option remote-host dn-005
        option remote-subvolume brick-36
end-volume



# Replicate data between the servers
# Use pairs, but switch the order to distribute read load
volume repl-004-005-01
    type cluster/replicate
    subvolumes dn-004-01 dn-005-01
end-volume

volume repl-004-005-02
        type cluster/replicate
        subvolumes dn-004-02 dn-005-02
end-volume

volume repl-004-005-03
        type cluster/replicate
        subvolumes dn-004-03 dn-005-03
end-volume

volume repl-004-005-04
        type cluster/replicate
        subvolumes dn-004-04 dn-005-04
end-volume

volume repl-004-005-05
        type cluster/replicate
        subvolumes dn-004-05 dn-005-05
end-volume

volume repl-004-005-06
        type cluster/replicate
        subvolumes dn-004-06 dn-005-06
end-volume

volume repl-004-005-07
        type cluster/replicate
        subvolumes dn-004-07 dn-005-07
end-volume

volume repl-004-005-08
        type cluster/replicate
        subvolumes dn-004-08 dn-005-08
end-volume

volume repl-004-005-09
        type cluster/replicate
        subvolumes dn-004-09 dn-005-09
end-volume

volume repl-004-005-10
        type cluster/replicate
        subvolumes dn-004-10 dn-005-10
end-volume

volume repl-004-005-11
        type cluster/replicate
        subvolumes dn-004-11 dn-005-11
end-volume

volume repl-004-005-12
        type cluster/replicate
        subvolumes dn-004-12 dn-005-12
end-volume

volume repl-004-005-13
        type cluster/replicate
        subvolumes dn-004-13 dn-005-13
end-volume

volume repl-004-005-14
        type cluster/replicate
        subvolumes dn-004-14 dn-005-14
end-volume

volume repl-004-005-15
        type cluster/replicate
        subvolumes dn-004-15 dn-005-15
end-volume

volume repl-004-005-16
        type cluster/replicate
        subvolumes dn-004-16 dn-005-16
end-volume

volume repl-004-005-17
        type cluster/replicate
        subvolumes dn-004-17 dn-005-17
end-volume

volume repl-004-005-18
        type cluster/replicate
        subvolumes dn-004-18 dn-005-18
end-volume

volume repl-005-004-19
        type cluster/replicate
        subvolumes dn-005-19 dn-004-19
end-volume

volume repl-005-004-20
        type cluster/replicate
        subvolumes dn-005-20 dn-004-20
end-volume

volume repl-005-004-21
        type cluster/replicate
        subvolumes dn-005-21 dn-004-21
end-volume

volume repl-005-004-22
        type cluster/replicate
        subvolumes dn-005-22 dn-004-22
end-volume

volume repl-005-004-23
        type cluster/replicate
        subvolumes dn-005-23 dn-004-23
end-volume

volume repl-005-004-24
        type cluster/replicate
        subvolumes dn-005-24 dn-004-24
end-volume

volume repl-005-004-25
        type cluster/replicate
        subvolumes dn-005-25 dn-004-25
end-volume

volume repl-005-004-26
        type cluster/replicate
        subvolumes dn-005-26 dn-004-26
end-volume

volume repl-005-004-27
        type cluster/replicate
        subvolumes dn-005-27 dn-004-27
end-volume

volume repl-005-004-28
        type cluster/replicate
        subvolumes dn-005-28 dn-004-28
end-volume

volume repl-005-004-29
        type cluster/replicate
        subvolumes dn-005-29 dn-004-29
end-volume

volume repl-005-004-30
        type cluster/replicate
        subvolumes dn-005-30 dn-004-30
end-volume

volume repl-005-004-31
        type cluster/replicate
        subvolumes dn-005-31 dn-004-31
end-volume

volume repl-005-004-32
        type cluster/replicate
        subvolumes dn-005-32 dn-004-32
end-volume

volume repl-005-004-33
        type cluster/replicate
        subvolumes dn-005-33 dn-004-33
end-volume

volume repl-005-004-34
        type cluster/replicate
        subvolumes dn-005-34 dn-004-34
end-volume

volume repl-005-004-35
        type cluster/replicate
        subvolumes dn-005-35 dn-004-35
end-volume

volume repl-005-004-36
        type cluster/replicate
        subvolumes dn-005-36 dn-004-36
end-volume

# The distribute translator
# Use hashing to distribute data to all subvolumes, stopping when there is
# only 5% left on the disk
volume distribute
    type cluster/distribute
    option lookup-unhashed off
    option min-free-disk 5%
    subvolumes repl-004-005-01 repl-004-005-02 repl-004-005-03
repl-004-005-04 repl-004-005-05 repl-004-005-06 repl-004-005-07
repl-004-005-08 repl-004-005-09 repl-004-005-10 repl-004-005-11
repl-004-005-12 repl-004-005-13 repl-004-005-14 repl-004-005-15
repl-004-005-16 repl-004-005-17 repl-004-005-18 repl-005-004-19
repl-005-004-20 repl-005-004-21 repl-005-004-22 repl-005-004-23
repl-005-004-24 repl-005-004-25 repl-005-004-26 repl-005-004-27
repl-005-004-28 repl-005-004-29 repl-005-004-30 repl-005-004-31
repl-005-004-32 repl-005-004-33 repl-005-004-34 repl-005-004-35
repl-005-004-36
end-volume

# Enable write-behind to decrease write latency
#volume wb
#    type performance/write-behind
#    option flush-behind on
#    option cache-size 64MB
#    subvolumes distribute
#end-volume

volume cache
    type performance/io-cache
    option cache-size 64MB
    subvolumes distribute
end-volume

### Add io-threads for parallel requisitions
volume iothreads
  type performance/io-threads
  option thread-count 8 # default is 16
  subvolumes cache
end-volume



Thanks

Roland Rabben
Founder & CEO Jotta AS
Cell: +47 90 85 85 39
Phone: +47 21 04 29 00
Email: roland at jotta.no


[Index of Archives]     [Gluster Development]     [Linux Filesytems Development]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux