Re: ESTALE / "Stale NFS file handle"

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Dan,
 These problems popped up when we made some bug fixes in our hashing algorithm (it was all right in the 1.4.0preX releases, and got corrected from rc2 onwards). This behavior can affect anyone who used dht in rc1 and has since moved to a later rcX release.
 If you notice, the file name length is a multiple of 16 (32 in this case). To fix this, we have two approaches as of now. Have a separate '/mnt/debug' mountpoint with 'option lookup-unhashed yes' set in dht, then stat the files whose filename length is 16 or a multiple of 16 through this debug mountpoint. This should fix your missing-file problem on the main mountpoint as well (as it creates a proper linkfile in the proper hashed volume).

Sorry for the inconvenience.

Regards,
Amar


On Mon, Mar 9, 2009 at 10:40 PM, Dan Parsons <dparsons@xxxxxxxx> wrote:
I'm getting the below error messages in rc4. Like my previous email, there doesn't seem to be any pattern as to which server/client it's happening on, though the errors are occurring fairly frequently.

2009-03-09 17:32:26 E [unify.c:585:unify_lookup] unify: returning ESTALE for /bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR02622.hmmhits: file count is 1
2009-03-09 17:32:26 E [unify.c:591:unify_lookup] unify: /bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR02622.hmmhits: found on unify-switch-ns
2009-03-09 17:32:26 W [fuse-bridge.c:301:need_fresh_lookup] fuse-bridge: revalidate of /bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR02622.hmmhits failed (Stale NFS file handle)

2009-03-09 17:32:28 E [unify.c:360:unify_lookup_cbk] unify: child(dht0): path(/bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR01420.hmmhits): No such file or directory
2009-03-09 17:32:28 E [unify.c:360:unify_lookup_cbk] unify: child(unify-switch-ns): path(/bio/data/fast-hmmsearch-all/tmpP986E__fast-hmmsearch-all_job/result.tigrfam.TIGR01420.hmmhits): No such file or directory

As you can see, there are two separate sets of errors for two different files, though both errors are troubling. This problem has persisted from rc2 to rc4, though I can't say for certain that it was introduced in rc2 (I think it was there prior to that as well). No matching errors in server logs.

Any suggestions? My configs are below. Thanks!

CLIENT CONFIG:

volume unify-switch-ns
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.51
   option remote-subvolume posix-unify-switch-ns
end-volume

#volume distfs01-ns-readahead
#   type performance/read-ahead
#   option page-size 1MB
#   option page-count 8
#   subvolumes distfs01-ns-brick
#end-volume

#volume unify-switch-ns
#   type performance/write-behind
#   option block-size 1MB
#   option cache-size 3MB
#   subvolumes distfs01-ns-readahead
#end-volume

volume distfs01-unify
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.51
   option remote-subvolume posix-unify
end-volume

volume distfs02-unify
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.52
   option remote-subvolume posix-unify
end-volume

volume distfs03-unify
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.53
   option remote-subvolume posix-unify
end-volume

volume distfs04-unify
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.54
   option remote-subvolume posix-unify
end-volume

volume distfs01-stripe
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.51
   option remote-subvolume posix-stripe
end-volume

volume distfs02-stripe
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.52
   option remote-subvolume posix-stripe
end-volume

volume distfs03-stripe
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.53
   option remote-subvolume posix-stripe
end-volume

volume distfs04-stripe
   type protocol/client
   option transport-type tcp
   option remote-host 10.8.101.54
   option remote-subvolume posix-stripe
end-volume

volume stripe0
type cluster/stripe
option block-size *.jar,*.pin:1MB,*:2MB
subvolumes distfs01-stripe distfs02-stripe distfs03-stripe distfs04-stripe
end-volume

volume dht0
type cluster/dht
# option lookup-unhashed yes
subvolumes distfs01-unify distfs02-unify distfs03-unify distfs04-unify
end-volume

volume unify
type cluster/unify
option namespace unify-switch-ns
option self-heal off
option scheduler switch
# send *.phr/psq/pnd etc to stripe0, send the rest to hash
# extensions have to be *.foo* and not simply *.foo or rsync's tmp file naming will prevent files from being matched
option scheduler.switch.case *.phr*:stripe0;*.psq*:stripe0;*.pnd*:stripe0;*.psd*:stripe0;*.pin*:stripe0;*.nsi*:stripe0;*.nin*:stripe0;*.nsd*:stripe0;*.nhr*:stripe0;*.nsq*:stripe0;*.tar*:stripe0;*.tar.gz*:stripe0;*.jar*:stripe0;*.img*:stripe0;*.perf*:stripe0;*.tgz*:stripe0;*.fasta*:stripe0;*.huge*:stripe0
subvolumes stripe0 dht0
end-volume

volume ioc
   type performance/io-cache
   subvolumes unify
   option cache-size 3000MB
option cache-timeout 3600
end-volume

volume filter
  type features/filter
  option fixed-uid 0
  option fixed-gid 900
  subvolumes ioc
end-volume




SERVER CONFIG:
volume posix-unify-brick
type storage/posix
option directory /distfs-storage-space/glusterfs/unify
# the below line is here to make the output of 'df' accurate, as both volumes are served from the same local drive
option export-statfs-size off
end-volume

volume posix-stripe-brick
        type storage/posix
        option directory /distfs-storage-space/glusterfs/stripe
end-volume

volume posix-unify-switch-ns-brick
        type storage/posix
        option directory /distfs-storage-space/glusterfs/unify-switch-ns
end-volume

volume posix-unify
type performance/io-threads
option thread-count 4
subvolumes posix-unify-brick
end-volume

volume posix-stripe
type performance/io-threads
option thread-count 4
subvolumes posix-stripe-brick
end-volume

volume posix-unify-switch-ns
type performance/io-threads
option thread-count 2
subvolumes posix-unify-switch-ns-brick
end-volume

volume server
type protocol/server
option transport-type tcp
option auth.addr.posix-unify.allow 10.8.101.*,10.8.15.50
option auth.addr.posix-stripe.allow 10.8.101.*,10.8.15.50
option auth.addr.posix-unify-switch-ns.allow 10.8.101.*,10.8.15.50
subvolumes posix-unify posix-stripe posix-unify-switch-ns
end-volume


_______________________________________________
Gluster-devel mailing list
Gluster-devel@xxxxxxxxxx
http://lists.nongnu.org/mailman/listinfo/gluster-devel




--
Amar Tumballi


[Index of Archives]     [Gluster Users]     [Ceph Users]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux