CephFS fsync failed and read error

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Dear Ceph experts,

I recently ran into a problem with CephFS.
When I use vim to edit a file on CephFS, it reports "Fsync failed", and afterwards the file can no longer be opened or read.
Strangely, I cannot spot any errors in the Ceph logs, dmesg, etc.
Here is an example below: (all machines in my ceph cluster have the same OS, kernel, and ceph version)

[root@dl-disk4 ceph-dir]# uname -a
Linux dl-disk4 3.10.0-327.4.4.el7.x86_64 #1 SMP Tue Jan 5 16:07:00 UTC 2016 x86_64 x86_64 x86_64 GNU/Linux

[root@dl-disk4 ceph-dir]# ceph version
ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43)

[root@dl-disk4 ceph-dir]# mount | grep cephfs
xxx.xxx.xxx.xxx:6789:/ on /cephfs type ceph (rw,relatime,name=user,secret=<hidden>)

[root@dl-disk4 ceph-dir]# ceph -s
    cluster 13c231fc-837e-48bb-b4d4-8a0ce1c12a24
     health HEALTH_WARN
            too many PGs per OSD (645 > max 300)
     monmap e1: 3 mons at {dl-disk1=xxx.xxx.xxx.xxx:6789/0,dl-disk2=xxx.xxx.xxx.xxx:6789/0,dl-disk3=xxx.xxx.xxx.xxx:6789/0}
            election epoch 60, quorum 0,1,2 dl-disk1,dl-disk2,dl-disk3
     mdsmap e76: 1/1/1 up {0=dl-disk4=up:active}
     osdmap e307: 32 osds: 32 up, 32 in
      pgmap v239602: 8288 pgs, 4 pools, 375 GB data, 1311 kobjects
            924 GB used, 348 TB / 349 TB avail
                8288 active+clean
[root@dl-disk4 ceph-dir]# ceph health detail
HEALTH_WARN too many PGs per OSD (645 > max 300)
too many PGs per OSD (645 > max 300)

[root@dl-disk4 ceph-dir]# echo "hello123" > /cephfs/test1
[root@dl-disk4 ceph-dir]# cat /cephfs/test1
hello123

[root@dl-disk4 ~]# ll /cephfs/test1
-rw-r--r-- 1 root root 9 Jan 28 02:27 /cephfs/test1

[root@dl-disk4 ceph-dir]# vim /cephfs/test1
(in vim)
"/cephfs/test1"
"/cephfs/test1" E667: Fsync failed

[root@dl-disk4 ceph-dir]# cat /cephfs/test1
cat: /cephfs/test1: Operation not permitted

[root@dl-disk4 ceph-dir]# less /cephfs/test1
(read error)

[root@dl-disk4 ceph-dir]# strace /cephfs/test1
execve("/cephfs/test1", ["/cephfs/test1"], [/* 32 vars */]) = -1 EACCES (Permission denied)
write(2, "strace: exec: Permission denied\n", 32strace: exec: Permission denied
) = 32
exit_group(1)                           = ?
+++ exited with 1 +++
[root@dl-disk4 ceph-dir]# strace cat /cephfs/test1
execve("/usr/bin/cat", ["cat", "/cephfs/test1"], [/* 32 vars */]) = 0
brk(0)                                  = 0x977000
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f44e7a67000
access("/etc/ld.so.preload", R_OK)      = -1 ENOENT (No such file or directory)
open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=129385, ...}) = 0
mmap(NULL, 129385, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f44e7a47000
close(3)                                = 0
open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \34\2\0\0\0\0\0"..., 832) = 832
fstat(3, {st_mode=S_IFREG|0755, st_size=2107816, ...}) = 0
mmap(NULL, 3932736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = 0x7f44e7486000
mprotect(0x7f44e763c000, 2097152, PROT_NONE) = 0
mmap(0x7f44e783c000, 24576, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x7f44e783c000
mmap(0x7f44e7842000, 16960, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f44e7842000
close(3)                                = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f44e7a46000
mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f44e7a44000
arch_prctl(ARCH_SET_FS, 0x7f44e7a44740) = 0
mprotect(0x7f44e783c000, 16384, PROT_READ) = 0
mprotect(0x60b000, 4096, PROT_READ)     = 0
mprotect(0x7f44e7a68000, 4096, PROT_READ) = 0
munmap(0x7f44e7a47000, 129385)          = 0
brk(0)                                  = 0x977000
brk(0x998000)                           = 0x998000
brk(0)                                  = 0x998000
open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=106065056, ...}) = 0
mmap(NULL, 106065056, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f44e0f5f000
close(3)                                = 0
fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0
open("/cephfs/test1", O_RDONLY)         = 3
fstat(3, {st_mode=S_IFREG|0644, st_size=9, ...}) = 0
fadvise64(3, 0, 0, POSIX_FADV_SEQUENTIAL) = 0
mmap(NULL, 4202496, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f44e0b5d000
read(3, 0x7f44e0b5e000, 4194304)        = -1 EPERM (Operation not permitted)
write(2, "cat: ", 5cat: )                    = 5
write(2, "/cephfs/test1", 13/cephfs/test1)           = 13
open("/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 4
fstat(4, {st_mode=S_IFREG|0644, st_size=2502, ...}) = 0
mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = 0x7f44e7a66000
read(4, "# Locale name alias data base.\n#"..., 4096) = 2502
read(4, "", 4096)                       = 0
close(4)                                = 0
munmap(0x7f44e7a66000, 4096)            = 0
open("/usr/share/locale/en_US.UTF-8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en_US.utf8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en_US/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en.UTF-8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en.utf8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
open("/usr/share/locale/en/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
write(2, ": Operation not permitted", 25: Operation not permitted) = 25
write(2, "\n", 1
)                       = 1
munmap(0x7f44e0b5d000, 4202496)         = 0
close(3)                                = 0
close(1)                                = 0
close(2)                                = 0
exit_group(1)                           = ?
+++ exited with 1 +++



Also, when I mount with ceph-fuse instead, the fsync failure does not occur and files can be opened, but the files that could not be opened via the kernel mount now appear garbled.

Could you please give me some advice on how to recover from this? Thank you.

Best Regards,
FaHui


_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux