On Thu, Jan 28, 2016 at 3:14 PM, FaHui Lin <fahui.lin@xxxxxxxxxx> wrote: > Dear Mr. Zheng and all, > > I'm pretty sure the permission is OK because I could read/write normally > before and even now I still can write a new file (via stdout redirection), > as shown in my last mail. The problem happens when fsync is called (e.g. > using vim) and fails and the file can no longer be opened (say via cat, > less, cp, ... ). > > What I'd like to know is: > 1) How can I find out more clues about this issue? > 2) Is there any way to regain my data on this CephFS? (something like > fsck?). This should be a kernel client issue; there is no need for fsck. Please enable kernel dynamic debug: echo module ceph +p > /sys/kernel/debug/dynamic_debug/control echo module libceph +p > /sys/kernel/debug/dynamic_debug/control Then run vim and cat, gather the kernel log, and send it to us. Regards Yan, Zheng. > > By the way, here is the setup for the authentication: > > [root@dl-disk4 ~]# ceph auth get client.user > exported keyring for client.user > [client.user] > key = ......== > caps mon = "allow r" > caps osd = "allow rw pool=cephfs,cephfsmeta" > > [root@dl-disk4 ~]# cephfs-journal-tool journal inspect > Overall journal integrity: OK > > Thank you. > > Best Regards, > FaHui > > > > Yan, Zheng 於 2016/1/28 下午 02:51 寫道: > > This seems like the user has no read/write permission to cephfs data pool. > > Regards > Yan, Zheng > > On Thu, Jan 28, 2016 at 11:36 AM, FaHui Lin <fahui.lin@xxxxxxxxxx> wrote: > > Dear Ceph experts, > > I've got a problem with CephFS one day. > When I use vim to edit a file on cephfs, it will show fsync failed, and > later the file cannot be read/opened anymore. > Strangely there is no error I can spot on ceph logs, dmesg, etc. 
> Here is an example below: (all machines in my ceph cluster have the same OS, > kernel, and ceph version) > > [root@dl-disk4 ceph-dir]# uname -a > Linux dl-disk4 3.10.0-327.4.4.el7.x86_64 #1 SMP Tue Jan 5 16:07:00 UTC 2016 > x86_64 x86_64 x86_64 GNU/Linux > > [root@dl-disk4 ceph-dir]# ceph version > ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43) > > [root@dl-disk4 ceph-dir]# mount | grep cephfs > xxx.xxx.xxx.xxx:6789:/ on /cephfs type ceph > (rw,relatime,name=user,secret=<hidden>) > > [root@dl-disk4 ceph-dir]# ceph -s > cluster 13c231fc-837e-48bb-b4d4-8a0ce1c12a24 > health HEALTH_WARN > too many PGs per OSD (645 > max 300) > monmap e1: 3 mons at > {dl-disk1=xxx.xxx.xxx.xxx:6789/0,dl-disk2=xxx.xxx.xxx.xxx:6789/0,dl-disk3=xxx.xxx.xxx.xxx:6789/0} > election epoch 60, quorum 0,1,2 dl-disk1,dl-disk2,dl-disk3 > mdsmap e76: 1/1/1 up {0=dl-disk4=up:active} > osdmap e307: 32 osds: 32 up, 32 in > pgmap v239602: 8288 pgs, 4 pools, 375 GB data, 1311 kobjects > 924 GB used, 348 TB / 349 TB avail > 8288 active+clean > [root@dl-disk4 ceph-dir]# ceph health detail > HEALTH_WARN too many PGs per OSD (645 > max 300) > too many PGs per OSD (645 > max 300) > > [root@dl-disk4 ceph-dir]# echo "hello123" > /cephfs/test1 > [root@dl-disk4 ceph-dir]# cat /cephfs/test1 > hello123 > > [root@dl-disk4 ~]# ll /cephfs/test1 > -rw-r--r-- 1 root root 9 Jan 28 02:27 /cephfs/test1 > > [root@dl-disk4 ceph-dir]# vim /cephfs/test1 > (in vim) > "/cephfs/test1" > "/cephfs/test1" E667: Fsync failed > > [root@dl-disk4 ceph-dir]# cat /cephfs/test1 > cat: /cephfs/test1: Operation not permitted > > [root@dl-disk4 ceph-dir]# less /cephfs/test1 > (read error) > > [root@dl-disk4 ceph-dir]# strace /cephfs/test1 > execve("/cephfs/test1", ["/cephfs/test1"], [/* 32 vars */]) = -1 EACCES > (Permission denied) > write(2, "strace: exec: Permission denied\n", 32strace: exec: Permission > denied > ) = 32 > exit_group(1) = ? 
> +++ exited with 1 +++ > [root@dl-disk4 ceph-dir]# strace cat /cephfs/test1 > execve("/usr/bin/cat", ["cat", "/cephfs/test1"], [/* 32 vars */]) = 0 > brk(0) = 0x977000 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = > 0x7f44e7a67000 > access("/etc/ld.so.preload", R_OK) = -1 ENOENT (No such file or > directory) > open("/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=129385, ...}) = 0 > mmap(NULL, 129385, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f44e7a47000 > close(3) = 0 > open("/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3 > read(3, "\177ELF\2\1\1\3\0\0\0\0\0\0\0\0\3\0>\0\1\0\0\0 \34\2\0\0\0\0\0"..., > 832) = 832 > fstat(3, {st_mode=S_IFREG|0755, st_size=2107816, ...}) = 0 > mmap(NULL, 3932736, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_DENYWRITE, 3, 0) = > 0x7f44e7486000 > mprotect(0x7f44e763c000, 2097152, PROT_NONE) = 0 > mmap(0x7f44e783c000, 24576, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED|MAP_DENYWRITE, 3, 0x1b6000) = 0x7f44e783c000 > mmap(0x7f44e7842000, 16960, PROT_READ|PROT_WRITE, > MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS, -1, 0) = 0x7f44e7842000 > close(3) = 0 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = > 0x7f44e7a46000 > mmap(NULL, 8192, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = > 0x7f44e7a44000 > arch_prctl(ARCH_SET_FS, 0x7f44e7a44740) = 0 > mprotect(0x7f44e783c000, 16384, PROT_READ) = 0 > mprotect(0x60b000, 4096, PROT_READ) = 0 > mprotect(0x7f44e7a68000, 4096, PROT_READ) = 0 > munmap(0x7f44e7a47000, 129385) = 0 > brk(0) = 0x977000 > brk(0x998000) = 0x998000 > brk(0) = 0x998000 > open("/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=106065056, ...}) = 0 > mmap(NULL, 106065056, PROT_READ, MAP_PRIVATE, 3, 0) = 0x7f44e0f5f000 > close(3) = 0 > fstat(1, {st_mode=S_IFCHR|0620, st_rdev=makedev(136, 0), ...}) = 0 > open("/cephfs/test1", O_RDONLY) = 3 > fstat(3, {st_mode=S_IFREG|0644, st_size=9, ...}) = 0 > 
fadvise64(3, 0, 0, POSIX_FADV_SEQUENTIAL) = 0 > mmap(NULL, 4202496, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) > = 0x7f44e0b5d000 > read(3, 0x7f44e0b5e000, 4194304) = -1 EPERM (Operation not permitted) > write(2, "cat: ", 5cat: ) = 5 > write(2, "/cephfs/test1", 13/cephfs/test1) = 13 > open("/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 4 > fstat(4, {st_mode=S_IFREG|0644, st_size=2502, ...}) = 0 > mmap(NULL, 4096, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0) = > 0x7f44e7a66000 > read(4, "# Locale name alias data base.\n#"..., 4096) = 2502 > read(4, "", 4096) = 0 > close(4) = 0 > munmap(0x7f44e7a66000, 4096) = 0 > open("/usr/share/locale/en_US.UTF-8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 > ENOENT (No such file or directory) > open("/usr/share/locale/en_US.utf8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 > ENOENT (No such file or directory) > open("/usr/share/locale/en_US/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT > (No such file or directory) > open("/usr/share/locale/en.UTF-8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT > (No such file or directory) > open("/usr/share/locale/en.utf8/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT > (No such file or directory) > open("/usr/share/locale/en/LC_MESSAGES/libc.mo", O_RDONLY) = -1 ENOENT (No > such file or directory) > write(2, ": Operation not permitted", 25: Operation not permitted) = 25 > write(2, "\n", 1 > ) = 1 > munmap(0x7f44e0b5d000, 4202496) = 0 > close(3) = 0 > close(1) = 0 > close(2) = 0 > exit_group(1) = ? > +++ exited with 1 +++ > > > Also, using ceph-fuse will not get me the fsync failed problem and files can > be opened, but those files which cannot be opened with kernel mount now > become garbled. > > Could you please inform me some advice about how to recover it? Thank you. 
> > Best Regards, > FaHui > > > > _______________________________________________ > ceph-users mailing list > ceph-users@xxxxxxxxxxxxxx > http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com > > _______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com