You can try
ceph daemon mds.host session evict
to kill it off.
------------------ Original ------------------
From: "Zhao Xu";<xuzh.fdu@xxxxxxxxx>;
Date: Feb 3, 2016
To: "Goncalo Borges"<goncalo.borges@xxxxxxxxxxxxx>;
Cc: "ceph-users@xxxxxxxxxxxxxx"<ceph-users@xxxxxxxxxxxxxx>;
Subject: Re: [ceph-users] Urgent help needed for ceph storage "mount error 5 = Input/output error"
[root@igc-head ~]# ceph daemon mds.igc-head session ls
[
{
"id": 274143,
"num_leases": 0,
"num_caps": 0,
"state": "closing",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274143 10.1.10.1:0\/3555390539",
"client_metadata": {
"entity_id": "admin",
"hostname": "igc-head"
}
},
{
"id": 204105,
"num_leases": 0,
"num_caps": 0,
"state": "killing",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.204105 10.1.10.1:0\/951296261",
"client_metadata": {
"entity_id": "admin",
"hostname": "igc-head"
}
},
{
"id": 274120,
"num_leases": 0,
"num_caps": 0,
"state": "opening",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274120 10.1.10.1:0\/5799",
"client_metadata": {
"ceph_sha1": "9764da52395923e0b32908d83a9f7304401fee43",
"ceph_version": "ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43)",
"entity_id": "admin",
"hostname": "igc-head",
"mount_point": ""
}
},
{
"id": 274148,
"num_leases": 0,
"num_caps": 0,
"state": "opening",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274148 10.1.10.1:0\/5695",
"client_metadata": {
"ceph_sha1": "9764da52395923e0b32908d83a9f7304401fee43",
"ceph_version": "ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43)",
"entity_id": "admin",
"hostname": "igc-head",
"mount_point": "\/home\/igc\/igc"
}
},
{
"id": 274186,
"num_leases": 0,
"num_caps": 0,
"state": "opening",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274186 10.1.10.1:0\/5776",
"client_metadata": {
"ceph_sha1": "9764da52395923e0b32908d83a9f7304401fee43",
"ceph_version": "ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43)",
"entity_id": "admin",
"hostname": "igc-head",
"mount_point": ""
}
},
{
"id": 274154,
"num_leases": 0,
"num_caps": 0,
"state": "closing",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274154 10.1.10.1:0\/3620932361",
"client_metadata": {
"entity_id": "admin",
"hostname": "igc-head"
}
},
{
"id": 214112,
"num_leases": 0,
"num_caps": 0,
"state": "killing",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.214112 10.1.10.12:0\/2793087314",
"client_metadata": {
"entity_id": "admin",
"hostname": "i2"
}
},
{
"id": 274182,
"num_leases": 0,
"num_caps": 0,
"state": "opening",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274182 10.1.10.1:0\/5622",
"client_metadata": {
"ceph_sha1": "9764da52395923e0b32908d83a9f7304401fee43",
"ceph_version": "ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43)",
"entity_id": "admin",
"hostname": "igc-head",
"mount_point": "\/mnt\/igcfs"
}
},
{
"id": 274164,
"num_leases": 0,
"num_caps": 0,
"state": "opening",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274164 10.1.10.1:0\/5822",
"client_metadata": {
"ceph_sha1": "9764da52395923e0b32908d83a9f7304401fee43",
"ceph_version": "ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43)",
"entity_id": "admin",
"hostname": "igc-head",
"mount_point": "\/mnt\/igcfs"
}
},
{
"id": 274125,
"num_leases": 0,
"num_caps": 0,
"state": "opening",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274125 10.1.10.1:0\/5939",
"client_metadata": {
"ceph_sha1": "9764da52395923e0b32908d83a9f7304401fee43",
"ceph_version": "ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43)",
"entity_id": "admin",
"hostname": "igc-head",
"mount_point": "\/mnt\/igcfs"
}
},
{
"id": 274138,
"num_leases": 0,
"num_caps": 0,
"state": "closing",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274138 10.1.10.1:0\/2304566292",
"client_metadata": {
"entity_id": "admin",
"hostname": "igc-head"
}
},
{
"id": 274159,
"num_leases": 0,
"num_caps": 0,
"state": "closing",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274159 10.1.10.1:0\/3311695642",
"client_metadata": {
"entity_id": "admin",
"hostname": "igc-head"
}
},
{
"id": 214166,
"num_leases": 0,
"num_caps": 0,
"state": "killing",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.214166 10.1.10.13:0\/4115598344",
"client_metadata": {
"entity_id": "admin",
"hostname": "i3"
}
},
{
"id": 224138,
"num_leases": 0,
"num_caps": 0,
"state": "killing",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.224138 10.1.10.11:0\/1531154666",
"client_metadata": {
"entity_id": "admin",
"hostname": "i1"
}
},
{
"id": 274133,
"num_leases": 0,
"num_caps": 0,
"state": "closing",
"replay_requests": 0,
"reconnecting": false,
"inst": "client.274133 10.1.10.1:0\/3988909303",
"client_metadata": {
"entity_id": "admin",
"hostname": "igc-head"
}
}
]
Thanks,
X
On Tue, Feb 2, 2016 at 4:04 PM, Goncalo Borges <goncalo.borges@xxxxxxxxxxxxx> wrote:
Hi X
Have you tried to inspect the mds for problematic sessions still connected from those clients?
To check which sessions are still connected to the mds, run the following (this is for ceph 9.2.0; the command might be different, or might not even exist, in older versions):
ceph daemon mds.<name> session ls
Cheers
G.
From: ceph-users [ceph-users-bounces@xxxxxxxxxxxxxx] on behalf of Zhao Xu [xuzh.fdu@xxxxxxxxx]
Sent: 03 February 2016 08:10
To: Mykola Dvornik
Cc: ceph-users@xxxxxxxxxxxxxx
Subject: Re: [ceph-users] Urgent help needed for ceph storage "mount error 5 = Input/output error"
I had no luck with ceph-fuse:
[root@igc-head ~]# ceph-fuse -d -m igc-head,is1,i1,i2,i3:6789 /mnt/igcfs/
2016-02-03 04:55:08.756420 7fe3f7437780 0 ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43), process ceph-fuse, pid 5822
ceph-fuse[5822]: starting ceph client
2016-02-03 04:55:08.794920 7fe3f7437780 -1 init, newargv = 0x3292e20 newargc=11
ceph-fuse[5822]: ceph mount failed with (110) Connection timed out
2016-02-03 05:00:08.829498 7fe3e77fe700 0 client.274164 ms_handle_reset on 10.1.10.1:6800/2641
[root@igc-head ~]# ceph-fuse -d -m igc-head:6789 /mnt/igcfs/
2016-02-03 05:00:47.029698 7f1ec270a780 0 ceph version 0.94.5 (9764da52395923e0b32908d83a9f7304401fee43), process ceph-fuse, pid 5939
ceph-fuse[5939]: starting ceph client
2016-02-03 05:00:47.067364 7f1ec270a780 -1 init, newargv = 0x2dd9330 newargc=11
ceph-fuse[5939]: ceph mount failed with (110) Connection timed out
2016-02-03 05:05:47.100815 7f1eb67fc700 0 client.274125 ms_handle_reset on 10.1.10.1:6800/2641
Any log file I should provide here?
Thanks,
X
On Tue, Feb 2, 2016 at 11:47 AM, Mykola Dvornik <mykola.dvornik@xxxxxxxxx> wrote:
Try to mount with ceph-fuse. It worked for me when I faced the same sort of issues you are now dealing with.
-Mykola
On Tue, Feb 2, 2016 at 8:42 PM, Zhao Xu <xuzh.fdu@xxxxxxxxx> wrote:
Thank you Mykola. The issue is that we have strongly suggested adding OSDs many times, but we are not the decision makers.
For now, I just want to mount the ceph drive again, even in read only mode, so that they can read the data. Any idea on how to achieve this?
Thanks,
X
On Tue, Feb 2, 2016 at 9:57 AM, Mykola Dvornik <mykola.dvornik@xxxxxxxxx> wrote:
I would strongly(!) suggest that you add a few more OSDs to the cluster before things get worse / corrupted.
-Mykola
On Tue, Feb 2, 2016 at 6:45 PM, Zhao Xu <xuzh.fdu@xxxxxxxxx> wrote:
Hi All,
Recently our ceph storage has been running at low performance. Today, we cannot write to the folder. We tried to unmount the ceph storage and then re-mount it; however, we cannot even mount it now:
# mount -v -t ceph igc-head,is1,i1,i2,i3:6789:/ /mnt/igcfs/ -o name=admin,secretfile=/etc/admin.secret
parsing options: rw,name=admin,secretfile=/etc/admin.secret
mount error 5 = Input/output error
Previously there were some nearly full OSDs, so we ran "ceph osd reweight-by-utilization" to rebalance the usage. The ceph health is not ideal, but it should still be alive. Please help me to mount the disk again.
[root@igc-head ~]# ceph -s
cluster debdcfe9-20d3-404b-921c-2210534454e1
health HEALTH_WARN
39 pgs degraded
39 pgs stuck degraded
3 pgs stuck inactive
332 pgs stuck unclean
39 pgs stuck undersized
39 pgs undersized
48 requests are blocked > 32 sec
recovery 129755/8053623 objects degraded (1.611%)
recovery 965837/8053623 objects misplaced (11.993%)
mds0: Behind on trimming (455/30)
clock skew detected on mon.i1, mon.i2, mon.i3
monmap e1: 5 mons at {i1=10.1.10.11:6789/0,i2=10.1.10.12:6789/0,i3=10.1.10.13:6789/0,igc-head=10.1.10.1:6789/0,is1=10.1.10.100:6789/0}
election epoch 1314, quorum 0,1,2,3,4 igc-head,i1,i2,i3,is1
mdsmap e1602: 1/1/1 up {0=igc-head=up:active}
osdmap e8007: 17 osds: 17 up, 17 in; 298 remapped pgs
pgmap v5726326: 1088 pgs, 3 pools, 7442 GB data, 2621 kobjects
22228 GB used, 18652 GB / 40881 GB avail
129755/8053623 objects degraded (1.611%)
965837/8053623 objects misplaced (11.993%)
755 active+clean
293 active+remapped
31 active+undersized+degraded
5 active+undersized+degraded+remapped
3 undersized+degraded+peered
1 active+clean+scrubbing
[root@igc-head ~]# ceph osd tree
ID WEIGHT TYPE NAME UP/DOWN REWEIGHT PRIMARY-AFFINITY
-1 39.86992 root default
-2 18.14995 host is1
0 3.62999 osd.0 up 1.00000 1.00000
1 3.62999 osd.1 up 1.00000 1.00000
2 3.62999 osd.2 up 1.00000 1.00000
3 3.62999 osd.3 up 1.00000 1.00000
4 3.62999 osd.4 up 1.00000 1.00000
-3 7.23999 host i1
5 1.81000 osd.5 up 0.44101 1.00000
6 1.81000 osd.6 up 0.40675 1.00000
7 1.81000 osd.7 up 0.60754 1.00000
8 1.81000 osd.8 up 0.50868 1.00000
-4 7.23999 host i2
9 1.81000 osd.9 up 0.54956 1.00000
10 1.81000 osd.10 up 0.44815 1.00000
11 1.81000 osd.11 up 0.53262 1.00000
12 1.81000 osd.12 up 0.47197 1.00000
-5 7.23999 host i3
13 1.81000 osd.13 up 0.55557 1.00000
14 1.81000 osd.14 up 0.65874 1.00000
15 1.81000 osd.15 up 0.49663 1.00000
16 1.81000 osd.16 up 0.50136 1.00000
Thanks,
X
_______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com