Ceph FS - MDS problem

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi there,

maybe you could be so kind and help me with following issue:

We are running Ceph FS, but there is repeatedly a problem with the MDS.

Sometimes following error occurs: "mds0: Client 701782 failing to respond to capability release"
Listing the session information shows that the "num_caps" value on that client is much higher than on the other clients. ( see also -> attachment )

The problem is that the load on one of the servers increases to a really high value ( 80 to 100 ), independent of the client which is complaining.

I guess my problem is also that I don't really understand the meaning of those "capabilities".

Following facts (let me know if you need more):
  • CEPH-FS-Client, MDS, MON, OSD all on same server
  • Kernel-Client (Kernel: 3.14.16-031416-generic)
  • MDS config
    • only raised "mds cache size = 5000000"  (because before there was the error "failing to respond to cache pressure")


Best regards
Mathias



     


##### CEPH FS ERROR

09:33:30 PROD root@ceph01:~# ceph -s
    cluster xxxxxxxxxxxxxxxxxxxxxxx
     health HEALTH_WARN
            mds0: Client 701782 failing to respond to capability release
     monmap e1: 3 mons at {ceph01=xx.xx.xx.114:6789/0,ceph02=xx.xx.xx.115:6789/0,ceph03=xx.xx.xx.116:6789/0}
            election epoch 106, quorum 0,1,2 ceph01,ceph02,ceph03
     mdsmap e260: 1/1/1 up {0=ceph01=up:active}, 2 up:standby
     .....


-> Load rises immediately


09:33:32 PROD root@ceph01:~# ceph daemon mds.ceph01 session ls
[
    {
        "id": 701782,
        "num_leases": 16,
        "num_caps": 221397,
        "state": "open",
        "replay_requests": 0,
        "reconnecting": false,
        "inst": "client.701782 xx.xx.xx.114:0\/1344307356",
        "client_metadata": {}
    },
    {
        "id": 692103,
        "num_leases": 1,
        "num_caps": 50115,
        "state": "open",
        "replay_requests": 0,
        "reconnecting": false,
        "inst": "client.692103 xx.xx.xx.117:0\/3600471798",
        "client_metadata": {}
    },
    {
        "id": 691995,
        "num_leases": 2,
        "num_caps": 53227,
        "state": "open",
        "replay_requests": 0,
        "reconnecting": false,
        "inst": "client.691995 xx.xx.xx.115:0\/1220606159",
        "client_metadata": {}
    },
    {
        "id": 692058,
        "num_leases": 8,
        "num_caps": 49722,
        "state": "open",
        "replay_requests": 0,
        "reconnecting": false,
        "inst": "client.692058 xx.xx.xx.116:0\/4048537076",
        "client_metadata": {}
    }
]


09:38:18 PROD root@ceph01:~# ceph daemon mds.ceph01 perf dump
{
    "mds": {
        "request": 1387754,
        "reply": 1387696,
        "reply_latency": {
            "avgcount": 1387696,
            "sum": 6439.991891758
        },
        "forward": 0,
        "dir_fetch": 57946,
        "dir_commit": 35053,
        "dir_split": 0,
        "inode_max": 5000000,
        "inodes": 1116643,
        "inodes_top": 837156,
        "inodes_bottom": 279487,
        "inodes_pin_tail": 0,
        "inodes_pinned": 292936,
        "inodes_expired": 0,
        "inodes_with_caps": 269668,
        "caps": 374718,
        "subtrees": 2,
        "traverse": 2591500,
        "traverse_hit": 2492810,
        "traverse_forward": 0,
        "traverse_discover": 0,
        "traverse_dir_fetch": 19330,
        "traverse_remote_ino": 0,
        "traverse_lock": 2350,
        "load_cent": 138774897,
        "q": 0,
        "exported": 0,
        "exported_inodes": 0,
        "imported": 0,
        "imported_inodes": 0
    },
    "mds_cache": {
        "num_strays": 56,
        "num_strays_purging": 0,
        "num_strays_delayed": 0,
        "strays_created": 2835,
        "strays_purged": 2802,
        "num_recovering_processing": 0,
        "num_recovering_enqueued": 0,
        "num_recovering_prioritized": 0,
        "recovery_started": 0,
        "recovery_completed": 0
    },
    "mds_log": {
        "evadd": 376174,
        "evex": 377829,
        "evtrm": 377829,
        "ev": 13815,
        "evexg": 0,
        "evexd": 1024,
        "segadd": 738,
        "segex": 738,
        "segtrm": 738,
        "seg": 31,
        "segexg": 0,
        "segexd": 1,
        "expos": 6882857746,
        "wrpos": 6991387600,
        "rdpos": 4859818564,
        "jlat": 0
    },
    "mds_mem": {
        "ino": 1112733,
        "ino+": 1115537,
        "ino-": 2804,
        "dir": 66813,
        "dir+": 67017,
        "dir-": 204,
        "dn": 1116643,
        "dn+": 1121224,
        "dn-": 4581,
        "cap": 374718,
        "cap+": 1005845,
        "cap-": 631127,
        "rss": 6992420,
        "heap": 49060,
        "malloc": 18446744073708021059,
        "buf": 0
    },
    "mds_server": {
        "handle_client_request": 1387754,
        "handle_slave_request": 0,
        "handle_client_session": 80950,
        "dispatch_client_request": 2526245,
        "dispatch_server_request": 0
    },
    "objecter": {
        "op_active": 0,
        "op_laggy": 0,
        "op_send": 567467,
        "op_send_bytes": 0,
        "op_resend": 0,
        "op_ack": 283387,
        "op_commit": 284080,
        "op": 567467,
        "op_r": 283387,
        "op_w": 284080,
        "op_rmw": 0,
        "op_pg": 0,
        "osdop_stat": 24703,
        "osdop_create": 40923,
        "osdop_read": 24,
        "osdop_write": 186035,
        "osdop_writefull": 17341,
        "osdop_append": 0,
        "osdop_zero": 1,
        "osdop_truncate": 0,
        "osdop_delete": 4721,
        "osdop_mapext": 0,
        "osdop_sparse_read": 0,
        "osdop_clonerange": 0,
        "osdop_getxattr": 283361,
        "osdop_setxattr": 40923,
        "osdop_cmpxattr": 0,
        "osdop_rmxattr": 0,
        "osdop_resetxattrs": 0,
        "osdop_tmap_up": 0,
        "osdop_tmap_put": 0,
        "osdop_tmap_get": 0,
        "osdop_call": 0,
        "osdop_watch": 0,
        "osdop_notify": 0,
        "osdop_src_cmpxattr": 0,
        "osdop_pgls": 0,
        "osdop_pgls_filter": 0,
        "osdop_other": 221016,
        "linger_active": 0,
        "linger_send": 0,
        "linger_resend": 0,
        "linger_ping": 0,
        "poolop_active": 0,
        "poolop_send": 0,
        "poolop_resend": 0,
        "poolstat_active": 0,
        "poolstat_send": 0,
        "poolstat_resend": 0,
        "statfs_active": 0,
        "statfs_send": 0,
        "statfs_resend": 0,
        "command_active": 0,
        "command_send": 0,
        "command_resend": 0,
        "map_epoch": 1025,
        "map_full": 0,
        "map_inc": 2,
        "osd_sessions": 210,
        "osd_session_open": 3589,
        "osd_session_close": 3569,
        "osd_laggy": 0
    },
    "throttle-msgr_dispatch_throttler-mds": {
        "val": 0,
        "max": 104857600,
        "get": 6198403,
        "get_sum": 1661221790,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 0,
        "take": 0,
        "take_sum": 0,
        "put": 6198403,
        "put_sum": 1661221790,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000
        }
    },
    "throttle-objecter_bytes": {
        "val": 0,
        "max": 104857600,
        "get": 0,
        "get_sum": 0,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 0,
        "take": 567467,
        "take_sum": 2263639983,
        "put": 562730,
        "put_sum": 2263639983,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000
        }
    },
    "throttle-objecter_ops": {
        "val": 0,
        "max": 1024,
        "get": 0,
        "get_sum": 0,
        "get_or_fail_fail": 0,
        "get_or_fail_success": 0,
        "take": 567467,
        "take_sum": 567467,
        "put": 567467,
        "put_sum": 567467,
        "wait": {
            "avgcount": 0,
            "sum": 0.000000000
        }
    }
}

Attachment: smime.p7s
Description: S/MIME Cryptographic Signature

_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux