Re: ceph mds memory usage 20GB : is it normal ?

On Sat, Apr 14, 2018 at 9:23 PM, Alexandre DERUMIER <aderumier@xxxxxxxxx> wrote:
> Hi,
>
> Still leaking after the update to 12.2.4, around 17G after 9 days
>
>
>
>
> USER         PID %CPU %MEM    VSZ   RSS TTY      STAT START   TIME COMMAND
>
> ceph      629903 50.7 25.9 17473680 17082432 ?   Ssl  avril05 6498:21 /usr/bin/ceph-mds -f --cluster ceph --id ceph4-1.odiso.net --setuser ceph --setgroup ceph
>
>
>
>
>
> ~# ceph daemon mds.ceph4-1.odiso.net cache status
> {
>     "pool": {
>         "items": 16019302,
>         "bytes": 5100941968
>     }
> }
>
>
>
>
>
> # ceph daemon mds.ceph4-1.odiso.net perf dump
> {
>     "AsyncMessenger::Worker-0": {
>         "msgr_recv_messages": 648541059,
>         "msgr_send_messages": 666102301,
>         "msgr_recv_bytes": 4943336751206,
>         "msgr_send_bytes": 868468165048,
>         "msgr_created_connections": 167,
>         "msgr_active_connections": 166,
>         "msgr_running_total_time": 33884.943400671,
>         "msgr_running_send_time": 12229.226645264,
>         "msgr_running_recv_time": 26234.680757843,
>         "msgr_running_fast_dispatch_time": 4650.248980986
>     },
>     "AsyncMessenger::Worker-1": {
>         "msgr_recv_messages": 732301444,
>         "msgr_send_messages": 750526966,
>         "msgr_recv_bytes": 4248782228635,
>         "msgr_send_bytes": 2379403291660,
>         "msgr_created_connections": 172,
>         "msgr_active_connections": 171,
>         "msgr_running_total_time": 38490.093448635,
>         "msgr_running_send_time": 14692.222019414,
>         "msgr_running_recv_time": 31000.304091618,
>         "msgr_running_fast_dispatch_time": 3945.573521893
>     },
>     "AsyncMessenger::Worker-2": {
>         "msgr_recv_messages": 503228767,
>         "msgr_send_messages": 485729577,
>         "msgr_recv_bytes": 3644656184942,
>         "msgr_send_bytes": 526380645708,
>         "msgr_created_connections": 156,
>         "msgr_active_connections": 156,
>         "msgr_running_total_time": 26566.051442840,
>         "msgr_running_send_time": 9335.249687474,
>         "msgr_running_recv_time": 22643.927960456,
>         "msgr_running_fast_dispatch_time": 3426.566334706
>     },
>     "finisher-PurgeQueue": {
>         "queue_len": 0,
>         "complete_latency": {
>             "avgcount": 2077128,
>             "sum": 10029.468276512,
>             "avgtime": 0.004828526
>         }
>     },
>     "mds": {
>         "request": 1320419754,
>         "reply": 1320418963,
>         "reply_latency": {
>             "avgcount": 1320418963,
>             "sum": 3567340.917522550,
>             "avgtime": 0.002701673
>         },
>         "forward": 0,
>         "dir_fetch": 95955541,
>         "dir_commit": 5380286,
>         "dir_split": 29080,
>         "dir_merge": 28453,
>         "inode_max": 2147483647,
>         "inodes": 2049324,
>         "inodes_top": 55759,
>         "inodes_bottom": 118910,
>         "inodes_pin_tail": 1874655,
>         "inodes_pinned": 1969667,
>         "inodes_expired": 14225864524,
>         "inodes_with_caps": 1969030,
>         "caps": 3010600,
>         "subtrees": 2,
>         "traverse": 1433042396,
>         "traverse_hit": 855810795,
>         "traverse_forward": 0,
>         "traverse_discover": 0,
>         "traverse_dir_fetch": 75553963,
>         "traverse_remote_ino": 5462,
>         "traverse_lock": 217,
>         "load_cent": 132079451933,
>         "q": 41,
>         "exported": 0,
>         "exported_inodes": 0,
>         "imported": 0,
>         "imported_inodes": 0
>     },
>     "mds_cache": {
>         "num_strays": 150,
>         "num_strays_delayed": 0,
>         "num_strays_enqueuing": 0,
>         "strays_created": 2317004,
>         "strays_enqueued": 2316671,
>         "strays_reintegrated": 288,
>         "strays_migrated": 0,
>         "num_recovering_processing": 0,
>         "num_recovering_enqueued": 0,
>         "num_recovering_prioritized": 0,
>         "recovery_started": 0,
>         "recovery_completed": 0,
>         "ireq_enqueue_scrub": 0,
>         "ireq_exportdir": 0,
>         "ireq_flush": 0,
>         "ireq_fragmentdir": 57533,
>         "ireq_fragstats": 0,
>         "ireq_inodestats": 0
>     },
>     "mds_log": {
>         "evadd": 293928039,
>         "evex": 293928281,
>         "evtrm": 293926233,
>         "ev": 26595,
>         "evexg": 0,
>         "evexd": 2048,
>         "segadd": 365381,
>         "segex": 365382,
>         "segtrm": 365380,
>         "seg": 32,
>         "segexg": 0,
>         "segexd": 2,
>         "expos": 4997676796422,
>         "wrpos": 4997732797135,
>         "rdpos": 4232612352311,
>         "jlat": {
>             "avgcount": 62629276,
>             "sum": 260619.838247062,
>             "avgtime": 0.004161310
>         },
>         "replayed": 24789
>     },
>     "mds_mem": {
>         "ino": 2048405,
>         "ino+": 14160488289,
>         "ino-": 14158439884,
>         "dir": 377882,
>         "dir+": 15421679,
>         "dir-": 15043797,
>         "dn": 2049614,
>         "dn+": 14231703198,
>         "dn-": 14229653584,
>         "cap": 3010600,
>         "cap+": 1555206662,
>         "cap-": 1552196062,
>         "rss": 17082432,
>         "heap": 313916,
>         "buf": 0
>     },
>     "mds_server": {
>         "dispatch_client_request": 1437033326,
>         "dispatch_server_request": 0,
>         "handle_client_request": 1320419754,
>         "handle_client_session": 11542297,
>         "handle_slave_request": 0,
>         "req_create": 18618128,
>         "req_getattr": 11195570,
>         "req_getfilelock": 0,
>         "req_link": 411,
>         "req_lookup": 1005844421,
>         "req_lookuphash": 0,
>         "req_lookupino": 0,
>         "req_lookupname": 37344,
>         "req_lookupparent": 0,
>         "req_lookupsnap": 0,
>         "req_lssnap": 0,
>         "req_mkdir": 691747,
>         "req_mknod": 18,
>         "req_mksnap": 0,
>         "req_open": 230213054,
>         "req_readdir": 50618109,
>         "req_rename": 17377032,
>         "req_renamesnap": 0,
>         "req_rmdir": 463707,
>         "req_rmsnap": 0,
>         "req_rmxattr": 0,
>         "req_setattr": 1963949,
>         "req_setdirlayout": 0,
>         "req_setfilelock": 210187,
>         "req_setlayout": 0,
>         "req_setxattr": 8,
>         "req_symlink": 1971,
>         "req_unlink": 1801435
>     },
>     "mds_sessions": {
>         "session_count": 305,
>         "session_add": 473,
>         "session_remove": 168
>     },
>     "objecter": {
>         "op_active": 0,
>         "op_laggy": 0,
>         "op_send": 197270397,
>         "op_send_bytes": 796275884964,
>         "op_resend": 7,
>         "op_reply": 197270390,
>         "op": 197270390,
>         "op_r": 96075672,
>         "op_w": 101194718,
>         "op_rmw": 0,
>         "op_pg": 0,
>         "osdop_stat": 4428036,
>         "osdop_create": 19400797,
>         "osdop_read": 31288,
>         "osdop_write": 62709547,
>         "osdop_writefull": 165583,
>         "osdop_writesame": 0,
>         "osdop_append": 0,
>         "osdop_zero": 2,
>         "osdop_truncate": 13280,
>         "osdop_delete": 3185444,
>         "osdop_mapext": 0,
>         "osdop_sparse_read": 0,
>         "osdop_clonerange": 0,
>         "osdop_getxattr": 27007173,
>         "osdop_setxattr": 38801594,
>         "osdop_cmpxattr": 0,
>         "osdop_rmxattr": 0,
>         "osdop_resetxattrs": 0,
>         "osdop_tmap_up": 0,
>         "osdop_tmap_put": 0,
>         "osdop_tmap_get": 0,
>         "osdop_call": 0,
>         "osdop_watch": 0,
>         "osdop_notify": 0,
>         "osdop_src_cmpxattr": 0,
>         "osdop_pgls": 0,
>         "osdop_pgls_filter": 0,
>         "osdop_other": 10143158,
>         "linger_active": 0,
>         "linger_send": 0,
>         "linger_resend": 0,
>         "linger_ping": 0,
>         "poolop_active": 0,
>         "poolop_send": 0,
>         "poolop_resend": 0,
>         "poolstat_active": 0,
>         "poolstat_send": 0,
>         "poolstat_resend": 0,
>         "statfs_active": 0,
>         "statfs_send": 0,
>         "statfs_resend": 0,
>         "command_active": 0,
>         "command_send": 0,
>         "command_resend": 0,
>         "map_epoch": 3044,
>         "map_full": 0,
>         "map_inc": 160,
>         "osd_sessions": 18,
>         "osd_session_open": 20,
>         "osd_session_close": 2,
>         "osd_laggy": 0,
>         "omap_wr": 9743114,
>         "omap_rd": 191911089,
>         "omap_del": 684272
>     },
>     "purge_queue": {
>         "pq_executing_ops": 0,
>         "pq_executing": 0,
>         "pq_executed": 2316671
>     },
>     "throttle-msgr_dispatch_throttler-mds": {
>         "val": 0,
>         "max": 104857600,
>         "get_started": 0,
>         "get": 1884071270,
>         "get_sum": 12697353890803,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 1884071270,
>         "take": 0,
>         "take_sum": 0,
>         "put": 1884071270,
>         "put_sum": 12697353890803,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     },
>     "throttle-objecter_bytes": {
>         "val": 0,
>         "max": 104857600,
>         "get_started": 0,
>         "get": 0,
>         "get_sum": 0,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 0,
>         "take": 197270390,
>         "take_sum": 796529593788,
>         "put": 183928495,
>         "put_sum": 796529593788,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     },
>     "throttle-objecter_ops": {
>         "val": 0,
>         "max": 1024,
>         "get_started": 0,
>         "get": 0,
>         "get_sum": 0,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 0,
>         "take": 197270390,
>         "take_sum": 197270390,
>         "put": 197270390,
>         "put_sum": 197270390,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     },
>     "throttle-write_buf_throttle": {
>         "val": 0,
>         "max": 3758096384,
>         "get_started": 0,
>         "get": 2316671,
>         "get_sum": 215451035,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 2316671,
>         "take": 0,
>         "take_sum": 0,
>         "put": 31223,
>         "put_sum": 215451035,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     },
>     "throttle-write_buf_throttle-0x563c33bea220": {
>         "val": 29763,
>         "max": 3758096384,
>         "get_started": 0,
>         "get": 293928039,
>         "get_sum": 765120443785,
>         "get_or_fail_fail": 0,
>         "get_or_fail_success": 293928039,
>         "take": 0,
>         "take_sum": 0,
>         "put": 62629276,
>         "put_sum": 765120414022,
>         "wait": {
>             "avgcount": 0,
>             "sum": 0.000000000,
>             "avgtime": 0.000000000
>         }
>     }
> }
>

I can't find any clue in this output. Next time it happens, could you please try
"ceph tell mds.xxx heap release"?

>
>
> # ceph status
>   cluster:
>     id:     e22b8e83-3036-4fe5-8fd5-5ce9d539beca
>     health: HEALTH_OK
>
>   services:
>     mon: 3 daemons, quorum ceph4-1,ceph4-2,ceph4-3
>     mgr: ceph4-2.odiso.net(active), standbys: ceph4-3.odiso.net, ceph4-1.odiso.net
>     mds: cephfs4-1/1/1 up  {0=ceph4-1.odiso.net=up:active}, 2 up:standby
>     osd: 18 osds: 18 up, 18 in
>
>   data:
>     pools:   11 pools, 1992 pgs
>     objects: 72258k objects, 5918 GB
>     usage:   20088 GB used, 6737 GB / 26825 GB avail
>     pgs:     1992 active+clean
>
>   io:
>     client:   3099 kB/s rd, 6412 kB/s wr, 108 op/s rd, 481 op/s wr
>
>
> ----- Original Message -----
> From: "Patrick Donnelly" <pdonnell@xxxxxxxxxx>
> To: "aderumier" <aderumier@xxxxxxxxx>
> Cc: "ceph-users" <ceph-users@xxxxxxxxxxxxxx>
> Sent: Tuesday, March 27, 2018 20:35:08
> Subject: Re:  ceph mds memory usage 20GB : is it normal ?
>
> Hello Alexandre,
>
> On Thu, Mar 22, 2018 at 2:29 AM, Alexandre DERUMIER <aderumier@xxxxxxxxx> wrote:
>> Hi,
>>
>> I've been running cephfs for 2 months now,
>>
>> and my active mds memory usage is around 20G now (still growing).
>>
>> USER PID %CPU %MEM VSZ RSS TTY STAT START TIME COMMAND
>> ceph 1521539 10.8 31.2 20929836 20534868 ? Ssl janv.26 8573:34 /usr/bin/ceph-mds -f --cluster ceph --id 2 --setuser ceph --setgroup ceph
>>
>>
>> this is on luminous 12.2.2
>>
>> only tuning done is:
>>
>> mds_cache_memory_limit = 5368709120
>>
>>
>> (5GB). I know it's a soft limit, but 20G seems quite huge vs 5GB ....
>>
>>
>> Is it normal ?
>
> No, that's definitely not normal!
>
>
>> # ceph daemon mds.2 perf dump mds
>> {
>> "mds": {
>> "request": 1444009197,
>> "reply": 1443999870,
>> "reply_latency": {
>> "avgcount": 1443999870,
>> "sum": 1657849.656122933,
>> "avgtime": 0.001148095
>> },
>> "forward": 0,
>> "dir_fetch": 51740910,
>> "dir_commit": 9069568,
>> "dir_split": 64367,
>> "dir_merge": 58016,
>> "inode_max": 2147483647,
>> "inodes": 2042975,
>> "inodes_top": 152783,
>> "inodes_bottom": 138781,
>> "inodes_pin_tail": 1751411,
>> "inodes_pinned": 1824714,
>> "inodes_expired": 7258145573,
>> "inodes_with_caps": 1812018,
>> "caps": 2538233,
>> "subtrees": 2,
>> "traverse": 1591668547,
>> "traverse_hit": 1259482170,
>> "traverse_forward": 0,
>> "traverse_discover": 0,
>> "traverse_dir_fetch": 30827836,
>> "traverse_remote_ino": 7510,
>> "traverse_lock": 86236,
>> "load_cent": 144401980319,
>> "q": 49,
>> "exported": 0,
>> "exported_inodes": 0,
>> "imported": 0,
>> "imported_inodes": 0
>> }
>> }
>
> Can you also share `ceph daemon mds.2 cache status`, the full `ceph
> daemon mds.2 perf dump`, and `ceph status`?
>
> Note [1] will be in 12.2.5 and may help with your issue.
>
> [1] https://github.com/ceph/ceph/pull/20527
>
> --
> Patrick Donnelly
>
> _______________________________________________
> ceph-users mailing list
> ceph-users@xxxxxxxxxxxxxx
> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com



