Could you try the patch https://github.com/ceph/ceph/pull/22240/files. The leakage of MMDSBeacon messages can explain your issue. Regards Yan, Zheng On Mon, May 28, 2018 at 12:06 PM, Alexandre DERUMIER <aderumier@xxxxxxxxx> wrote: >>>could you send me full output of dump_mempools > > # ceph daemon mds.ceph4-2.odiso.net dump_mempools > { > "bloom_filter": { > "items": 41262668, > "bytes": 41262668 > }, > "bluestore_alloc": { > "items": 0, > "bytes": 0 > }, > "bluestore_cache_data": { > "items": 0, > "bytes": 0 > }, > "bluestore_cache_onode": { > "items": 0, > "bytes": 0 > }, > "bluestore_cache_other": { > "items": 0, > "bytes": 0 > }, > "bluestore_fsck": { > "items": 0, > "bytes": 0 > }, > "bluestore_txc": { > "items": 0, > "bytes": 0 > }, > "bluestore_writing_deferred": { > "items": 0, > "bytes": 0 > }, > "bluestore_writing": { > "items": 0, > "bytes": 0 > }, > "bluefs": { > "items": 0, > "bytes": 0 > }, > "buffer_anon": { > "items": 712726, > "bytes": 106964870 > }, > "buffer_meta": { > "items": 15, > "bytes": 1320 > }, > "osd": { > "items": 0, > "bytes": 0 > }, > "osd_mapbl": { > "items": 0, > "bytes": 0 > }, > "osd_pglog": { > "items": 0, > "bytes": 0 > }, > "osdmap": { > "items": 216, > "bytes": 12168 > }, > "osdmap_mapping": { > "items": 0, > "bytes": 0 > }, > "pgmap": { > "items": 0, > "bytes": 0 > }, > "mds_co": { > "items": 50741038, > "bytes": 5114319203 > }, > "unittest_1": { > "items": 0, > "bytes": 0 > }, > "unittest_2": { > "items": 0, > "bytes": 0 > }, > "total": { > "items": 92716663, > "bytes": 5262560229 > } > } > > > > > > ceph daemon mds.ceph4-2.odiso.net perf dump > { > "AsyncMessenger::Worker-0": { > "msgr_recv_messages": 1276789161, > "msgr_send_messages": 1317625246, > "msgr_recv_bytes": 10630409633633, > "msgr_send_bytes": 1093972769957, > "msgr_created_connections": 207, > "msgr_active_connections": 204, > "msgr_running_total_time": 63745.463077594, > "msgr_running_send_time": 22210.867549070, > "msgr_running_recv_time": 51944.624353942, > 
"msgr_running_fast_dispatch_time": 9185.274084187 > }, > "AsyncMessenger::Worker-1": { > "msgr_recv_messages": 641622644, > "msgr_send_messages": 616664293, > "msgr_recv_bytes": 7287546832466, > "msgr_send_bytes": 588278035895, > "msgr_created_connections": 494, > "msgr_active_connections": 494, > "msgr_running_total_time": 35390.081250881, > "msgr_running_send_time": 11559.689889195, > "msgr_running_recv_time": 29844.885712902, > "msgr_running_fast_dispatch_time": 6361.466445253 > }, > "AsyncMessenger::Worker-2": { > "msgr_recv_messages": 1972469623, > "msgr_send_messages": 1886060294, > "msgr_recv_bytes": 7924136565846, > "msgr_send_bytes": 5072502101797, > "msgr_created_connections": 181, > "msgr_active_connections": 176, > "msgr_running_total_time": 93257.811989806, > "msgr_running_send_time": 35556.662488302, > "msgr_running_recv_time": 81686.262228047, > "msgr_running_fast_dispatch_time": 6476.875317930 > }, > "finisher-PurgeQueue": { > "queue_len": 0, > "complete_latency": { > "avgcount": 3390753, > "sum": 44364.742135193, > "avgtime": 0.013084038 > } > }, > "mds": { > "request": 2780760988, > "reply": 2780760950, > "reply_latency": { > "avgcount": 2780760950, > "sum": 8467119.492491407, > "avgtime": 0.003044892 > }, > "forward": 0, > "dir_fetch": 173374097, > "dir_commit": 3235888, > "dir_split": 23, > "dir_merge": 45, > "inode_max": 2147483647, > "inodes": 1762555, > "inodes_top": 388540, > "inodes_bottom": 173389, > "inodes_pin_tail": 1200626, > "inodes_pinned": 1207497, > "inodes_expired": 32837415801, > "inodes_with_caps": 1206864, > "caps": 1565063, > "subtrees": 2, > "traverse": 2976675748, > "traverse_hit": 1725898480, > "traverse_forward": 0, > "traverse_discover": 0, > "traverse_dir_fetch": 157542892, > "traverse_remote_ino": 46197, > "traverse_lock": 294516, > "load_cent": 18446743922292121894, > "q": 169, > "exported": 0, > "exported_inodes": 0, > "imported": 0, > "imported_inodes": 0 > }, > "mds_cache": { > "num_strays": 6004, > 
"num_strays_delayed": 23, > "num_strays_enqueuing": 0, > "strays_created": 3123475, > "strays_enqueued": 3118819, > "strays_reintegrated": 1279, > "strays_migrated": 0, > "num_recovering_processing": 0, > "num_recovering_enqueued": 0, > "num_recovering_prioritized": 0, > "recovery_started": 17, > "recovery_completed": 17, > "ireq_enqueue_scrub": 0, > "ireq_exportdir": 0, > "ireq_flush": 0, > "ireq_fragmentdir": 68, > "ireq_fragstats": 0, > "ireq_inodestats": 0 > }, > "mds_log": { > "evadd": 557555624, > "evex": 557550922, > "evtrm": 557542937, > "ev": 117534, > "evexg": 0, > "evexd": 7985, > "segadd": 680325, > "segex": 680324, > "segtrm": 680314, > "seg": 140, > "segexg": 0, > "segexd": 10, > "expos": 7249128066146, > "wrpos": 7249314979798, > "rdpos": 6319502327537, > "jlat": { > "avgcount": 89044790, > "sum": 184818.469396630, > "avgtime": 0.002075567 > }, > "replayed": 104847 > }, > "mds_mem": { > "ino": 1762557, > "ino+": 32728559269, > "ino-": 32726796712, > "dir": 229243, > "dir+": 29431940, > "dir-": 29202697, > "dn": 1762898, > "dn+": 32844219705, > "dn-": 32842456807, > "cap": 1565063, > "cap+": 2489269431, > "cap-": 2487704368, > "rss": 21561424, > "heap": 313980, > "buf": 0 > }, > "mds_server": { > "dispatch_client_request": 2993092793, > "dispatch_server_request": 0, > "handle_client_request": 2780760988, > "handle_client_session": 23823011, > "handle_slave_request": 0, > "req_create": 5549861, > "req_getattr": 28160035, > "req_getfilelock": 0, > "req_link": 7409, > "req_lookup": 2162695254, > "req_lookuphash": 0, > "req_lookupino": 0, > "req_lookupname": 16114, > "req_lookupparent": 0, > "req_lookupsnap": 0, > "req_lssnap": 0, > "req_mkdir": 423120, > "req_mknod": 0, > "req_mksnap": 0, > "req_open": 549851331, > "req_readdir": 25836771, > "req_rename": 2865148, > "req_renamesnap": 0, > "req_rmdir": 143496, > "req_rmsnap": 0, > "req_rmxattr": 0, > "req_setattr": 6833015, > "req_setdirlayout": 0, > "req_setfilelock": 960105, > "req_setlayout": 0, > 
"req_setxattr": 2, > "req_symlink": 2561, > "req_unlink": 2966589 > }, > "mds_sessions": { > "session_count": 326, > "session_add": 472, > "session_remove": 146 > }, > "objecter": { > "op_active": 0, > "op_laggy": 0, > "op_send": 297200358, > "op_send_bytes": 943806252615, > "op_resend": 22, > "op_reply": 297200336, > "op": 297200336, > "op_r": 173655966, > "op_w": 123544370, > "op_rmw": 0, > "op_pg": 0, > "osdop_stat": 2843429, > "osdop_create": 5729675, > "osdop_read": 126350, > "osdop_write": 89171030, > "osdop_writefull": 365835, > "osdop_writesame": 0, > "osdop_append": 0, > "osdop_zero": 2, > "osdop_truncate": 15, > "osdop_delete": 4128067, > "osdop_mapext": 0, > "osdop_sparse_read": 0, > "osdop_clonerange": 0, > "osdop_getxattr": 46958217, > "osdop_setxattr": 11459350, > "osdop_cmpxattr": 0, > "osdop_rmxattr": 0, > "osdop_resetxattrs": 0, > "osdop_tmap_up": 0, > "osdop_tmap_put": 0, > "osdop_tmap_get": 0, > "osdop_call": 0, > "osdop_watch": 0, > "osdop_notify": 0, > "osdop_src_cmpxattr": 0, > "osdop_pgls": 0, > "osdop_pgls_filter": 0, > "osdop_other": 20547060, > "linger_active": 0, > "linger_send": 0, > "linger_resend": 0, > "linger_ping": 0, > "poolop_active": 0, > "poolop_send": 0, > "poolop_resend": 0, > "poolstat_active": 0, > "poolstat_send": 0, > "poolstat_resend": 0, > "statfs_active": 0, > "statfs_send": 0, > "statfs_resend": 0, > "command_active": 0, > "command_send": 0, > "command_resend": 0, > "map_epoch": 4048, > "map_full": 0, > "map_inc": 742, > "osd_sessions": 18, > "osd_session_open": 26, > "osd_session_close": 8, > "osd_laggy": 0, > "omap_wr": 6209755, > "omap_rd": 346748196, > "omap_del": 605991 > }, > "purge_queue": { > "pq_executing_ops": 0, > "pq_executing": 0, > "pq_executed": 3118819 > }, > "throttle-msgr_dispatch_throttler-mds": { > "val": 0, > "max": 104857600, > "get_started": 0, > "get": 3890881428, > "get_sum": 25554167806273, > "get_or_fail_fail": 0, > "get_or_fail_success": 3890881428, > "take": 0, > "take_sum": 0, > "put": 
3890881428, > "put_sum": 25554167806273, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-objecter_bytes": { > "val": 0, > "max": 104857600, > "get_started": 0, > "get": 0, > "get_sum": 0, > "get_or_fail_fail": 0, > "get_or_fail_success": 0, > "take": 297200336, > "take_sum": 944272996789, > "put": 272525107, > "put_sum": 944272996789, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-objecter_ops": { > "val": 0, > "max": 1024, > "get_started": 0, > "get": 0, > "get_sum": 0, > "get_or_fail_fail": 0, > "get_or_fail_success": 0, > "take": 297200336, > "take_sum": 297200336, > "put": 297200336, > "put_sum": 297200336, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-write_buf_throttle": { > "val": 0, > "max": 3758096384, > "get_started": 0, > "get": 3118819, > "get_sum": 290050463, > "get_or_fail_fail": 0, > "get_or_fail_success": 3118819, > "take": 0, > "take_sum": 0, > "put": 126240, > "put_sum": 290050463, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > }, > "throttle-write_buf_throttle-0x55decea8e140": { > "val": 117619, > "max": 3758096384, > "get_started": 0, > "get": 557555624, > "get_sum": 929812651219, > "get_or_fail_fail": 0, > "get_or_fail_success": 557555624, > "take": 0, > "take_sum": 0, > "put": 89044790, > "put_sum": 929812533600, > "wait": { > "avgcount": 0, > "sum": 0.000000000, > "avgtime": 0.000000000 > } > } > } > > > root@ceph4-2:~# ceph daemon mds.ceph4-2.odiso.net perf dump mds > { > "mds": { > "request": 2780811086, > "reply": 2780811048, > "reply_latency": { > "avgcount": 2780811048, > "sum": 8467173.659961677, > "avgtime": 0.003044857 > }, > "forward": 0, > "dir_fetch": 173379852, > "dir_commit": 3235892, > "dir_split": 23, > "dir_merge": 45, > "inode_max": 2147483647, > "inodes": 1670056, > "inodes_top": 307065, > "inodes_bottom": 188813, > "inodes_pin_tail": 1174178, 
> "inodes_pinned": 1231392, > "inodes_expired": 32838002167, > "inodes_with_caps": 1231047, > "caps": 1589515, > "subtrees": 2, > "traverse": 2976731809, > "traverse_hit": 1725937513, > "traverse_forward": 0, > "traverse_discover": 0, > "traverse_dir_fetch": 157548349, > "traverse_remote_ino": 46197, > "traverse_lock": 294516, > "load_cent": 18446743922294611748, > "q": 47, > "exported": 0, > "exported_inodes": 0, > "imported": 0, > "imported_inodes": 0 > } > } > > ----- Mail original ----- > De: "Zheng Yan" <ukernel@xxxxxxxxx> > À: "aderumier" <aderumier@xxxxxxxxx> > Cc: "ceph-users" <ceph-users@xxxxxxxxxxxxxx> > Envoyé: Lundi 28 Mai 2018 03:32:10 > Objet: Re: ceph mds memory usage 20GB : is it normal ? > > could you send me full output of dump_mempools > > On Thu, May 24, 2018 at 7:22 PM, Alexandre DERUMIER <aderumier@xxxxxxxxx> wrote: >> Thanks! >> >> >> here is the profile.pdf >> >> 10-15min profiling, I can't do it longer because my clients were lagging. >> >> but I think it should be enough to observe the rss memory increase. >> >> >> >> >> ----- Mail original ----- >> De: "Zheng Yan" <ukernel@xxxxxxxxx> >> À: "aderumier" <aderumier@xxxxxxxxx> >> Cc: "ceph-users" <ceph-users@xxxxxxxxxxxxxx> >> Envoyé: Jeudi 24 Mai 2018 11:34:20 >> Objet: Re: ceph mds memory usage 20GB : is it normal ? >> >> On Tue, May 22, 2018 at 3:11 PM, Alexandre DERUMIER <aderumier@xxxxxxxxx> wrote: >>> Hi, some new stats, mds memory is now 16G, >>> >>> I have almost the same number of items and bytes in cache vs some weeks ago when mds was using 8G. 
(ceph 12.2.5) >>> >>> >>> root@ceph4-2:~# while sleep 1; do ceph daemon mds.ceph4-2.odiso.net perf dump | jq '.mds_mem.rss'; ceph daemon mds.ceph4-2.odiso.net dump_mempools | jq -c '.mds_co'; done >>> 16905052 >>> {"items":43350988,"bytes":5257428143} >>> 16905052 >>> {"items":43428329,"bytes":5283850173} >>> 16905052 >>> {"items":43209167,"bytes":5208578149} >>> 16905052 >>> {"items":43177631,"bytes":5198833577} >>> 16905052 >>> {"items":43312734,"bytes":5252649462} >>> 16905052 >>> {"items":43355753,"bytes":5277197972} >>> 16905052 >>> {"items":43700693,"bytes":5303376141} >>> 16905052 >>> {"items":43115809,"bytes":5156628138} >>> ^C >>> >>> >>> >>> >>> root@ceph4-2:~# ceph status >>> cluster: >>> id: e22b8e83-3036-4fe5-8fd5-5ce9d539beca >>> health: HEALTH_OK >>> >>> services: >>> mon: 3 daemons, quorum ceph4-1,ceph4-2,ceph4-3 >>> mgr: ceph4-1.odiso.net(active), standbys: ceph4-2.odiso.net, ceph4-3.odiso.net >>> mds: cephfs4-1/1/1 up {0=ceph4-2.odiso.net=up:active}, 2 up:standby >>> osd: 18 osds: 18 up, 18 in >>> rgw: 3 daemons active >>> >>> data: >>> pools: 11 pools, 1992 pgs >>> objects: 75677k objects, 6045 GB >>> usage: 20579 GB used, 6246 GB / 26825 GB avail >>> pgs: 1992 active+clean >>> >>> io: >>> client: 14441 kB/s rd, 2550 kB/s wr, 371 op/s rd, 95 op/s wr >>> >>> >>> root@ceph4-2:~# ceph daemon mds.ceph4-2.odiso.net cache status >>> { >>> "pool": { >>> "items": 44523608, >>> "bytes": 5326049009 >>> } >>> } >>> >>> >>> root@ceph4-2:~# ceph daemon mds.ceph4-2.odiso.net perf dump >>> { >>> "AsyncMessenger::Worker-0": { >>> "msgr_recv_messages": 798876013, >>> "msgr_send_messages": 825999506, >>> "msgr_recv_bytes": 7003223097381, >>> "msgr_send_bytes": 691501283744, >>> "msgr_created_connections": 148, >>> "msgr_active_connections": 146, >>> "msgr_running_total_time": 39914.832387470, >>> "msgr_running_send_time": 13744.704199430, >>> "msgr_running_recv_time": 32342.160588451, >>> "msgr_running_fast_dispatch_time": 5996.336446782 >>> }, >>> 
"AsyncMessenger::Worker-1": { >>> "msgr_recv_messages": 429668771, >>> "msgr_send_messages": 414760220, >>> "msgr_recv_bytes": 5003149410825, >>> "msgr_send_bytes": 396281427789, >>> "msgr_created_connections": 132, >>> "msgr_active_connections": 132, >>> "msgr_running_total_time": 23644.410515392, >>> "msgr_running_send_time": 7669.068710688, >>> "msgr_running_recv_time": 19751.610043696, >>> "msgr_running_fast_dispatch_time": 4331.023453385 >>> }, >>> "AsyncMessenger::Worker-2": { >>> "msgr_recv_messages": 1312910919, >>> "msgr_send_messages": 1260040403, >>> "msgr_recv_bytes": 5330386980976, >>> "msgr_send_bytes": 3341965016878, >>> "msgr_created_connections": 143, >>> "msgr_active_connections": 138, >>> "msgr_running_total_time": 61696.635450100, >>> "msgr_running_send_time": 23491.027014598, >>> "msgr_running_recv_time": 53858.409319734, >>> "msgr_running_fast_dispatch_time": 4312.451966809 >>> }, >>> "finisher-PurgeQueue": { >>> "queue_len": 0, >>> "complete_latency": { >>> "avgcount": 1889416, >>> "sum": 29224.227703697, >>> "avgtime": 0.015467333 >>> } >>> }, >>> "mds": { >>> "request": 1822420924, >>> "reply": 1822420886, >>> "reply_latency": { >>> "avgcount": 1822420886, >>> "sum": 5258467.616943274, >>> "avgtime": 0.002885429 >>> }, >>> "forward": 0, >>> "dir_fetch": 116035485, >>> "dir_commit": 1865012, >>> "dir_split": 17, >>> "dir_merge": 24, >>> "inode_max": 2147483647, >>> "inodes": 1600438, >>> "inodes_top": 210492, >>> "inodes_bottom": 100560, >>> "inodes_pin_tail": 1289386, >>> "inodes_pinned": 1299735, >>> "inodes_expired": 22223476046, >>> "inodes_with_caps": 1299137, >>> "caps": 2211546, >>> "subtrees": 2, >>> "traverse": 1953482456, >>> "traverse_hit": 1127647211, >>> "traverse_forward": 0, >>> "traverse_discover": 0, >>> "traverse_dir_fetch": 105833969, >>> "traverse_remote_ino": 31686, >>> "traverse_lock": 4344, >>> "load_cent": 182244014474, >>> "q": 104, >>> "exported": 0, >>> "exported_inodes": 0, >>> "imported": 0, >>> 
"imported_inodes": 0 >>> }, >>> "mds_cache": { >>> "num_strays": 14980, >>> "num_strays_delayed": 7, >>> "num_strays_enqueuing": 0, >>> "strays_created": 1672815, >>> "strays_enqueued": 1659514, >>> "strays_reintegrated": 666, >>> "strays_migrated": 0, >>> "num_recovering_processing": 0, >>> "num_recovering_enqueued": 0, >>> "num_recovering_prioritized": 0, >>> "recovery_started": 2, >>> "recovery_completed": 2, >>> "ireq_enqueue_scrub": 0, >>> "ireq_exportdir": 0, >>> "ireq_flush": 0, >>> "ireq_fragmentdir": 41, >>> "ireq_fragstats": 0, >>> "ireq_inodestats": 0 >>> }, >>> "mds_log": { >>> "evadd": 357717092, >>> "evex": 357717106, >>> "evtrm": 357716741, >>> "ev": 105198, >>> "evexg": 0, >>> "evexd": 365, >>> "segadd": 437124, >>> "segex": 437124, >>> "segtrm": 437123, >>> "seg": 130, >>> "segexg": 0, >>> "segexd": 1, >>> "expos": 6916004026339, >>> "wrpos": 6916179441942, >>> "rdpos": 6319502327537, >>> "jlat": { >>> "avgcount": 59071693, >>> "sum": 120823.311894779, >>> "avgtime": 0.002045367 >>> }, >>> "replayed": 104847 >>> }, >>> "mds_mem": { >>> "ino": 1599422, >>> "ino+": 22152405695, >>> "ino-": 22150806273, >>> "dir": 256943, >>> "dir+": 18460298, >>> "dir-": 18203355, >>> "dn": 1600689, >>> "dn+": 22227888283, >>> "dn-": 22226287594, >>> "cap": 2211546, >>> "cap+": 1674287476, >>> "cap-": 1672075930, >>> "rss": 16905052, >>> "heap": 313916, >>> "buf": 0 >>> }, >>> "mds_server": { >>> "dispatch_client_request": 1964131912, >>> "dispatch_server_request": 0, >>> "handle_client_request": 1822420924, >>> "handle_client_session": 15557609, >>> "handle_slave_request": 0, >>> "req_create": 4116952, >>> "req_getattr": 18696543, >>> "req_getfilelock": 0, >>> "req_link": 6625, >>> "req_lookup": 1425824734, >>> "req_lookuphash": 0, >>> "req_lookupino": 0, >>> "req_lookupname": 8703, >>> "req_lookupparent": 0, >>> "req_lookupsnap": 0, >>> "req_lssnap": 0, >>> "req_mkdir": 371878, >>> "req_mknod": 0, >>> "req_mksnap": 0, >>> "req_open": 351119806, >>> "req_readdir": 
17103599, >>> "req_rename": 2437529, >>> "req_renamesnap": 0, >>> "req_rmdir": 78789, >>> "req_rmsnap": 0, >>> "req_rmxattr": 0, >>> "req_setattr": 4547650, >>> "req_setdirlayout": 0, >>> "req_setfilelock": 633219, >>> "req_setlayout": 0, >>> "req_setxattr": 2, >>> "req_symlink": 2520, >>> "req_unlink": 1589288 >>> }, >>> "mds_sessions": { >>> "session_count": 321, >>> "session_add": 383, >>> "session_remove": 62 >>> }, >>> "objecter": { >>> "op_active": 0, >>> "op_laggy": 0, >>> "op_send": 197932443, >>> "op_send_bytes": 605992324653, >>> "op_resend": 22, >>> "op_reply": 197932421, >>> "op": 197932421, >>> "op_r": 116256030, >>> "op_w": 81676391, >>> "op_rmw": 0, >>> "op_pg": 0, >>> "osdop_stat": 1518341, >>> "osdop_create": 4314348, >>> "osdop_read": 79810, >>> "osdop_write": 59151421, >>> "osdop_writefull": 237358, >>> "osdop_writesame": 0, >>> "osdop_append": 0, >>> "osdop_zero": 2, >>> "osdop_truncate": 9, >>> "osdop_delete": 2320714, >>> "osdop_mapext": 0, >>> "osdop_sparse_read": 0, >>> "osdop_clonerange": 0, >>> "osdop_getxattr": 29426577, >>> "osdop_setxattr": 8628696, >>> "osdop_cmpxattr": 0, >>> "osdop_rmxattr": 0, >>> "osdop_resetxattrs": 0, >>> "osdop_tmap_up": 0, >>> "osdop_tmap_put": 0, >>> "osdop_tmap_get": 0, >>> "osdop_call": 0, >>> "osdop_watch": 0, >>> "osdop_notify": 0, >>> "osdop_src_cmpxattr": 0, >>> "osdop_pgls": 0, >>> "osdop_pgls_filter": 0, >>> "osdop_other": 13551599, >>> "linger_active": 0, >>> "linger_send": 0, >>> "linger_resend": 0, >>> "linger_ping": 0, >>> "poolop_active": 0, >>> "poolop_send": 0, >>> "poolop_resend": 0, >>> "poolstat_active": 0, >>> "poolstat_send": 0, >>> "poolstat_resend": 0, >>> "statfs_active": 0, >>> "statfs_send": 0, >>> "statfs_resend": 0, >>> "command_active": 0, >>> "command_send": 0, >>> "command_resend": 0, >>> "map_epoch": 3907, >>> "map_full": 0, >>> "map_inc": 601, >>> "osd_sessions": 18, >>> "osd_session_open": 20, >>> "osd_session_close": 2, >>> "osd_laggy": 0, >>> "omap_wr": 3595801, >>> 
"omap_rd": 232070972, >>> "omap_del": 272598 >>> }, >>> "purge_queue": { >>> "pq_executing_ops": 0, >>> "pq_executing": 0, >>> "pq_executed": 1659514 >>> }, >>> "throttle-msgr_dispatch_throttler-mds": { >>> "val": 0, >>> "max": 104857600, >>> "get_started": 0, >>> "get": 2541455703, >>> "get_sum": 17148691767160, >>> "get_or_fail_fail": 0, >>> "get_or_fail_success": 2541455703, >>> "take": 0, >>> "take_sum": 0, >>> "put": 2541455703, >>> "put_sum": 17148691767160, >>> "wait": { >>> "avgcount": 0, >>> "sum": 0.000000000, >>> "avgtime": 0.000000000 >>> } >>> }, >>> "throttle-objecter_bytes": { >>> "val": 0, >>> "max": 104857600, >>> "get_started": 0, >>> "get": 0, >>> "get_sum": 0, >>> "get_or_fail_fail": 0, >>> "get_or_fail_success": 0, >>> "take": 197932421, >>> "take_sum": 606323353310, >>> "put": 182060027, >>> "put_sum": 606323353310, >>> "wait": { >>> "avgcount": 0, >>> "sum": 0.000000000, >>> "avgtime": 0.000000000 >>> } >>> }, >>> "throttle-objecter_ops": { >>> "val": 0, >>> "max": 1024, >>> "get_started": 0, >>> "get": 0, >>> "get_sum": 0, >>> "get_or_fail_fail": 0, >>> "get_or_fail_success": 0, >>> "take": 197932421, >>> "take_sum": 197932421, >>> "put": 197932421, >>> "put_sum": 197932421, >>> "wait": { >>> "avgcount": 0, >>> "sum": 0.000000000, >>> "avgtime": 0.000000000 >>> } >>> }, >>> "throttle-write_buf_throttle": { >>> "val": 0, >>> "max": 3758096384, >>> "get_started": 0, >>> "get": 1659514, >>> "get_sum": 154334946, >>> "get_or_fail_fail": 0, >>> "get_or_fail_success": 1659514, >>> "take": 0, >>> "take_sum": 0, >>> "put": 79728, >>> "put_sum": 154334946, >>> "wait": { >>> "avgcount": 0, >>> "sum": 0.000000000, >>> "avgtime": 0.000000000 >>> } >>> }, >>> "throttle-write_buf_throttle-0x55decea8e140": { >>> "val": 255839, >>> "max": 3758096384, >>> "get_started": 0, >>> "get": 357717092, >>> "get_sum": 596677113363, >>> "get_or_fail_fail": 0, >>> "get_or_fail_success": 357717092, >>> "take": 0, >>> "take_sum": 0, >>> "put": 59071693, >>> "put_sum": 
596676857524, >>> "wait": { >>> "avgcount": 0, >>> "sum": 0.000000000, >>> "avgtime": 0.000000000 >>> } >>> } >>> } >>> >>> >> >> Maybe there is a memory leak. What is the output of 'ceph tell mds.xx heap >> stats'? If the RSS size keeps increasing, please run the heap profiler for >> a period of time. >> >> >> ceph tell mds.xx heap start_profiler >> "wait some time" >> ceph tell mds.xx heap dump >> ceph tell mds.xx heap stop_profiler >> pprof --pdf <location of ceph-mds binary> >> /var/log/ceph/mds.xxx.profile.* > profile.pdf >> >> send profile.pdf to us >> >> Regards >> Yan, Zheng >> >>> >>> ----- Mail original ----- >>> De: "Webert de Souza Lima" <webert.boss@xxxxxxxxx> >>> À: "ceph-users" <ceph-users@xxxxxxxxxxxxxx> >>> Envoyé: Lundi 14 Mai 2018 15:14:35 >>> Objet: Re: ceph mds memory usage 20GB : is it normal ? >>> >>> On Sat, May 12, 2018 at 3:11 AM Alexandre DERUMIER < [ mailto:aderumier@xxxxxxxxx | aderumier@xxxxxxxxx ] > wrote: >>> >>> >>> The documentation (luminous) says: >>> >>> >>> >>> >>> >>>>mds cache size >>>> >>>>Description: The number of inodes to cache. A value of 0 indicates an unlimited number. It is recommended to use mds_cache_memory_limit to limit the amount of memory the MDS cache uses. >>>>Type: 32-bit Integer >>>>Default: 0 >>>> >>> >>> >>> >>> and, my mds_cache_memory_limit is currently at 5GB. >>> >>> >>> yeah I have only suggested that because the high memory usage seemed to trouble you and it might be a bug, so it's more of a workaround. 
>>> >>> Regards, >>> Webert Lima >>> DevOps Engineer at MAV Tecnologia >>> Belo Horizonte - Brasil >>> IRC NICK - WebertRLZ >>> >>> _______________________________________________ >>> ceph-users mailing list >>> ceph-users@xxxxxxxxxxxxxx >>> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com >>> >>> _______________________________________________ >>> ceph-users mailing list >>> ceph-users@xxxxxxxxxxxxxx >>> http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com >> > _______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com