This my active MDS perf dump output: root@ud-01:~# ceph tell mds.ud-data.ud-02.xcoojt perf dump { "AsyncMessenger::Worker-0": { "msgr_recv_messages": 17179307, "msgr_send_messages": 15867134, "msgr_recv_bytes": 445239812294, "msgr_send_bytes": 42003529245, "msgr_created_connections": 96, "msgr_active_connections": 54, "msgr_running_total_time": 2640.694486182, "msgr_running_send_time": 716.588978014, "msgr_running_recv_time": 45572.840723683, "msgr_running_fast_dispatch_time": 898.208871568, "msgr_send_messages_queue_lat": { "avgcount": 15867133, "sum": 4104.943819792, "avgtime": 0.000258707 }, "msgr_handle_ack_lat": { "avgcount": 7464879, "sum": 4.939409831, "avgtime": 0.000000661 } }, "AsyncMessenger::Worker-1": { "msgr_recv_messages": 18116812, "msgr_send_messages": 17095685, "msgr_recv_bytes": 506282885037, "msgr_send_bytes": 55228114586, "msgr_created_connections": 197, "msgr_active_connections": 56, "msgr_running_total_time": 3012.176186383, "msgr_running_send_time": 814.143975507, "msgr_running_recv_time": 57814.469683537, "msgr_running_fast_dispatch_time": 993.179349828, "msgr_send_messages_queue_lat": { "avgcount": 17095673, "sum": 5137.869051226, "avgtime": 0.000300536 }, "msgr_handle_ack_lat": { "avgcount": 8803334, "sum": 6.183353371, "avgtime": 0.000000702 } }, "AsyncMessenger::Worker-2": { "msgr_recv_messages": 23067443, "msgr_send_messages": 20150693, "msgr_recv_bytes": 524358461533, "msgr_send_bytes": 124188927975, "msgr_created_connections": 136, "msgr_active_connections": 57, "msgr_running_total_time": 3732.249144012, "msgr_running_send_time": 1160.515966671, "msgr_running_recv_time": 4777.558043971, "msgr_running_fast_dispatch_time": 1037.227270644, "msgr_send_messages_queue_lat": { "avgcount": 20150689, "sum": 5507.000605725, "avgtime": 0.000273290 }, "msgr_handle_ack_lat": { "avgcount": 12133428, "sum": 10.398089696, "avgtime": 0.000000856 } }, "cct": { "total_workers": 1, "unhealthy_workers": 0 }, "finisher-MDSRank": { "queue_len": 0, "complete_latency": { "avgcount": 7983999, "sum": 42368.199433194, "avgtime": 0.005306638 } }, "finisher-PurgeQueue": { "queue_len": 0, "complete_latency": { "avgcount": 1681586, "sum": 1814.673700573, "avgtime": 0.001079144 } }, "mds": { "request": 22046242, "reply": 22046200, "reply_latency": { "avgcount": 22046200, "sum": 485668.718167318, "avgtime": 0.022029588 }, "slow_reply": 0, "forward": 0, "dir_fetch": 8048670, "dir_commit": 162554, "dir_split": 1, "dir_merge": 5, "inodes": 3007665, "inodes_top": 1851907, "inodes_bottom": 1075947, "inodes_pin_tail": 79811, "inodes_pinned": 362083, "inodes_expired": 2390089349, "inodes_with_caps": 359488, "caps": 367045, "subtrees": 2, "traverse": 30835852, "traverse_hit": 28292209, "traverse_forward": 0, "traverse_discover": 0, "traverse_dir_fetch": 997199, "traverse_remote_ino": 4936, "traverse_lock": 14640, "load_cent": 15904796, "q": 0, "exported": 0, "exported_inodes": 0, "imported": 0, "imported_inodes": 0, "openino_dir_fetch": 392728, "openino_backtrace_fetch": 118146, "openino_peer_discover": 0, "root_rfiles": 96255545, "root_rbytes": 28616263717441, "root_rsnaps": 0, "scrub_backtrace_fetch": 0, "scrub_set_tag": 0, "scrub_backtrace_repaired": 0, "scrub_inotable_repaired": 0, "scrub_dir_inodes": 0, "scrub_dir_base_inodes": 0, "scrub_dirfrag_rstats": 0, "scrub_file_inodes": 0, "handle_inode_file_caps": 0, "ceph_cap_op_revoke": 1377821, "ceph_cap_op_grant": 10915803, "ceph_cap_op_trunc": 14858, "ceph_cap_op_flushsnap_ack": 0, "ceph_cap_op_flush_ack": 0, "handle_client_caps": 13064247, 
"handle_client_caps_dirty": 1447210, "handle_client_cap_release": 2377564, "process_request_cap_release": 9609997 }, "mds_cache": { "num_strays": 7, "num_strays_delayed": 0, "num_strays_enqueuing": 0, "strays_created": 1132660, "strays_enqueued": 1129983, "strays_reintegrated": 10807, "strays_migrated": 0, "num_recovering_processing": 0, "num_recovering_enqueued": 0, "num_recovering_prioritized": 0, "recovery_started": 1, "recovery_completed": 1, "ireq_enqueue_scrub": 0, "ireq_exportdir": 0, "ireq_flush": 0, "ireq_fragmentdir": 6, "ireq_fragstats": 0, "ireq_inodestats": 0 }, "mds_log": { "evadd": 9429876, "evex": 9437748, "evtrm": 9437748, "ev": 88984, "evexg": 0, "evexd": 25, "segadd": 12165, "segex": 12165, "segtrm": 12165, "seg": 129, "segexg": 0, "segexd": 1, "expos": 12708408729837, "wrpos": 12708585242899, "rdpos": 12690823784258, "jlat": { "avgcount": 697759, "sum": 18206.880493144, "avgtime": 0.026093365 }, "replayed": 96856 }, "mds_mem": { "ino": 3006191, "ino+": 2391671062, "ino-": 2388664871, "dir": 56731, "dir+": 7146231, "dir-": 7089500, "dn": 3006389, "dn+": 2395253847, "dn-": 2392247458, "cap": 366021, "cap+": 394343412, "cap-": 393977391, "rss": 9974008, "heap": 223516 }, "mds_server": { "dispatch_client_request": 33305573, "dispatch_server_request": 0, "handle_client_request": 22046242, "handle_client_session": 546679, "handle_peer_request": 0, "req_create_latency": { "avgcount": 1265353, "sum": 8092.372232781, "avgtime": 0.006395347 }, "req_getattr_latency": { "avgcount": 742761, "sum": 7466.177333377, "avgtime": 0.010051924 }, "req_getfilelock_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_link_latency": { "avgcount": 7159, "sum": 14.511870538, "avgtime": 0.002027080 }, "req_lookup_latency": { "avgcount": 6453655, "sum": 112979.615492227, "avgtime": 0.017506299 }, "req_lookuphash_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_lookupino_latency": { "avgcount": 2, "sum": 0.000204966, "avgtime": 0.000102483 }, "req_lookupname_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_lookupparent_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_lookupsnap_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_lssnap_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_mkdir_latency": { "avgcount": 105563, "sum": 576.465681718, "avgtime": 0.005460868 }, "req_mknod_latency": { "avgcount": 12, "sum": 0.019379782, "avgtime": 0.001614981 }, "req_mksnap_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_open_latency": { "avgcount": 1515084, "sum": 12869.581748984, "avgtime": 0.008494302 }, "req_readdir_latency": { "avgcount": 10525466, "sum": 336757.025480678, "avgtime": 0.031994500 }, "req_rename_latency": { "avgcount": 364696, "sum": 1200.921138094, "avgtime": 0.003292937 }, "req_renamesnap_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_rmdir_latency": { "avgcount": 98241, "sum": 2455.598234225, "avgtime": 0.024995655 }, "req_rmsnap_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_rmxattr_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_setattr_latency": { "avgcount": 18921, "sum": 761.713863946, "avgtime": 0.040257590 }, "req_setdirlayout_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_setfilelock_latency": { "avgcount": 26396, "sum": 72.242011848, "avgtime": 0.002736854 }, 
"req_setlayout_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 }, "req_setxattr_latency": { "avgcount": 17, "sum": 0.006095368, "avgtime": 0.000358551 }, "req_symlink_latency": { "avgcount": 5534, "sum": 19.856199425, "avgtime": 0.003588037 }, "req_unlink_latency": { "avgcount": 917340, "sum": 2402.611199361, "avgtime": 0.002619106 }, "cap_revoke_eviction": 0, "cap_acquisition_throttle": 0, "req_getvxattr_latency": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "mds_sessions": { "session_count": 84, "session_add": 93, "session_remove": 9, "sessions_open": 84, "sessions_stale": 0, "total_load": 2949, "average_load": 35, "avg_session_uptime": 9327000 }, "mempool": { "bloom_filter_bytes": 83341685, "bloom_filter_items": 83341685, "bluestore_alloc_bytes": 0, "bluestore_alloc_items": 0, "bluestore_cache_data_bytes": 0, "bluestore_cache_data_items": 0, "bluestore_cache_onode_bytes": 0, "bluestore_cache_onode_items": 0, "bluestore_cache_meta_bytes": 0, "bluestore_cache_meta_items": 0, "bluestore_cache_other_bytes": 0, "bluestore_cache_other_items": 0, "bluestore_Buffer_bytes": 0, "bluestore_Buffer_items": 0, "bluestore_Extent_bytes": 0, "bluestore_Extent_items": 0, "bluestore_Blob_bytes": 0, "bluestore_Blob_items": 0, "bluestore_SharedBlob_bytes": 0, "bluestore_SharedBlob_items": 0, "bluestore_inline_bl_bytes": 0, "bluestore_inline_bl_items": 0, "bluestore_fsck_bytes": 0, "bluestore_fsck_items": 0, "bluestore_txc_bytes": 0, "bluestore_txc_items": 0, "bluestore_writing_deferred_bytes": 0, "bluestore_writing_deferred_items": 0, "bluestore_writing_bytes": 0, "bluestore_writing_items": 0, "bluefs_bytes": 0, "bluefs_items": 0, "bluefs_file_reader_bytes": 0, "bluefs_file_reader_items": 0, "bluefs_file_writer_bytes": 0, "bluefs_file_writer_items": 0, "buffer_anon_bytes": 2114708, "buffer_anon_items": 825, "buffer_meta_bytes": 88, "buffer_meta_items": 1, "osd_bytes": 0, "osd_items": 0, "osd_mapbl_bytes": 0, "osd_mapbl_items": 0, "osd_pglog_bytes": 0, "osd_pglog_items": 0, "osdmap_bytes": 25728, "osdmap_items": 946, "osdmap_mapping_bytes": 0, "osdmap_mapping_items": 0, "pgmap_bytes": 0, "pgmap_items": 0, "mds_co_bytes": 8173443932, "mds_co_items": 109004579, "unittest_1_bytes": 0, "unittest_1_items": 0, "unittest_2_bytes": 0, "unittest_2_items": 0 }, "objecter": { "op_active": 0, "op_laggy": 0, "op_send": 13563810, "op_send_bytes": 21613887606, "op_resend": 1, "op_reply": 13563809, "oplen_avg": { "avgcount": 13563809, "sum": 31377549 }, "op": 13563809, "op_r": 10213362, "op_w": 3350447, "op_rmw": 0, "op_pg": 0, "osdop_stat": 75945, "osdop_create": 1139381, "osdop_read": 15848, "osdop_write": 713549, "osdop_writefull": 20267, "osdop_writesame": 0, "osdop_append": 0, "osdop_zero": 2, "osdop_truncate": 0, "osdop_delete": 1226688, "osdop_mapext": 0, "osdop_sparse_read": 0, "osdop_clonerange": 0, "osdop_getxattr": 7321546, "osdop_setxattr": 2283499, "osdop_cmpxattr": 0, "osdop_rmxattr": 0, "osdop_resetxattrs": 0, "osdop_call": 0, "osdop_watch": 0, "osdop_notify": 0, "osdop_src_cmpxattr": 0, "osdop_pgls": 0, "osdop_pgls_filter": 0, "osdop_other": 49342, "linger_active": 0, "linger_send": 0, "linger_resend": 0, "linger_ping": 0, "poolop_active": 0, "poolop_send": 0, "poolop_resend": 0, "poolstat_active": 0, "poolstat_send": 0, "poolstat_resend": 0, "statfs_active": 0, "statfs_send": 0, "statfs_resend": 0, "command_active": 0, "command_send": 0, "command_resend": 0, "map_epoch": 13646, "map_full": 0, "map_inc": 97, "osd_sessions": 80, "osd_session_open": 176, 
"osd_session_close": 96, "osd_laggy": 0, "omap_wr": 354624, "omap_rd": 18128035, "omap_del": 48823 }, "oft": { "omap_total_objs": 3, "omap_total_kv_pairs": 31549, "omap_total_updates": 9972364, "omap_total_removes": 7080093 }, "purge_queue": { "pq_executing_ops": 0, "pq_executing_ops_high_water": 1126, "pq_executing": 0, "pq_executing_high_water": 64, "pq_executed": 1129983, "pq_item_in_journal": 0 }, "throttle-msgr_dispatch_throttler-mds": { "val": 0, "max": 104857600, "get_started": 0, "get": 58363558, "get_sum": 1471342573618, "get_or_fail_fail": 0, "get_or_fail_success": 58363558, "take": 0, "take_sum": 0, "put": 58363558, "put_sum": 1471342573618, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-objecter_bytes": { "val": 0, "max": 104857600, "get_started": 0, "get": 0, "get_sum": 0, "get_or_fail_fail": 0, "get_or_fail_success": 0, "take": 13563809, "take_sum": 21700796870, "put": 13563809, "put_sum": 21700796870, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-objecter_ops": { "val": 0, "max": 1024, "get_started": 0, "get": 0, "get_sum": 0, "get_or_fail_fail": 0, "get_or_fail_success": 0, "take": 13563809, "take_sum": 13563809, "put": 13563809, "put_sum": 13563809, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-write_buf_throttle": { "val": 0, "max": 3758096384, "get_started": 0, "get": 1129983, "get_sum": 114128283, "get_or_fail_fail": 0, "get_or_fail_success": 1129983, "take": 0, "take_sum": 0, "put": 15790, "put_sum": 114128283, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } }, "throttle-write_buf_throttle-0x5569b6836500": { "val": 0, "max": 3758096384, "get_started": 0, "get": 9429876, "get_sum": 17761457237, "get_or_fail_fail": 0, "get_or_fail_success": 9429876, "take": 0, "take_sum": 0, "put": 697759, "put_sum": 17761457237, "wait": { "avgcount": 0, "sum": 0.000000000, "avgtime": 0.000000000 } } } Özkan Göksu <ozkangksu@xxxxxxxxx>, 17 Oca 2024 Çar, 08:29 tarihinde şunu yazdı: > All of my clients are servers located at 2 hop away with 10Gbit network > and 2x Xeon CPU/16++ cores and minimum 64GB ram with SSD OS drive + 8GB > spare. > I use ceph kernel mount only and this is the command: > - mount.ceph admin@$fsid.ud-data=/volumes/subvolumegroup ${MOUNT_DIR} -o > name=admin,secret=XXX==,mon_addr=XXX > > I think all of my clients have enough resources to answer MDS requests > very fast. The only possibility that any of my clients fails to respond to > cache pressure is the default settings at cephfs client or MDS server. > > I have some problem with understanding how cephfs client works and why it > needs communication with MDS server for managing local cache. > And even at the beggining I didn't understand why MDS server needs direct > control over clients and tell them what to do. My mind does not understand > the concept and its logic. > To me, clients must be independent and they must manage their data flow > without any server side control. The client must send read and write > request to the remote server and return answer to the kernel. > Client can have read cache management future but it does not need > communicate with remote server. When a client detects multiple read for the > same object it should cache it with a set of protocols and release it when > it needed. > I don't understand why MDS needs to tell clients to release the allocation > and why client needs to report the release status back... 
>
> The logical answer for me is that I'm looking at this from the wrong
> angle and this is not the kind of cache I know from block filesystems.
>
> With my use case, clients read 50-100GB of data (10,000++ objects) only
> once or twice per run, within a few hours.
>
> ------------------------------------------------------------------------------------
> While I was researching, I saw that some users recommend decreasing
> "mds_max_caps_per_client" from 1M to 64K:
> # ceph config set mds mds_max_caps_per_client 65536
>
> But if you check the reported client in the "session ls" output from my
> previous mail, you will see "num_caps": 52092 for a client that is
> failing to respond to cache pressure.
> So it is already under 64K and I'm not sure whether changing this value
> would help.
>
> I want to repeat my main goal.
> I'm not trying to solve the cache pressure warning itself.
> Ceph random read and write performance is not good, and a lot of reads
> from 80+ clients creates latency.
> I'm trying to increase speed by running multiple active MDS daemons,
> maybe even binding subvolumes to specific MDS servers, and thereby
> decrease the latency.
>
> Also, when I check MDS CPU usage I see 120%++ usage from time to time.
> But when I check the CPU load on the server hosting the MDS, I see the
> MDS only uses 2-4 cores and the other CPU cores are almost idle.
> I think the MDS has a CPU core limitation and I need to increase that
> value to decrease the latency. How can I do that?
>
> On Wed, 17 Jan 2024 at 07:44, Özkan Göksu <ozkangksu@xxxxxxxxx> wrote:
>
>> Let me share some outputs about my cluster.
>>
>> root@ud-01:~# ceph fs status
>> ud-data - 84 clients
>> =======
>> RANK  STATE           MDS             ACTIVITY     DNS    INOS   DIRS   CAPS
>>  0    active  ud-data.ud-02.xcoojt  Reqs:   31 /s  3022k  3021k  52.6k   385k
>>         POOL            TYPE     USED  AVAIL
>> cephfs.ud-data.meta  metadata   136G  44.4T
>> cephfs.ud-data.data    data    45.2T  44.4T
>> STANDBY MDS
>> ud-data.ud-03.lhwkml
>> ud-data.ud-05.rnhcfe
>> ud-data.ud-01.uatjle
>> ud-data.ud-04.seggyv
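>>
>> To see which sessions actually hold the most caps, I use something like
>> this (a rough sketch; it assumes jq is available on the admin node):
>>
>> # top 10 sessions by num_caps: caps, hostname, session id
>> ceph tell mds.ud-data.ud-02.xcoojt session ls | \
>>   jq -r 'sort_by(.num_caps) | reverse | .[0:10][] |
>>          "\(.num_caps)\t\(.client_metadata.hostname)\t\(.id)"'
>>
>> That makes it easy to check whether the client named in the warning is
>> really the one holding the most caps.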
>>
>> --------------------------------------------------------------------------
>> This is the "ceph tell mds.ud-data.ud-02.xcoojt session ls" output for
>> the client reported in the cache pressure warning:
>>
>> {
>>     "id": 1282205,
>>     "entity": {
>>         "name": {
>>             "type": "client",
>>             "num": 1282205
>>         },
>>         "addr": {
>>             "type": "v1",
>>             "addr": "172.16.3.48:0",
>>             "nonce": 2169935642
>>         }
>>     },
>>     "state": "open",
>>     "num_leases": 0,
>>     "num_caps": 52092,
>>     "request_load_avg": 1,
>>     "uptime": 75754.745608647994,
>>     "requests_in_flight": 0,
>>     "num_completed_requests": 0,
>>     "num_completed_flushes": 1,
>>     "reconnecting": false,
>>     "recall_caps": {
>>         "value": 2577232.0049106553,
>>         "halflife": 60
>>     },
>>     "release_caps": {
>>         "value": 1.4093491463510395,
>>         "halflife": 60
>>     },
>>     "recall_caps_throttle": {
>>         "value": 63733.985544098425,
>>         "halflife": 1.5
>>     },
>>     "recall_caps_throttle2o": {
>>         "value": 19452.428409271757,
>>         "halflife": 0.5
>>     },
>>     "session_cache_liveness": {
>>         "value": 14.100272208890081,
>>         "halflife": 300
>>     },
>>     "cap_acquisition": {
>>         "value": 0,
>>         "halflife": 10
>>     },
>>     "delegated_inos": [
>>         {
>>             "start": "0x10004a1c031",
>>             "length": 282
>>         },
>>         {
>>             "start": "0x10004a1c33f",
>>             "length": 207
>>         },
>>         {
>>             "start": "0x10004a1cdda",
>>             "length": 6
>>         },
>>         {
>>             "start": "0x10004a3c12e",
>>             "length": 3
>>         },
>>         {
>>             "start": "0x1000f9831fe",
>>             "length": 2
>>         }
>>     ],
>>     "inst": "client.1282205 v1:172.16.3.48:0/2169935642",
>>     "completed_requests": [],
>>     "prealloc_inos": [
>>         {
>>             "start": "0x10004a1c031",
>>             "length": 282
>>         },
>>         {
>>             "start": "0x10004a1c33f",
>>             "length": 207
>>         },
>>         {
>>             "start": "0x10004a1cdda",
>>             "length": 6
>>         },
>>         {
>>             "start": "0x10004a3c12e",
>>             "length": 3
>>         },
>>         {
>>             "start": "0x1000f9831fe",
>>             "length": 2
>>         },
>>         {
>>             "start": "0x1000fa86e5f",
>>             "length": 54
>>         },
>>         {
>>             "start": "0x1000faa069c",
>>             "length": 501
>>         }
>>     ],
>>     "client_metadata": {
>>         "client_features": {
>>             "feature_bits": "0x0000000000007bff"
>>         },
>>         "metric_spec": {
>>             "metric_flags": {
>>                 "feature_bits": "0x00000000000003ff"
>>             }
>>         },
>>         "entity_id": "admin",
>>         "hostname": "bennevis-2",
>>         "kernel_version": "5.15.0-91-generic",
>>         "root": "/volumes/babblians"
>>     }
>> }
>>
>> On Wed, 17 Jan 2024 at 07:22, Özkan Göksu <ozkangksu@xxxxxxxxx> wrote:
>>
>>> Hello Eugen.
>>>
>>> Thank you for the answer.
>>> According to the knowledge and test results in this issue:
>>> https://github.com/ceph/ceph/pull/38574
>>> I tried the advice there and applied the following changes:
>>>
>>> max_mds = 4
>>> standby_mds = 1
>>> mds_cache_memory_limit = 16GB
>>> mds_recall_max_caps = 40000
>>>
>>> When I set these parameters, one day later I saw this log:
>>> [8531248.982954] Out of memory: Killed process 1580586 (ceph-mds)
>>> total-vm:70577592kB, anon-rss:70244236kB, file-rss:0kB, shmem-rss:0kB,
>>> UID:167 pgtables:137832kB oom_score_adj:0
>>>
>>> All the MDS daemons leaked memory and were killed by the kernel.
>>> Because of this I changed the settings as below; it is stable now, but
>>> performance is very poor and I still get cache pressure alerts.
>>>
>>> max_mds = 1
>>> standby_mds = 5
>>> mds_cache_memory_limit = 8GB
>>> mds_recall_max_caps = 30000
>>>
>>> I'm very surprised that you are advising me to decrease
>>> "mds_recall_max_caps", because it is the opposite of what the
>>> developers advised in the issue I linked.
>>> It is very hard to play around with MDS parameters without an
>>> expert-level understanding of what these parameters stand for and how
>>> they will affect behavior.
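>>>
>>> For reference, this is roughly how I apply and verify these values (a
>>> sketch: I set them in the config database rather than in ceph.conf, the
>>> cache limit is given in bytes, and I assume "standby_mds" above maps to
>>> the standby_count_wanted file system setting):
>>>
>>> ceph fs set ud-data max_mds 4
>>> ceph fs set ud-data standby_count_wanted 1
>>> ceph config set mds mds_cache_memory_limit 17179869184   # 16GB
>>> ceph config set mds mds_recall_max_caps 40000
>>>
>>> # confirm what a running daemon actually uses
>>> ceph config show mds.ud-data.ud-02.xcoojt | grep -E 'mds_cache_memory_limit|mds_recall_max_caps'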
>>> Because of this I'm trying to understand the MDS code flow, and I'm
>>> very interested in learning more and in tuning my system by debugging
>>> and understanding my own data flow and MDS usage.
>>>
>>> I have a very specific data flow and I think I need to configure the
>>> system for this case.
>>> I have 80+ clients, and through all of these clients my users read a
>>> range of objects, compare them on the GPU, generate new data and write
>>> the new data back into the cluster.
>>> So my clients usually read an object only once and do not read the same
>>> object again. Sometimes the same user runs multiple services on
>>> multiple clients, and these services can read the same data from
>>> different clients.
>>>
>>> So having a large cache is useless for my use case. I need to set up
>>> the MDS and the CephFS client for this data flow.
>>> When I look at MDS RAM usage, I see high allocation all the time and I
>>> wonder why. If one of my clients is no longer reading an object, why
>>> doesn't the MDS drop that data from its RAM?
>>> I need to configure the MDS to read the data and then drop it again
>>> very quickly, unless the data is constantly being requested by clients;
>>> in that case, of course, I do want a RAM cache tier.
>>>
>>> I'm a little confused and I need to learn more about how the MDS works
>>> and how to make multiple active MDS daemons faster for my subvolumes
>>> and client data flow.
>>>
>>> Best regards.
>>>
>>> On Tue, 16 Jan 2024 at 11:36, Eugen Block <eblock@xxxxxx> wrote:
>>>
>>>> Hi,
>>>>
>>>> I have dealt with this topic multiple times; the SUSE team helped me
>>>> understand what's going on under the hood. The summary can be found
>>>> in this thread [1].
>>>>
>>>> What helped in our case was to reduce mds_recall_max_caps from 30k
>>>> (the default) to 3k. We tried it in steps of 1k, IIRC. So I suggest
>>>> reducing that value step by step (maybe start with 20k or so) to
>>>> find the optimal value.
>>>>
>>>> Regards,
>>>> Eugen
>>>>
>>>> [1] https://www.spinics.net/lists/ceph-users/msg73188.html
>>>>
>>>> Quoting Özkan Göksu <ozkangksu@xxxxxxxxx>:
>>>>
>>>> > Hello.
>>>> >
>>>> > I have a 5-node Ceph cluster and I'm constantly getting the "clients
>>>> > failing to respond to cache pressure" warning.
>>>> >
>>>> > I have 84 CephFS kernel clients (servers) and my users access their
>>>> > personal subvolumes, which are located on one pool.
>>>> >
>>>> > My users are software developers and the data is home and user data.
>>>> > (Git, python projects, sample data and generated new data)
>>>> >
>>>> > ---------------------------------------------------------------------------------
>>>> > --- RAW STORAGE ---
>>>> > CLASS     SIZE     AVAIL    USED   RAW USED  %RAW USED
>>>> > ssd     146 TiB  101 TiB  45 TiB    45 TiB      30.71
>>>> > TOTAL   146 TiB  101 TiB  45 TiB    45 TiB      30.71
>>>> >
>>>> > --- POOLS ---
>>>> > POOL                 ID   PGS   STORED   OBJECTS     USED   %USED  MAX AVAIL
>>>> > .mgr                  1     1  356 MiB        90  1.0 GiB      0      30 TiB
>>>> > cephfs.ud-data.meta   9   256   69 GiB     3.09M  137 GiB   0.15      45 TiB
>>>> > cephfs.ud-data.data  10  2048   26 TiB   100.83M   44 TiB  32.97      45 TiB
>>>> > ---------------------------------------------------------------------------------
>>>> > root@ud-01:~# ceph fs status
>>>> > ud-data - 84 clients
>>>> > =======
>>>> > RANK  STATE           MDS             ACTIVITY     DNS    INOS   DIRS   CAPS
>>>> >  0    active  ud-data.ud-04.seggyv  Reqs:  142 /s  2844k  2798k   303k   720k
>>>> >         POOL            TYPE     USED  AVAIL
>>>> > cephfs.ud-data.meta  metadata   137G  44.9T
>>>> > cephfs.ud-data.data    data    44.2T  44.9T
>>>> > STANDBY MDS
>>>> > ud-data.ud-02.xcoojt
>>>> > ud-data.ud-05.rnhcfe
>>>> > ud-data.ud-03.lhwkml
>>>> > ud-data.ud-01.uatjle
>>>> > MDS version: ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)
>>>> >
>>>> > -----------------------------------------------------------------------------------
>>>> > My MDS settings are below:
>>>> >
>>>> > mds_cache_memory_limit                 | 8589934592
>>>> > mds_cache_trim_threshold               | 524288
>>>> > mds_recall_global_max_decay_threshold  | 131072
>>>> > mds_recall_max_caps                    | 30000
>>>> > mds_recall_max_decay_rate              | 1.500000
>>>> > mds_recall_max_decay_threshold         | 131072
>>>> > mds_recall_warning_threshold           | 262144
>>>> >
>>>> > I have 2 questions:
>>>> > 1- What should I do to prevent the cache pressure warning?
>>>> > 2- What can I do to increase speed?
>>>> >
>>>> > - Thanks
>>>> > _______________________________________________
>>>> > ceph-users mailing list -- ceph-users@xxxxxxx
>>>> > To unsubscribe send an email to ceph-users-leave@xxxxxxx
>>>>
>>>>
>>>> _______________________________________________
>>>> ceph-users mailing list -- ceph-users@xxxxxxx
>>>> To unsubscribe send an email to ceph-users-leave@xxxxxxx
>>>>
>>>
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx