Hello Martin. I'm using 14.2.16 Our S3 usage is similar. I have 10 rgw. They're running on OSD nodes. 9 RGW is between 3G and 5G. But One of my rgw is using 85G. I have 256G ram and that's why I didn't see that before. Thanks for the warning. But the question is Why 9 RGW is low and one of them very high? Weird... My ceph.conf: [radosgw.9] rgw data = /var/lib/ceph/radosgw/ceph-radosgw.9 rgw zonegroup = xx rgw zone = xxx rgw zonegroup root pool = xx.root rgw zone root pool = xx.root host = HOST9 rgw dns name = s3.xxxx.com rgw frontends = beast port=8010 rgw user max buckets=999 log file = /var/log/ceph/radosgw.9.log rgw run sync thread = false rgw_dynamic_resharding = false Martin Traxl <martin.traxl@xxxxxxxx>, 13 Ağu 2021 Cum, 14:53 tarihinde şunu yazdı: > We are experiencing this behaviour eversince this cluster is productive > and gets "some load". We started with this cluster in May this year, > running Ceph 14.2.15 and already had this same issue. It just took a little > longer until all RAM was consumed, as the load was a little lower than it > is now. > > This is my config diff (I stripped some hostnames/IPs): > > > { > "diff": { > "admin_socket": { > "default": "$run_dir/$cluster-$name.$pid.$cctid.asok", > "final": > "/var/run/ceph/ceph-client.rgw.#####.882549.94336165049544.asok" > }, > "bluefs_buffered_io": { > "default": true, > "file": true, > "final": true > }, > "cluster_network": { > "default": "", > "file": "#####/26", > "final": "#####/26" > }, > "daemonize": { > "default": true, > "override": false, > "final": false > }, > "debug_rgw": { > "default": "1/5", > "final": "1/5" > }, > "filestore_fd_cache_size": { > "default": 128, > "file": 2048, > "final": 2048 > }, > "filestore_op_threads": { > "default": 2, > "file": 8, > "final": 8 > }, > "filestore_queue_max_ops": { > "default": 50, > "file": 100, > "final": 100 > }, > "fsid": { > "default": "00000000-0000-0000-0000-000000000000", > "file": "#####", > "override": "#####", > "final": "#####" > }, > "keyring": { > "default": "$rgw_data/keyring", > "final": "/var/lib/ceph/radosgw/ceph-rgw.#####/keyring" > }, > "mon_host": { > "default": "", > "file": "##### ##### #####", > "final": "##### ##### #####" > }, > > "mon_osd_down_out_interval": { > "default": 600, > "file": 1800, > "final": 1800 > }, > "mon_osd_down_out_subtree_limit": { > "default": "rack", > "file": "host", > "final": "host" > }, > "mon_osd_initial_require_min_compat_client": { > "default": "jewel", > "file": "jewel", > "final": "jewel" > }, > "mon_osd_min_down_reporters": { > "default": 2, > "file": 2, > "final": 2 > }, > "mon_osd_reporter_subtree_level": { > "default": "host", > "file": "host", > "final": "host" > }, > "ms_client_mode": { > "default": "crc secure", > "file": "secure", > "final": "secure" > }, > "ms_cluster_mode": { > "default": "crc secure", > "file": "secure", > "final": "secure" > }, > "ms_mon_client_mode": { > "default": "secure crc", > "file": "secure", > "final": "secure" > }, > "ms_mon_cluster_mode": { > "default": "secure crc", > "file": "secure", > "final": "secure" > }, > "ms_mon_service_mode": { > "default": "secure crc", > "file": "secure", > "final": "secure" > }, > > "ms_service_mode": { > "default": "crc secure", > "file": "secure", > "final": "secure" > }, > "objecter_inflight_ops": { > "default": 24576, > "final": 24576 > }, > "osd_backfill_scan_max": { > "default": 512, > "file": 16, > "final": 16 > }, > "osd_backfill_scan_min": { > "default": 64, > "file": 8, > "final": 8 > }, > "osd_deep_scrub_stride": { > "default": "524288", > "file": "1048576", > "final": "1048576" > }, > "osd_fast_shutdown": { > "default": true, > "file": false, > "final": false > }, > "osd_heartbeat_min_size": { > "default": "2000", > "file": "0", > "final": "0" > }, > "osd_journal_size": { > "default": "5120", > "file": "4096", > "final": "4096" > }, > "osd_max_backfills": { > "default": 1, > "file": 1, > "final": 1 > }, > "osd_max_scrubs": { > "default": 1, > "file": 1, > "final": 1 > }, > "osd_op_complaint_time": { > "default": 30, > "file": 5, > "final": 5 > }, > > "osd_pool_default_flag_hashpspool": { > "default": true, > "file": true, > "final": true > }, > "osd_pool_default_min_size": { > "default": 0, > "file": 1, > "final": 1 > }, > "osd_pool_default_size": { > "default": 3, > "file": 3, > "final": 3 > }, > "osd_recovery_max_active": { > "default": 3, > "file": 1, > "final": 1 > }, > "osd_recovery_max_single_start": { > "default": 1, > "file": 1, > "final": 1 > }, > "osd_recovery_op_priority": { > "default": 3, > "file": 3, > "final": 3 > }, > "osd_recovery_sleep_hdd": { > "default": 0.10000000000000001, > "file": 0, > "final": 0 > }, > "osd_scrub_begin_hour": { > "default": 0, > "file": 5, > "final": 5 > }, > "osd_scrub_chunk_max": { > "default": 25, > "file": 1, > "final": 1 > }, > "osd_scrub_chunk_min": { > "default": 5, > "file": 1, > "final": 1 > }, > > "osd_recovery_op_priority": { > "default": 3, > "file": 3, > "final": 3 > }, > "osd_recovery_sleep_hdd": { > "default": 0.10000000000000001, > "file": 0, > "final": 0 > }, > "osd_scrub_begin_hour": { > "default": 0, > "file": 5, > "final": 5 > }, > "osd_scrub_chunk_max": { > "default": 25, > "file": 1, > "final": 1 > }, > "osd_scrub_chunk_min": { > "default": 5, > "file": 1, > "final": 1 > }, > "osd_scrub_during_recovery": { > "default": false, > "file": true, > "final": true > }, > "osd_scrub_end_hour": { > "default": 24, > "file": 23, > "final": 23 > }, > "osd_scrub_load_threshold": { > "default": 0.5, > "file": 1, > "final": 1 > }, > "osd_scrub_priority": { > "default": 5, > "file": 1, > "final": 1 > }, > "osd_snap_trim_priority": { > "default": 5, > "file": 1, > "final": 1 > }, > "osd_snap_trim_sleep": { > "default": 0, > "file": 1, > "final": 1 > }, > "public_network": { > "default": "", > "file": "#####/26", > "final": "#####/26" > }, > "rbd_default_features": { > "default": "61", > "final": "61" > }, > "rgw_dns_name": { > "default": "", > "file": "#####", > "final": "#####" > }, > "rgw_frontends": { > "default": "beast port=7480", > "file": "beast ssl_endpoint=#####:443 > ssl_certificate=/etc/ceph/rgw-ssl/#####.pem > ssl_private_key=/etc/ceph/rgw-ssl/#####.key", > "final": "beast ssl_endpoint=#####:443 > ssl_certificate=/etc/ceph/rgw-ssl/#####.pem > ssl_private_key=/etc/ceph/rgw-ssl/#####.key" > }, > "rgw_ignore_get_invalid_range": { > "default": false, > "file": true, > "final": true > }, > > "rgw_ldap_binddn": { > "default": "uid=admin,cn=users,dc=example,dc=com", > "file": "uid=#####,cn=#####,cn=mf,ou=#####", > "final": "uid=#####,cn=#####,cn=mf,ou=#####" > }, > "rgw_ldap_dnattr": { > "default": "uid", > "file": "uid", > "final": "uid" > }, > "rgw_ldap_searchdn": { > "default": "cn=users,cn=accounts,dc=example,dc=com", > "file": "ou=#####", > "final": "ou=#####" > }, > "rgw_ldap_secret": { > "default": "/etc/openldap/secret", > "file": "/etc/ceph/ldap/bindpw", > "final": "/etc/ceph/ldap/bindpw" > }, > "rgw_ldap_uri": { > "default": "ldaps://<ldap.your.domain>", > "file": "ldaps://#####:636", > "final": "ldaps://#####:636" > }, > "rgw_remote_addr_param": { > "default": "REMOTE_ADDR", > "file": "http_x_forwarded_for", > "final": "http_x_forwarded_for" > }, > "rgw_s3_auth_use_ldap": { > "default": false, > "file": true, > "final": true > }, > "rgw_s3_auth_use_sts": { > "default": false, > "file": true, > "final": true > }, > "rgw_sts_key": { > "default": "sts", > "file": "#####", > "final": "#####" > }, > "rgw_user_max_buckets": { > "default": 1000, > "file": -1, > "final": -1 > }, > "setgroup": { > "default": "", > "cmdline": "ceph", > "final": "ceph" > }, > "setuser": { > "default": "", > "cmdline": "ceph", > "final": "ceph" > } > } > } > > > > > ________________________________ > Von: Konstantin Shalygin <k0ste@xxxxxxxx> > Gesendet: Freitag, 13. August 2021 13:21 > An: Martin Traxl > Cc: ceph-users@xxxxxxx > Betreff: Re: RGW memory consumption > > Hi, > > On 13 Aug 2021, at 14:10, Martin Traxl <martin.traxl@xxxxxxxx<mailto: > martin.traxl@xxxxxxxx>> wrote: > > yesterday evening one of my rgw nodes died again, radosgw was killed by > the kernel oom killer. > > [Thu Aug 12 22:10:04 2021] Out of memory: Killed process 1376 (radosgw) > total-vm:70747176kB, anon-rss:63900544kB, file-rss:0kB, shmem-rss:0kB, > UID:167 pgtables:131008kB oom_score_adj:0 > [Thu Aug 12 22:10:09 2021] oom_reaper: reaped process 1376 (radosgw), now > anon-rss:0kB, file-rss:0kB, shmem-rss:0kB > > The radosgw was eating up all the 64GB system memory. > A few hours before this happened, mempool dump showed a total usage of > only 2.1 GB of ram, while in fact radosgw was using already 84.7% of 64GB. > > "total": { > "items": 88757980, > "bytes": 2147532284 > > PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ > COMMAND > 1376 ceph 20 0 58.8g 52.7g 17824 S 48.2 84.7 20158:04 > radosgw > > > It seems the radowgw loses track of some memory, like there is a memory > leak. > > Some additional information. I am running on CentOS 8.4, kernel 4.18. As > already mentioned, Ceph 14.2.22. radosgw is the only notable service > running on this machine. > Any suggestions on this? Are there maybe any tuning settings? How could I > debug this further? > > Please show your "config diff" from admin socket > Couple of days ago I was upgraded our RGW's to 14.2.21 to 14.2.22 and > don't see increase memory consumption > > > Thanks, > k > _______________________________________________ > ceph-users mailing list -- ceph-users@xxxxxxx > To unsubscribe send an email to ceph-users-leave@xxxxxxx > _______________________________________________ ceph-users mailing list -- ceph-users@xxxxxxx To unsubscribe send an email to ceph-users-leave@xxxxxxx