For example:

top - 22:53:47 up 1:29, 2 users, load average: 2.23, 2.08, 1.92
Tasks: 255 total, 2 running, 253 sleeping, 0 stopped, 0 zombie
%Cpu(s): 4.2 us, 4.5 sy, 0.0 ni, 91.1 id, 0.1 wa, 0.0 hi, 0.1 si, 0.0 st
MiB Mem : 161169.7 total, 23993.9 free, 132036.5 used, 5139.3 buff/cache
MiB Swap: 0.0 total, 0.0 free, 0.0 used. 24425.1 avail Mem

  PID USER   PR  NI    VIRT    RES    SHR S  %CPU %MEM    TIME+ COMMAND
32014 ceph   20   0  126.1g 124.1g  38656 S 100.3 78.8  5:44.77 ceph-osd
17651 ceph   20   0 1122932 345204  14148 S  10.3  0.2  4:49.79 ceph-osd
17248 ceph   20   0 8014892   3.3g  15956 S   8.0  2.1  8:07.75 ceph-osd
 1069 root   20   0  336504 225300 222248 S   6.6  0.1  4:10.84 systemd-journal
17862 ceph   20   0 1228508 443328  16740 S   6.3  0.3  2:05.88 ceph-osd
 2708 63150  20   0  431968  43560  41972 R   3.0  0.0  1:26.26 systemd-journal
 2718 root   20   0  226564   5016   1824 S   2.7  0.0  1:50.74 rsyslogd
32727 root   20   0       0      0      0 I   1.0  0.0  0:01.00 kworker/2:2-mm_percpu_wq
 1511 root   20   0 2264860  29052   8552 S   0.7  0.0  3:55.76 croitd
   13 root   20   0       0      0      0 I   0.3  0.0  0:03.65 rcu_sched
    1 root   20   0  167900   8804   5332 S   0.0  0.0  0:30.66 systemd
    2 root   20   0       0      0      0 S   0.0  0.0  0:00.16 kthreadd
    3 root    0 -20       0      0      0 I   0.0  0.0  0:00.00 rcu_gp
    4 root    0 -20       0      0      0 I   0.0  0.0  0:00.00 rcu_par_gp
    6 root    0 -20       0      0      0 I   0.0  0.0  0:00.00 kworker/0:0H-events_highpri
    9 root    0 -20       0      0      0 I   0.0  0.0  0:00.00 mm_percpu_wq
   10 root   20   0       0      0      0 S   0.0  0.0  0:00.00 rcu_tasks_rude_
   11 root   20   0       0      0      0 S   0.0  0.0  0:00.00 rcu_tasks_trace
   12 root   20   0       0      0      0 S   0.0  0.0  0:00.37 ksoftirqd/0
   14 root   rt   0       0      0      0 S   0.0  0.0  0:00.09 migration/0

root@bb-ceph-enc-rm63-osd03-31 ~ $ ceph daemon osd.13 dump_mempools -h
{
    "mempool": {
        "by_pool": {
            "bloom_filter": { "items": 0, "bytes": 0 },
            "bluestore_alloc": { "items": 4671335, "bytes": 97772992 },
            "bluestore_cache_data": { "items": 293, "bytes": 272261 },
            "bluestore_cache_onode": { "items": 281, "bytes": 173096 },
            "bluestore_cache_meta": { "items": 10777, "bytes": 63953 },
            "bluestore_cache_other": { "items": 638, "bytes": 34200 },
            "bluestore_Buffer": { "items": 8, "bytes": 768 },
            "bluestore_Extent": { "items": 8, "bytes": 384 },
            "bluestore_Blob": { "items": 8, "bytes": 832 },
            "bluestore_SharedBlob": { "items": 8, "bytes": 896 },
            "bluestore_inline_bl": { "items": 0, "bytes": 0 },
            "bluestore_fsck": { "items": 0, "bytes": 0 },
            "bluestore_txc": { "items": 0, "bytes": 0 },
            "bluestore_writing_deferred": { "items": 0, "bytes": 0 },
            "bluestore_writing": { "items": 0, "bytes": 0 },
            "bluefs": { "items": 440, "bytes": 15760 },
            "bluefs_file_reader": { "items": 62, "bytes": 5898112 },
            "bluefs_file_writer": { "items": 3, "bytes": 672 },
            "buffer_anon": { "items": 30941954, "bytes": 126064178281 },
            "buffer_meta": { "items": 2708, "bytes": 238304 },
            "osd": { "items": 277, "bytes": 3583272 },
            "osd_mapbl": { "items": 0, "bytes": 0 },
            "osd_pglog": { "items": 45797772, "bytes": 4854818176 },
            "osdmap": { "items": 3792, "bytes": 140872 },
            "osdmap_mapping": { "items": 0, "bytes": 0 },
            "pgmap": { "items": 0, "bytes": 0 },
            "mds_co": { "items": 0, "bytes": 0 },
            "unittest_1": { "items": 0, "bytes": 0 },
            "unittest_2": { "items": 0, "bytes": 0 }
        },
        "total": { "items": 81430364, "bytes": 131027192831 }
    }
}
root@bb-ceph-enc-rm63-osd03-31 ~ $

On Wed, 5 Jan 2022 at 22:43, Lee <lquince@xxxxxxxxx> wrote:

> The first OSD took 156 GB of RAM to boot.. :(
>
> Is there an easy way to stop the mempool from pulling so much memory?
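In the dump above, almost all of the ~131 GB total sits in buffer_anon (~126 GB) and osd_pglog (~4.9 GB). As a minimal sketch of the settings discussed later in this thread ("osd memory target", "bluestore cache size", and the PG log length), with purely illustrative values rather than recommendations, assuming an Octopus-era release and using osd.13 from the command above as the example daemon:

# osd_memory_target is the per-OSD budget that BlueStore cache autotuning
# works toward; as far as I understand it can only shrink the caches and
# cannot reclaim memory already sitting in buffer_anon or osd_pglog.
ceph config set osd osd_memory_target 4294967296      # 4 GiB, illustrative

# Alternatively, disable autotuning and pin the BlueStore cache directly.
ceph config set osd bluestore_cache_autotune false
ceph config set osd bluestore_cache_size 1073741824   # 1 GiB, illustrative

# Shorter PG logs shrink the osd_pglog pool, at the price of more backfill
# instead of log-based recovery after longer outages.
ceph config set osd osd_min_pg_log_entries 500
ceph config set osd osd_max_pg_log_entries 3000

# A hard ceiling can also be imposed outside Ceph, e.g. via systemd/cgroups:
systemctl set-property ceph-osd@13.service MemoryMax=8G

The systemd override is the same idea as the cgroup-limited, containerized OSDs described in the reply below.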
> On Wed, 5 Jan 2022 at 22:12, Mazzystr <mazzystr@xxxxxxxxx> wrote:
>
>> And that is exactly why I run OSDs containerized with limited CPU and
>> memory, as well as "bluestore cache size", "osd memory target", and
>> "mds cache memory limit". OSD processes have become noisy neighbors in
>> the last few versions.
>>
>> On Wed, Jan 5, 2022 at 1:47 PM Lee <lquince@xxxxxxxxx> wrote:
>>
>>> I'm not rushing.
>>>
>>> I have found the issue: I am getting OOM errors as the OSD boots.
>>> Basically it starts to process the PGs, then the node runs out of
>>> memory and the daemon is killed.
>>>
>>> 2022-01-05 20:09:08 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:01.024+0000 7fce3c6bc700 10 osd.51 24448261 tick
>>> 2022-01-05 20:09:10 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:01.060+0000 7fce3b441700 10 osd.51 24448261 tick_without_osd_lock
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:02.268+0000 7fce3c6bc700 10 osd.51 24448261 do_waiters -- start
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:09.544+0000 7fce3c6bc700 10 osd.51 24448261 do_waiters -- finish
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:10.260+0000 7fce1e407700 5 osd.51 24448261 heartbeat osd_stat(store_statfs(0x2258948000/0x40000000/0x3a38800000, data 0x17919fd8c4/0x179feb4000, compress 0x0/0x0/0x0, omap 0xc9773, meta 0x3ff3688d), peers [] op hist [])
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.060+0000 7fce3c6bc700 20 osd.51 24448261 tick last_purged_snaps_scrub 2022-01-04T22:29:39.121925+0000 next 2022-01-05T22:29:39.121925+0000
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.104+0000 7fce1e407700 20 osd.51 24448261 check_full_status cur ratio 0.410072, physical ratio 0.410072, new state none
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.108+0000 7fce34c34700 20 bluestore(/var/lib/ceph/osd/ceph-51) deferred_try_submit 0 osrs, 0 txcs
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.108+0000 7fce34c34700 5 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.160+0000 7fce34c34700 20 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.216+0000 7fce34c34700 20 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.264+0000 7fce34c34700 20 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.400+0000 7fce34c34700 20 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.536+0000 7fce34c34700 20 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.640+0000 7fce34c34700 20 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.644+0000 7fce1e407700 5 osd.51 24448261 heartbeat osd_stat(store_statfs(0x2258948000/0x40000000/0x3a38800000, data 0x17919fd8c4/0x179feb4000, compress 0x0/0x0/0x0, omap 0xc9773, meta 0x3ff3688d), peers [] op hist [])
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.712+0000 7fce34c34700 20 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.688+0000 7fce1e407700 20 osd.51 24448261 check_full_status cur ratio 0.410072, physical ratio 0.410072, new state none
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.480+0000 7fce3b441700 20 bluestore(/var/lib/ceph/osd/ceph-51) statfs store_statfs(0x2258948000/0x40000000/0x3a38800000, data 0x17919fd8c4/0x179feb4000, compress 0x0/0x0/0x0, omap 0xc9773, meta 0x3ff3688d)
>>> 2022-01-05 20:09:13 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:13.844+0000 7fce34c34700 20 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:14 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:14.016+0000 7fce34c34700 20 bluestore.MempoolThread(0x55f42e762a98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67082912 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> 2022-01-05 20:09:38 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T20:09:14.104+0000 7fce3c6bc700 10 osd.51 24448261 tick
>>> 2022-01-05 20:10:37 bb-ceph-enc-rm63-osd03-31 init.scope ceph-osd@51.service: Main process exited, code=killed, status=9/KILL
>>> 2022-01-05 20:10:37 bb-ceph-enc-rm63-osd03-31 init.scope ceph-osd@51.service: Failed with result 'signal'.
>>> 2022-01-05 20:10:47 bb-ceph-enc-rm63-osd03-31 init.scope ceph-osd@51.service: Scheduled restart job, restart counter is at 1.
>>> 2022-01-05 20:10:47 bb-ceph-enc-rm63-osd03-31 init.scope Stopped Ceph object storage daemon osd.51.
>>>
>>> I have just physically increased the RAM in one of the nodes and
>>> removed the other OSDs physically for now, and managed to get one of
>>> the 3 down OSDs to come up. Just stepping through each at the moment.
>>>
>>> Regards
>>>
>>> Lee
>>>
>>> On Wed, 5 Jan 2022 at 21:10, mhnx <morphinwithyou@xxxxxxxxx> wrote:
>>>
>>> > First of all, do not rush into bad decisions.
>>> > Production is down and you want to bring it back online, but you
>>> > should fix the problem and be sure first. If a second crash occurs
>>> > in a healing state, you will lose metadata.
>>> > You don't need to debug first!
>>> >
>>> > You didn't mention your cluster status, so we don't know what you
>>> > have. We need some information:
>>> > 1- ceph -s
>>> > 2- ceph health detail
>>> > 3- ceph df
>>> > 4- tail /var/log/ceph/ceph-osd{crashed osd number}.log -n 1000
>>> >
>>> > On Wed, 5 Jan 2022 at 23:14, Lee <lquince@xxxxxxxxx> wrote:
>>> > >
>>> > > Looking for some help as this is affecting production.
>>> > >
>>> > > We run a 3-node cluster with a mix of 5x SSD, 15x SATA and 5x SAS
>>> > > in each node, running 15.2.15. All use DB/WAL on NVMe SSD except
>>> > > the SSDs.
>>> > >
>>> > > Earlier today I increased the PG num from 32 to 128 on one of our
>>> > > pools because the status was complaining, which is normally a
>>> > > routine change. Two to three minutes in, I watched in horror as
>>> > > the SSD-based OSDs crashed on all 3 nodes, refusing to restart.
>>> > >
>>> > > I've set debug_bluefs and debug_bluestore to 20; it gets so far
>>> > > and then the daemon fails.
>>> > >
>>> > > 2022-01-05 19:39:23 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:39:23.335+0000 7f2794383700 20 bluestore(/var/lib/ceph/osd/ceph-51) deferred_try_submit 0 osrs, 0 txcs
>>> > > 2022-01-05 19:39:23 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:39:23.335+0000 7f2794383700 5 bluestore.MempoolThread(0x560433f0aa98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67075728 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> > > 2022-01-05 19:39:23 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:39:23.387+0000 7f2794383700 20 bluestore.MempoolThread(0x560433f0aa98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67075728 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> > > 2022-01-05 19:39:23 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:39:23.467+0000 7f2794383700 20 bluestore.MempoolThread(0x560433f0aa98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67075728 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> > > 2022-01-05 19:39:24 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:39:23.979+0000 7f2794383700 20 bluestore.MempoolThread(0x560433f0aa98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67075728 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> > > 2022-01-05 19:39:24 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:39:24.167+0000 7f2794383700 20 bluestore.MempoolThread(0x560433f0aa98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67075728 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> > > 2022-01-05 19:39:24 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:39:24.271+0000 7f2794383700 20 bluestore.MempoolThread(0x560433f0aa98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67075728 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> > > 2022-01-05 19:39:24 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:39:24.327+0000 7f2794383700 20 bluestore.MempoolThread(0x560433f0aa98) _resize_shards cache_size: 134217728 kv_alloc: 67108864 kv_used: 67075728 meta_alloc: 67108864 meta_used: 75234 data_alloc: 67108864 data_used: 0
>>> > > 2022-01-05 19:39:32 bb-ceph-enc-rm63-osd03-31 init.scope >>> > ceph-osd@51.service: >>> > > Main process exited, code=killed, status=9/KILL >>> > > 2022-01-05 19:39:32 bb-ceph-enc-rm63-osd03-31 init.scope >>> > ceph-osd@51.service: >>> > > Failed with result 'signal'. >>> > > 2022-01-05 19:39:42 bb-ceph-enc-rm63-osd03-31 init.scope >>> > ceph-osd@51.service: >>> > > Scheduled restart job, restart counter is at 1. >>> > > >>> > > I've run >>> > > ceph-bluestore-tool bluefs-bdev-sizes --path >>> /var/lib/ceph/osd/ceph-51 >>> > > inferring bluefs devices from bluestore path >>> > > 1 : device size 0x3a38800000 : own 0x[1bf2200000~254300000] = >>> > 0x254300000 : >>> > > using 0x3fd10000(1021 MiB) : bluestore has 0x1d83400000(118 GiB) >>> > available >>> > > >>> > > Also fsck and repair all seems to be ok. >>> > > >>> > > The normal log looks like >>> > > >>> > > 2022-01-05 19:39:42 bb-ceph-enc-rm63-osd03-31 init.scope Starting >>> Ceph >>> > > object storage daemon osd.51... >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.467+0000 7fca32943e00 0 set uid:gid to >>> 64045:64045 >>> > > (ceph:ceph) >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.467+0000 7fca32943e00 0 ceph version 15.2.15 >>> > > (2dfb18841cfecc2f7eb7eb2afd65986ca4d95985) octopus (stable), process >>> > > ceph-osd, pid 139577 >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.467+0000 7fca32943e00 0 pidfile_write: ignore >>> empty >>> > > --pid-file >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.491+0000 7fca32943e00 1 bdev create path >>> > > /var/lib/ceph/osd/ceph-51/block type kernel >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.491+0000 7fca32943e00 1 bdev(0x55b4b234e000 >>> > > /var/lib/ceph/osd/ceph-51/block) open path >>> > /var/lib/ceph/osd/ceph-51/block >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.491+0000 7fca32943e00 1 bdev(0x55b4b234e000 >>> > > /var/lib/ceph/osd/ceph-51/block) open size 250056015872 >>> (0x3a38800000, >>> > 233 >>> > > GiB) block_size 4096 (4 KiB) non-rotational discard not supported >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.491+0000 7fca32943e00 1 >>> > > bluestore(/var/lib/ceph/osd/ceph-51) _set_cache_sizes cache_size >>> > 1073741824 >>> > > meta 0.4 kv 0.4 data 0.2 >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.491+0000 7fca32943e00 1 bdev create path >>> > > /var/lib/ceph/osd/ceph-51/block type kernel >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.491+0000 7fca32943e00 1 bdev(0x55b4b234e380 >>> > > /var/lib/ceph/osd/ceph-51/block) open path >>> > /var/lib/ceph/osd/ceph-51/block >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.491+0000 7fca32943e00 1 bdev(0x55b4b234e380 >>> > > /var/lib/ceph/osd/ceph-51/block) open size 250056015872 >>> (0x3a38800000, >>> > 233 >>> > > GiB) block_size 4096 (4 KiB) non-rotational discard not supported >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.491+0000 7fca32943e00 1 bluefs add_block_device >>> > bdev 1 >>> > > path /var/lib/ceph/osd/ceph-51/block size 233 GiB >>> > > 2022-01-05 19:39:46 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:46.491+0000 7fca32943e00 1 bdev(0x55b4b234e380 >>> > 
> /var/lib/ceph/osd/ceph-51/block) close >>> > > 2022-01-05 19:39:47 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:47.067+0000 7fca32943e00 0 starting osd.51 osd_data >>> > > /var/lib/ceph/osd/ceph-51 /var/lib/ceph/osd/ceph-51/journal >>> > > 2022-01-05 19:39:47 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:47.159+0000 7fca32943e00 0 load: jerasure load: lrc >>> > load: >>> > > isa >>> > > 2022-01-05 19:39:47 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:47.159+0000 7fca32943e00 1 bdev create path >>> > > /var/lib/ceph/osd/ceph-51/block type kernel >>> > > 2022-01-05 19:39:47 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:47.159+0000 7fca32943e00 1 bdev(0x55b4b234e000 >>> > > /var/lib/ceph/osd/ceph-51/block) open path >>> > /var/lib/ceph/osd/ceph-51/block >>> > > 2022-01-05 19:39:47 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:47.163+0000 7fca32943e00 1 bdev(0x55b4b234e000 >>> > > /var/lib/ceph/osd/ceph-51/block) open size 250056015872 >>> (0x3a38800000, >>> > 233 >>> > > GiB) block_size 4096 (4 KiB) non-rotational discard not supported >>> > > 2022-01-05 19:39:47 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:47.163+0000 7fca32943e00 1 >>> > > bluestore(/var/lib/ceph/osd/ceph-51) _set_cache_sizes cache_size >>> > 1073741824 >>> > > meta 0.4 kv 0.4 data 0.2 >>> > > 2022-01-05 19:39:47 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:47.163+0000 7fca32943e00 1 bdev(0x55b4b234e000 >>> > > /var/lib/ceph/osd/ceph-51/block) close >>> > > 2022-01-05 19:39:48 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:48.619+0000 7fca32943e00 1 >>> > > bluestore(/var/lib/ceph/osd/ceph-51) _open_alloc loaded 138 GiB in >>> 276582 >>> > > extents available 129 GiB >>> > > 2022-01-05 19:39:48 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:48.619+0000 7fca32943e00 1 bluefs umount >>> > > 2022-01-05 19:39:48 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:48.619+0000 7fca32943e00 1 bdev(0x55b4b234e380 >>> > > /var/lib/ceph/osd/ceph-51/block) close >>> > > 2022-01-05 19:39:48 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:48.803+0000 7fca32943e00 1 bdev create path >>> > > /var/lib/ceph/osd/ceph-51/block type kernel >>> > > 2022-01-05 19:39:48 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:48.803+0000 7fca32943e00 1 bdev(0x55b4b234e380 >>> > > /var/lib/ceph/osd/ceph-51/block) open path >>> > /var/lib/ceph/osd/ceph-51/block >>> > > 2022-01-05 19:39:48 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:48.803+0000 7fca32943e00 1 bdev(0x55b4b234e380 >>> > > /var/lib/ceph/osd/ceph-51/block) open size 250056015872 >>> (0x3a38800000, >>> > 233 >>> > > GiB) block_size 4096 (4 KiB) non-rotational discard not supported >>> > > 2022-01-05 19:39:48 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:48.803+0000 7fca32943e00 1 bluefs add_block_device >>> > bdev 1 >>> > > path /var/lib/ceph/osd/ceph-51/block size 233 GiB >>> > > 2022-01-05 19:39:48 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:48.803+0000 7fca32943e00 1 bluefs mount >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.087+0000 7fca32943e00 1 >>> > > bluestore(/var/lib/ceph/osd/ceph-51) _open_db opened rocksdb path db >>> > > options >>> > > >>> > >>> 
compression=kNoCompression,max_write_buffer_number=4,min_write_buffer_number_to_merge=1,recycle_log_file_num=4,write_buffer_size=268435456,writable_file_max_buffer_size=0,compaction_readahead_size=2097152,max_background_compactions=2 >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.087+0000 7fca32943e00 1 >>> > > bluestore(/var/lib/ceph/osd/ceph-51) _upgrade_super from 4, latest 4 >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.087+0000 7fca32943e00 1 >>> > > bluestore(/var/lib/ceph/osd/ceph-51) _upgrade_super done >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.131+0000 7fca32943e00 0 >>> > > /build/ceph-15.2.15/src/cls/cephfs/cls_cephfs.cc:198: loading cephfs >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.131+0000 7fca32943e00 0 >>> > > /build/ceph-15.2.15/src/cls/hello/cls_hello.cc:312: loading >>> cls_hello >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.135+0000 7fca32943e00 0 _get_class not >>> permitted to >>> > > load kvs >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.171+0000 7fca32943e00 0 _get_class not >>> permitted to >>> > > load lua >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.207+0000 7fca32943e00 0 _get_class not >>> permitted to >>> > > load queue >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.319+0000 7fca32943e00 0 _get_class not >>> permitted to >>> > > load sdk >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.319+0000 7fca32943e00 0 osd.51 24448261 crush >>> map >>> > has >>> > > features 288514051259236352, adjusting msgr requires for clients >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.319+0000 7fca32943e00 0 osd.51 24448261 crush >>> map >>> > has >>> > > features 288514051259236352 was 8705, adjusting msgr requires for >>> mons >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.319+0000 7fca32943e00 0 osd.51 24448261 crush >>> map >>> > has >>> > > features 3314933000852226048, adjusting msgr requires for osds >>> > > 2022-01-05 19:39:49 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:39:49.319+0000 7fca32943e00 1 osd.51 24448261 >>> > > check_osdmap_features require_osd_release unknown -> octopus >>> > > 2022-01-05 19:41:25 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:41:24.999+0000 7fca32943e00 0 osd.51 24448261 load_pgs >>> > > opened 66 pgs >>> > > 2022-01-05 19:41:25 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:41:25.071+0000 7fca32943e00 -1 osd.51 24448261 >>> > > log_to_monitors {default=true} >>> > > 2022-01-05 19:41:25 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:41:25.071+0000 7fca32943e00 -1 osd.51 24448261 >>> > > log_to_monitors {default=true} >>> > > 2022-01-05 19:42:16 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:42:16.631+0000 7fca32943e00 0 osd.51 24448261 done >>> with >>> > > init, starting boot process >>> > > 2022-01-05 19:42:16 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:42:16.631+0000 7fca32943e00 1 osd.51 24448261 >>> start_boot >>> > > 2022-01-05 19:42:16 bb-ceph-enc-rm63-osd03-31 osd.51 >>> > > 2022-01-05T19:42:16.635+0000 7fca14615700 1 osd.51 pg_epoch: >>> 24448130 >>> > > pg[44.17( v 24448128'27126321 
>>> > > (24447767'27121032,24448128'27126321] local-lis/les=24447864/24447865 n=2356 ec=4550661/4550661 lis/c=24447864/24447864 les/c/f=24447865/24447865/22709931 sis=24448130) [51,48,15] r=0 lpr=24448130 pi=[24447864,24448130)/1 crt=24448128'27126321 lcod 0'0 mlcod 0'0 unknown mbc={}] start_peering_interval up [51,48,15] -> [51,48,15], acting [51,48,15] -> [51,48,15], acting_primary 51 -> 51, up_primary 51 -> 51, role 0 -> 0, features acting 4540138292840890367 upacting 4540138292840890367
>>> > > 2022-01-05 19:42:16 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:42:16.635+0000 7fca13613700 1 osd.51 pg_epoch: 24448130 pg[44.1( v 24448129'31648690 (24447777'31643388,24448129'31648690] local-lis/les=24447865/24447866 n=2314 ec=4550661/4550661 lis/c=24447865/24447865 les/c/f=24447866/24447866/22709931 sis=24448130) [51,15,5] r=0 lpr=24448130 pi=[24447865,24448130)/1 crt=24448129'31648690 lcod 0'0 mlcod 0'0 unknown mbc={}] start_peering_interval up [51,15,5] -> [51,15,5], acting [51,15,5] -> [51,15,5], acting_primary 51 -> 51, up_primary 51 -> 51, role 0 -> 0, features acting 4540138292840890367 upacting 4540138292840890367
>>> > > 2022-01-05 19:42:16 bb-ceph-enc-rm63-osd03-31 osd.51 2022-01-05T19:42:16.635+0000 7fca15617700 1 osd.51 pg_epoch: 24448130 pg[44.15( v 24448129'37939392 (24447777'37936883,24448129'37939392] local-lis/les=24448118/24448119 n=2350 ec=4550661/4550661 lis/c=24448118/24448118 les/c/f=24448119/24448119/22709931 sis=24448130) [5,14,51] r=2 lpr=24448130 pi=[24448118,24448130)/1 crt=24448129'37939392 lcod 0'0 mlcod 0'0 unknown mbc={}] start_peering_interval up [5,14,51] -> [5,14,51], acting [5,14,51] -> [5,14,51], acting_primary 5 -> 5, up_primary 5 -> 5, role 2 -> 2, features acting 4540138292840890367 upacting 4540138292840890367
>>> > > 2022-01-05 19:42:51 bb-ceph-enc-rm63-osd03-31 init.scope ceph-osd@51.service: Main process exited, code=killed, status=9/KILL
>>> > > 2022-01-05 19:42:51 bb-ceph-enc-rm63-osd03-31 init.scope ceph-osd@51.service: Failed with result 'signal'.
>>> > > 2022-01-05 19:43:01 bb-ceph-enc-rm63-osd03-31 init.scope ceph-osd@51.service: Scheduled restart job, restart counter is at 2.
>>> > >
>>> > > The problem is that this has taken the production and metadata SSD
>>> > > pools fully down, with all 3 copies offline, and I cannot find out
>>> > > what is causing these OSDs to crash.
>>> > >
>>> > > Kind Regards
>>> > >
>>> > > Lee
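A quick way to see where an OSD's memory is going is to rank the dump_mempools output shown at the top of this thread by bytes. A minimal sketch, assuming python3 is available on the OSD host and using osd.51 from the logs above as the example daemon:

ceph daemon osd.51 dump_mempools | python3 -c "
import json, sys
# Field names follow the JSON shown earlier: mempool -> by_pool -> items/bytes.
pools = json.load(sys.stdin)['mempool']['by_pool']
top = sorted(pools.items(), key=lambda kv: kv[1]['bytes'], reverse=True)[:5]
for name, v in top:
    print('%-28s %14d bytes  %12d items' % (name, v['bytes'], v['items']))
"

# If the daemon was killed rather than asserting on its own, the kernel log
# should confirm whether the OOM killer fired:
journalctl -k -b | grep -i -e 'out of memory' -e oom

For the dump posted at the top of this thread, buffer_anon and osd_pglog come out on top, which lines up with the ~156 GB boot footprint reported above.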