Hi Eugen,

The LV tags seem to look OK to me.

LV tags:
-------------------------
root@zephir:~# lvs -a -o +devices,tags | egrep 'osd1| LV' | grep -v osd12
LV  VG  Attr  LSize  Pool  Origin  Data%  Meta%  Move  Log  Cpy%Sync  Convert  Devices  LV Tags
osd-block-cdd02721-6876-4db8-bdb2-12ac6c70127c  ceph-dec5bd7c-d84f-40d9-ba14-6bd8aadf2957  -wi-ao----  16.37t  /dev/sde(0)  ceph.block_device=/dev/ceph-dec5bd7c-d84f-40d9-ba14-6bd8aadf2957/osd-block-cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.block_uuid=tHvTi7-nkde-JCvt-USL1-52V7-ejIi-J14rkb,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=27923302-87a5-11ec-ac5b-976d21a49941,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/optane/ceph-db-osd1,ceph.db_uuid=NfBaWG-ZnB2-1RV0-zRqJ-EbUe-v7ds-230OW9,ceph.encrypted=0,ceph.osd_fsid=cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.osd_id=1,ceph.osdspec_affinity=,ceph.type=block,ceph.vdo=0
ceph-db-osd1  optane  rwi-aor---  50.00g  100.00  ceph-db-osd1_rimage_0(0),ceph-db-osd1_rimage_1(0)  ceph.block_device=/dev/ceph-dec5bd7c-d84f-40d9-ba14-6bd8aadf2957/osd-block-cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.block_uuid=tHvTi7-nkde-JCvt-USL1-52V7-ejIi-J14rkb,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=27923302-87a5-11ec-ac5b-976d21a49941,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/optane/ceph-db-osd1,ceph.db_uuid=NfBaWG-ZnB2-1RV0-zRqJ-EbUe-v7ds-230OW9,ceph.encrypted=0,ceph.osd_fsid=cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.osd_id=1,ceph.osdspec_affinity=,ceph.type=db,ceph.vdo=0
[ceph-db-osd1_rimage_0]  optane  iwi-aor---  50.00g  /dev/sdi(9216)
[ceph-db-osd1_rimage_0]  optane  iwi-aor---  50.00g  /dev/sdi(82518)
[ceph-db-osd1_rimage_0]  optane  iwi-aor---  50.00g  /dev/sdi(55297)
[ceph-db-osd1_rimage_1]  optane  iwi-aor---  50.00g  /dev/sdj(1)
[ceph-db-osd1_rmeta_0]   optane  ewi-aor---   4.00m  /dev/sdi(46080)
[ceph-db-osd1_rmeta_1]   optane  ewi-aor---   4.00m  /dev/sdj(0)
root@zephir:~#
------------------
root@zephir:~# ceph osd metadata osd.1
{
    "id": 1,
    "arch": "x86_64",
    "back_addr": "[v2:192.168.0.3:6832/1767446902,v1:192.168.0.3:6833/1767446902]",
    "back_iface": "",
    "bluefs": "1",
    "bluefs_db_access_mode": "blk",
    "bluefs_db_block_size": "4096",
    "bluefs_db_dev_node": "/dev/dm-13",
    "bluefs_db_devices": "sdi,sdj",
    "bluefs_db_driver": "KernelDevice",
    "bluefs_db_optimal_io_size": "0",
    "bluefs_db_partition_path": "/dev/dm-13",
    "bluefs_db_rotational": "0",
    "bluefs_db_size": "53687091200",
    "bluefs_db_support_discard": "1",
    "bluefs_db_type": "ssd",
    "bluefs_dedicated_db": "1",
    "bluefs_dedicated_wal": "0",
    "bluefs_single_shared_device": "0",
    "bluestore_bdev_access_mode": "blk",
    "bluestore_bdev_block_size": "4096",
    "bluestore_bdev_dev_node": "/dev/dm-1",
    "bluestore_bdev_devices": "sde",
    "bluestore_bdev_driver": "KernelDevice",
    "bluestore_bdev_optimal_io_size": "0",
    "bluestore_bdev_partition_path": "/dev/dm-1",
    "bluestore_bdev_rotational": "1",
    "bluestore_bdev_size": "18000203743232",
    "bluestore_bdev_support_discard": "0",
    "bluestore_bdev_type": "hdd",
    "bluestore_min_alloc_size": "4096",
    "ceph_release": "reef",
    "ceph_version": "ceph version 18.2.1 (7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)",
    "ceph_version_short": "18.2.1",
    "ceph_version_when_created": "ceph version 17.2.6 (d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)",
    "container_hostname": "zephir",
    "container_image": "quay.io/ceph/ceph:v18.2.1",
    "cpu": "AMD Ryzen Threadripper PRO 3975WX 32-Cores",
    "created_at": "2023-05-08T16:42:46.313648Z",
    "default_device_class": "hdd",
    "device_ids": "sde=ATA_ST18000NM000J-2T_ZR53Z55N,sdi=NVME_INTEL_SSDPF21Q40_PHAL1505013J400BGN,sdj=NVME_INTEL_SSDPF21Q80_PHAL2165005L800CGN",
    "device_paths": "sde=/dev/disk/by-path/pci-0000:30:00.0-sas-0x30000d1e0129e385-lun-0,sdi=/dev/disk/by-path/pci-0000:30:00.0-sas-0x5cd2e439fb210500-lun-0,sdj=/dev/disk/by-path/pci-0000:30:00.0-sas-0x5cd2e4abd7530500-lun-0",
    "devices": "sde,sdi,sdj",
    "distro": "centos",
    "distro_description": "CentOS Stream 8",
    "distro_version": "8",
    "front_addr": "[v2:192.168.1.1:6838/1767446902,v1:192.168.1.1:6839/1767446902]",
    "front_iface": "",
    "hb_back_addr": "[v2:192.168.0.3:6834/1767446902,v1:192.168.0.3:6835/1767446902]",
    "hb_front_addr": "[v2:192.168.1.1:6840/1767446902,v1:192.168.1.1:6841/1767446902]",
    "hostname": "zephir",
    "journal_rotational": "0",
    "kernel_description": "#1 SMP PREEMPT_DYNAMIC Debian 6.1.69-1 (2023-12-30)",
    "kernel_version": "6.1.0-17-amd64",
    "mem_swap_kb": "249999356",
    "mem_total_kb": "263932564",
    "network_numa_unknown_ifaces": "back_iface,front_iface",
    "objectstore_numa_unknown_devices": "sde,sdi,sdj",
    "os": "Linux",
    "osd_data": "/var/lib/ceph/osd/ceph-1",
    "osd_objectstore": "bluestore",
    "osdspec_affinity": "",
    "rotational": "1"
}
root@zephir:~#
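In case it helps with cross-checking: the block.db symlink can be resolved and compared against the bluefs_db_dev_node reported above. Something like this (a rough sketch; the paths are the cephadm host paths from my setup, adjust FSID and OSD id as needed):

# resolve the OSD's block.db symlink on the host (cephadm bind-mounts this
# directory into the container as /var/lib/ceph/osd/ceph-1)
readlink -f /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1/block.db
# resolve the top-level raid1 DB LV; both commands should print the same
# /dev/dm-* node (here /dev/dm-13), not one of the _rimage_*/_iorig sub-LVs
readlink -f /dev/mapper/optane-ceph--db--osd1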
"sde=ATA_ST18000NM000J-2T_ZR53Z55N,sdi=NVME_INTEL_SSDPF21Q40_PHAL1505013J400BGN,sdj=NVME_INTEL_SSDPF21Q80_PHAL2165005L800CGN", "device_paths": "sde=/dev/disk/by-path/pci-0000:30:00.0-sas-0x30000d1e0129e385-lun-0,sdi=/dev/disk/by-path/pci-0000:30:00.0-sas-0x5cd2e439fb210500-lun-0,sdj=/dev/disk/by-path/pci-0000:30:00.0-sas-0x5cd2e4abd7530500-lun-0", "devices": "sde,sdi,sdj", "distro": "centos", "distro_description": "CentOS Stream 8", "distro_version": "8", "front_addr": "[v2: 192.168.1.1:6838/1767446902,v1:192.168.1.1:6839/1767446902]", "front_iface": "", "hb_back_addr": "[v2: 192.168.0.3:6834/1767446902,v1:192.168.0.3:6835/1767446902]", "hb_front_addr": "[v2: 192.168.1.1:6840/1767446902,v1:192.168.1.1:6841/1767446902]", "hostname": "zephir", "journal_rotational": "0", "kernel_description": "#1 SMP PREEMPT_DYNAMIC Debian 6.1.69-1 (2023-12-30)", "kernel_version": "6.1.0-17-amd64", "mem_swap_kb": "249999356", "mem_total_kb": "263932564", "network_numa_unknown_ifaces": "back_iface,front_iface", "objectstore_numa_unknown_devices": "sde,sdi,sdj", "os": "Linux", "osd_data": "/var/lib/ceph/osd/ceph-1", "osd_objectstore": "bluestore", "osdspec_affinity": "", "rotational": "1" } root@zephir:~# As I wrote shortly before I saw your response, the problem happens if the osd gets activated by RAWActivate instead of LVMActivate. As a workaround I changed the osd container unit.run file to use ceph-volume lvm activate instead of ceph-volume activate, as ceph-volume lvm activate works correctly: #/usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/sbin/ceph-volume --privileged --group-add=disk --init --name ceph-27923302-87a5-11ec-ac5b-976d21a49941-osd-1-activate -e CONTAINER_IMAGE=quay.io/ceph/ceph: v18.2.1 -e NODE_NAME=zephir -e CEPH_USE_RANDOM_NONCE=1 -e CEPH_VOLUME_SKIP_RESTORECON=yes -e CEPH_VOLUME_DEBUG=1 -v /var/run/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/run/ceph:z -v /var/log/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/l og/ceph:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/crash:/var/lib/ceph/crash:z -v /run/systemd/journal:/run/systemd/journal -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1:/var/lib/ceph/osd/ceph-1:z -v /var/lib/ceph/27 923302-87a5-11ec-ac5b-976d21a49941/osd.1/config:/etc/ceph/ceph.conf:z -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /:/rootfs -v /etc/hosts:/etc/hosts:ro quay.io/ceph/ceph:v18.2.1 activ ate --osd-id 1 --osd-uuid cdd02721-6876-4db8-bdb2-12ac6c70127c --no-systemd --no-tmpfs /usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/sbin/ceph-volume --privileged --group-add=disk --init --name ceph-27923302-87a5-11ec-ac5b-976d21a49941-osd-1-activate -e CONTAINER_IMAGE=quay.io/ceph/ceph:v 18.2.1 -e NODE_NAME=zephir -e CEPH_USE_RANDOM_NONCE=1 -e CEPH_VOLUME_SKIP_RESTORECON=yes -e CEPH_VOLUME_DEBUG=1 -v /var/run/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/run/ceph:z -v /var/log/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/lo g/ceph:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/crash:/var/lib/ceph/crash:z -v /run/systemd/journal:/run/systemd/journal -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1:/var/lib/ceph/osd/ceph-1:z -v /var/lib/ceph/279 23302-87a5-11ec-ac5b-976d21a49941/osd.1/config:/etc/ceph/ceph.conf:z -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /:/rootfs -v /etc/hosts:/etc/hosts:ro quay.io/ceph/ceph:v18.2.1 lvm ac tivate 1 
Regards,
Reto

On Thu, 11 Jan 2024 at 16:09, Eugen Block <eblock@xxxxxx> wrote:
> Hi,
>
> I don't really have any advice, but I'm curious what the LV tags look
> like (lvs -o lv_tags). Do they point to the correct LVs for the
> block.db? Does 'ceph osd metadata <OSD>' show anything weird? Is
> there something useful in the ceph-volume.log
> (/var/log/ceph/{FSID}/ceph-volume.log)?
>
> Regards,
> Eugen
>
> Quoting Reto Gysi <rlgysi@xxxxxxxxx>:
>
> > Hi Ceph community,
> >
> > I noticed the following problem after upgrading my Ceph instance on
> > Debian 12.4 from 17.2.7 to 18.2.1:
> >
> > I had placed the BlueStore block.db for HDD OSDs on raid1/mirrored
> > logical volumes across 2 NVMe devices, so that a single failed
> > block.db NVMe device does not take down all HDD OSDs.
> > That worked fine under 17.2.7, with no problems during host/OSD restarts.
> > During the upgrade to 18.2.1 the OSDs with block.db on a mirrored LV
> > wouldn't start anymore, because the block.db symlink was updated to
> > point to the wrong device-mapper device, and the OSD startup failed
> > with an error message that the block.db device is busy.
> >
> > OSD1:
> > 2024-01-05T19:56:43.592+0000 7fdde9f43640 -1 bluestore(/var/lib/ceph/osd/ceph-1) _minimal_open_bluefs add block device(/var/lib/ceph/osd/ceph-1/block.db) returned: (16) Device or resource busy
> > 2024-01-05T19:56:43.592+0000 7fdde9f43640 -1 bluestore(/var/lib/ceph/osd/ceph-1) _open_db failed to prepare db environment:
> > 2024-01-05T19:56:43.592+0000 7fdde9f43640  1 bdev(0x55a2d5014000 /var/lib/ceph/osd/ceph-1/block) close
> > 2024-01-05T19:56:43.892+0000 7fdde9f43640 -1 osd.1 0 OSD:init: unable to mount object store
> >
> > The symlink was updated to point to:
> > lrwxrwxrwx 1 ceph ceph 111 Jan 5 20:57 block -> /dev/mapper/ceph--dec5bd7c--d84f--40d9--ba14--6bd8aadf2957-osd--block--cdd02721--6876--4db8--bdb2--12ac6c70127c
> > lrwxrwxrwx 1 ceph ceph 48 Jan 5 20:57 block.db -> /dev/mapper/optane-ceph--db--osd1_rimage_1_iorig
> >
> > The correct symlink would have been:
> > lrwxrwxrwx 1 ceph ceph 111 Jan 5 20:57 block -> /dev/mapper/ceph--dec5bd7c--d84f--40d9--ba14--6bd8aadf2957-osd--block--cdd02721--6876--4db8--bdb2--12ac6c70127c
> > lrwxrwxrwx 1 ceph ceph 48 Jan 5 20:57 block.db -> /dev/mapper/optane-ceph--db--osd1
> >
> > To continue with the upgrade I converted all the block.db logical
> > volumes back to linear volumes, one by one, and fixed the symlinks
> > manually. Converting the LVs back to linear was necessary because,
> > even when I fixed the symlink manually, the symlink would be created
> > wrong again after an OSD restart as long as the block.db pointed to a
> > raid1 LV.
> >
> > Here's an example of how the symlinks looked before an OSD was
> > touched by the 18.2.1 upgrade:
> > OSD2:
> > lrwxrwxrwx 1 ceph ceph 93 Jan 4 03:38 block -> /dev/ceph-17a894d6-3a64-4e5e-9fa0-8dd3b5f4bf33/osd-block-3cd7a5af-9002-47a7-b4c2-540381d53be7
> > lrwxrwxrwx 1 ceph ceph 24 Jan 4 03:38 block.db -> /dev/optane/ceph-db-osd2
> >
> > Here's what the output of lvs -a -o +devices looked like for the OSD1
> > block.db device while it was a raid1 LV:
> >
> > LV                             VG      Attr        LSize    Origin                          Cpy%Sync  Devices
> > ceph-db-osd1                   optane  rwi-a-r---   44.00g                                  100.00    ceph-db-osd1_rimage_0(0),ceph-db-osd1_rimage_1(0)
> > [ceph-db-osd1_rimage_0]        optane  gwi-aor---   44.00g  [ceph-db-osd1_rimage_0_iorig]   100.00    ceph-db-osd1_rimage_0_iorig(0)
> > [ceph-db-osd1_rimage_0_imeta]  optane  ewi-ao----  428.00m                                            /dev/sdg(55482)
> > [ceph-db-osd1_rimage_0_imeta]  optane  ewi-ao----  428.00m                                            /dev/sdg(84566)
> > [ceph-db-osd1_rimage_0_iorig]  optane  -wi-ao----   44.00g                                            /dev/sdg(9216)
> > [ceph-db-osd1_rimage_0_iorig]  optane  -wi-ao----   44.00g                                            /dev/sdg(82518)
> > [ceph-db-osd1_rimage_1]        optane  gwi-aor---   44.00g  [ceph-db-osd1_rimage_1_iorig]   100.00    ceph-db-osd1_rimage_1_iorig(0)
> > [ceph-db-osd1_rimage_1_imeta]  optane  ewi-ao----  428.00m                                            /dev/sdj(55392)
> > [ceph-db-osd1_rimage_1_imeta]  optane  ewi-ao----  428.00m                                            /dev/sdj(75457)
> > [ceph-db-osd1_rimage_1_iorig]  optane  -wi-ao----   44.00g                                            /dev/sdj(9218)
> > [ceph-db-osd1_rimage_1_iorig]  optane  -wi-ao----   44.00g                                            /dev/sdj(73409)
> > [ceph-db-osd1_rmeta_0]         optane  ewi-aor---    4.00m                                            /dev/sdg(55388)
> > [ceph-db-osd1_rmeta_1]         optane  ewi-aor---    4.00m                                            /dev/sdj(9217)
> >
> > It would be good if the symlinks were recreated pointing to the
> > correct device even when they point to a raid1 LV.
> > Not sure if this problem has been reported yet.
> >
> > Cheers
> > Reto

_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx