Hi Eugen
The LV tags seem OK to me.
LV_tags:
-------------------------
root@zephir:~# lvs -a -o +devices,tags | egrep 'osd1| LV' | grep -v osd12
LV                                             VG                                         Attr       LSize   Pool Origin Data% Meta% Move Log Cpy%Sync Convert Devices  LV Tags
osd-block-cdd02721-6876-4db8-bdb2-12ac6c70127c ceph-dec5bd7c-d84f-40d9-ba14-6bd8aadf2957  -wi-ao---- 16.37t  /dev/sde(0)  ceph.block_device=/dev/ceph-dec5bd7c-d84f-40d9-ba14-6bd8aadf2957/osd-block-cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.block_uuid=tHvTi7-nkde-JCvt-USL1-52V7-ejIi-J14rkb,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=27923302-87a5-11ec-ac5b-976d21a49941,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/optane/ceph-db-osd1,ceph.db_uuid=NfBaWG-ZnB2-1RV0-zRqJ-EbUe-v7ds-230OW9,ceph.encrypted=0,ceph.osd_fsid=cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.osd_id=1,ceph.osdspec_affinity=,ceph.type=block,ceph.vdo=0
ceph-db-osd1                                   optane                                     rwi-aor--- 50.00g  100.00  ceph-db-osd1_rimage_0(0),ceph-db-osd1_rimage_1(0)  ceph.block_device=/dev/ceph-dec5bd7c-d84f-40d9-ba14-6bd8aadf2957/osd-block-cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.block_uuid=tHvTi7-nkde-JCvt-USL1-52V7-ejIi-J14rkb,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=27923302-87a5-11ec-ac5b-976d21a49941,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/optane/ceph-db-osd1,ceph.db_uuid=NfBaWG-ZnB2-1RV0-zRqJ-EbUe-v7ds-230OW9,ceph.encrypted=0,ceph.osd_fsid=cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.osd_id=1,ceph.osdspec_affinity=,ceph.type=db,ceph.vdo=
[ceph-db-osd1_rimage_0]                        optane                                     iwi-aor--- 50.00g  /dev/sdi(9216)
[ceph-db-osd1_rimage_0]                        optane                                     iwi-aor--- 50.00g  /dev/sdi(82518)
[ceph-db-osd1_rimage_0]                        optane                                     iwi-aor--- 50.00g  /dev/sdi(55297)
[ceph-db-osd1_rimage_1]                        optane                                     iwi-aor--- 50.00g  /dev/sdj(1)
[ceph-db-osd1_rmeta_0]                         optane                                     ewi-aor---  4.00m  /dev/sdi(46080)
[ceph-db-osd1_rmeta_1]                         optane                                     ewi-aor---  4.00m  /dev/sdj(0)
root@zephir:~#
------------------
root@zephir:~# ceph osd metadata osd.1
{
"id": 1,
"arch": "x86_64",
"back_addr": "[v2:
192.168.0.3:6832/1767446902,v1:192.168.0.3:6833/1767446902]",
"back_iface": "",
"bluefs": "1",
"bluefs_db_access_mode": "blk",
"bluefs_db_block_size": "4096",
"bluefs_db_dev_node": "/dev/dm-13",
"bluefs_db_devices": "sdi,sdj",
"bluefs_db_driver": "KernelDevice",
"bluefs_db_optimal_io_size": "0",
"bluefs_db_partition_path": "/dev/dm-13",
"bluefs_db_rotational": "0",
"bluefs_db_size": "53687091200",
"bluefs_db_support_discard": "1",
"bluefs_db_type": "ssd",
"bluefs_dedicated_db": "1",
"bluefs_dedicated_wal": "0",
"bluefs_single_shared_device": "0",
"bluestore_bdev_access_mode": "blk",
"bluestore_bdev_block_size": "4096",
"bluestore_bdev_dev_node": "/dev/dm-1",
"bluestore_bdev_devices": "sde",
"bluestore_bdev_driver": "KernelDevice",
"bluestore_bdev_optimal_io_size": "0",
"bluestore_bdev_partition_path": "/dev/dm-1",
"bluestore_bdev_rotational": "1",
"bluestore_bdev_size": "18000203743232",
"bluestore_bdev_support_discard": "0",
"bluestore_bdev_type": "hdd",
"bluestore_min_alloc_size": "4096",
"ceph_release": "reef",
"ceph_version": "ceph version 18.2.1
(7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)",
"ceph_version_short": "18.2.1",
"ceph_version_when_created": "ceph version 17.2.6
(d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)",
"container_hostname": "zephir",
"container_image": "quay.io/ceph/ceph:v18.2.1",
"cpu": "AMD Ryzen Threadripper PRO 3975WX 32-Cores",
"created_at": "2023-05-08T16:42:46.313648Z",
"default_device_class": "hdd",
"device_ids":
"sde=ATA_ST18000NM000J-2T_ZR53Z55N,sdi=NVME_INTEL_SSDPF21Q40_PHAL1505013J400BGN,sdj=NVME_INTEL_SSDPF21Q80_PHAL2165005L800CGN",
"device_paths":
"sde=/dev/disk/by-path/pci-0000:30:00.0-sas-0x30000d1e0129e385-lun-0,sdi=/dev/disk/by-path/pci-0000:30:00.0-sas-0x5cd2e439fb210500-lun-0,sdj=/dev/disk/by-path/pci-0000:30:00.0-sas-0x5cd2e4abd7530500-lun-0",
"devices": "sde,sdi,sdj",
"distro": "centos",
"distro_description": "CentOS Stream 8",
"distro_version": "8",
"front_addr": "[v2:
192.168.1.1:6838/1767446902,v1:192.168.1.1:6839/1767446902]",
"front_iface": "",
"hb_back_addr": "[v2:
192.168.0.3:6834/1767446902,v1:192.168.0.3:6835/1767446902]",
"hb_front_addr": "[v2:
192.168.1.1:6840/1767446902,v1:192.168.1.1:6841/1767446902]",
"hostname": "zephir",
"journal_rotational": "0",
"kernel_description": "#1 SMP PREEMPT_DYNAMIC Debian 6.1.69-1
(2023-12-30)",
"kernel_version": "6.1.0-17-amd64",
"mem_swap_kb": "249999356",
"mem_total_kb": "263932564",
"network_numa_unknown_ifaces": "back_iface,front_iface",
"objectstore_numa_unknown_devices": "sde,sdi,sdj",
"os": "Linux",
"osd_data": "/var/lib/ceph/osd/ceph-1",
"osd_objectstore": "bluestore",
"osdspec_affinity": "",
"rotational": "1"
}
root@zephir:~#
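Side note: the metadata reports bluefs_db_dev_node /dev/dm-13. Which LV that dm
node currently resolves to can be cross-checked with plain Linux tooling, for
example:

ls -l /dev/mapper/ | grep -w dm-13
lsblk -o NAME,KNAME,TYPE /dev/dm-13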
As I wrote shortly before I saw your response, the problem occurs when the
OSD gets activated by RAWActivate instead of LVMActivate.
As a workaround I changed the OSD container's unit.run file to use
ceph-volume lvm activate instead of ceph-volume activate, since ceph-volume
lvm activate works correctly:
#/usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/sbin/ceph-volume --privileged --group-add=disk --init --name ceph-27923302-87a5-11ec-ac5b-976d21a49941-osd-1-activate -e CONTAINER_IMAGE=quay.io/ceph/ceph:v18.2.1 -e NODE_NAME=zephir -e CEPH_USE_RANDOM_NONCE=1 -e CEPH_VOLUME_SKIP_RESTORECON=yes -e CEPH_VOLUME_DEBUG=1 -v /var/run/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/run/ceph:z -v /var/log/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/log/ceph:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/crash:/var/lib/ceph/crash:z -v /run/systemd/journal:/run/systemd/journal -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1:/var/lib/ceph/osd/ceph-1:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1/config:/etc/ceph/ceph.conf:z -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /:/rootfs -v /etc/hosts:/etc/hosts:ro quay.io/ceph/ceph:v18.2.1 activate --osd-id 1 --osd-uuid cdd02721-6876-4db8-bdb2-12ac6c70127c --no-systemd --no-tmpfs
/usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/sbin/ceph-volume --privileged --group-add=disk --init --name ceph-27923302-87a5-11ec-ac5b-976d21a49941-osd-1-activate -e CONTAINER_IMAGE=quay.io/ceph/ceph:v18.2.1 -e NODE_NAME=zephir -e CEPH_USE_RANDOM_NONCE=1 -e CEPH_VOLUME_SKIP_RESTORECON=yes -e CEPH_VOLUME_DEBUG=1 -v /var/run/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/run/ceph:z -v /var/log/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/log/ceph:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/crash:/var/lib/ceph/crash:z -v /run/systemd/journal:/run/systemd/journal -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1:/var/lib/ceph/osd/ceph-1:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1/config:/etc/ceph/ceph.conf:z -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /:/rootfs -v /etc/hosts:/etc/hosts:ro quay.io/ceph/ceph:v18.2.1 lvm activate 1 cdd02721-6876-4db8-bdb2-12ac6c70127c --no-systemd --no-tmpfs
However, this gets overwritten whenever the OSD container is redeployed.
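For what it's worth, that edit can also be scripted. A rough, untested sketch,
assuming the usual cephadm location of the unit.run file on the host:

sed -i.bak 's|ceph:v18.2.1 activate --osd-id 1 --osd-uuid cdd02721-6876-4db8-bdb2-12ac6c70127c|ceph:v18.2.1 lvm activate 1 cdd02721-6876-4db8-bdb2-12ac6c70127c|' \
    /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1/unit.run

This only rewrites the raw activate call shown above into its lvm activate
equivalent and keeps a .bak copy; cephadm still regenerates unit.run on the
next redeploy.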
Regards
Reto
On Thu, Jan 11, 2024 at 16:09, Eugen Block <eblock@xxxxxx> wrote:
Hi,
I don't really have any advice, but I'm curious what the LV tags look
like (lvs -o lv_tags). Do they point to the correct LVs for the
block.db? Does 'ceph osd metadata <OSD>' show anything weird? Is
there something useful in the ceph-volume.log
(/var/log/ceph/{FSID}/ceph-volume.log)?
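In concrete commands, something like:
lvs -a -o +devices,lv_tags
ceph osd metadata <OSD>
less /var/log/ceph/{FSID}/ceph-volume.log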
Regards,
Eugen
Quoting Reto Gysi <rlgysi@xxxxxxxxx>:
> Hi ceph community
>
> I noticed the following problem after upgrading my ceph instance on Debian 12.4 from 17.2.7 to 18.2.1:
>
> I had placed the bluestore block.db for the hdd OSDs on raid1/mirrored logical
> volumes across 2 NVMe devices, so that not all hdd OSDs fail if a single
> block.db NVMe device fails.
> That worked fine under 17.2.7, with no problems during host/OSD restarts.
> During the upgrade to 18.2.1, the OSDs with their block.db on a mirrored LV
> wouldn't start anymore, because the block.db symlink was updated to point to
> the wrong device-mapper device and the OSD startup failed with an error
> message that the block.db device is busy.
>
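For context, a mirrored block.db LV like the one described above can be set up
roughly as follows; the device and LV names are the ones from this thread, and
a cephadm deployment may drive ceph-volume differently:

lvcreate --type raid1 -m 1 -L 50G -n ceph-db-osd1 optane /dev/sdi /dev/sdj
ceph-volume lvm prepare --data /dev/sde --block.db optane/ceph-db-osd1
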
> OSD1:
> 2024-01-05T19:56:43.592+0000 7fdde9f43640 -1 bluestore(/var/lib/ceph/osd/ceph-1) _minimal_open_bluefs add block device(/var/lib/ceph/osd/ceph-1/block.db) returned: (16) Device or resource busy
> 2024-01-05T19:56:43.592+0000 7fdde9f43640 -1 bluestore(/var/lib/ceph/osd/ceph-1) _open_db failed to prepare db environment:
> 2024-01-05T19:56:43.592+0000 7fdde9f43640  1 bdev(0x55a2d5014000 /var/lib/ceph/osd/ceph-1/block) close
> 2024-01-05T19:56:43.892+0000 7fdde9f43640 -1 osd.1 0 OSD:init: unable to mount object store
>
> the symlink was updated to point to
> lrwxrwxrwx 1 ceph ceph 111 Jan 5 20:57 block -> /dev/mapper/ceph--dec5bd7c--d84f--40d9--ba14--6bd8aadf2957-osd--block--cdd02721--6876--4db8--bdb2--12ac6c70127c
> lrwxrwxrwx 1 ceph ceph 48 Jan 5 20:57 block.db -> /dev/mapper/optane-ceph--db--osd1_rimage_1_iorig
>
> the correct symlink would have been:
> lrwxrwxrwx 1 ceph ceph 111 Jan 5 20:57 block -> /dev/mapper/ceph--dec5bd7c--d84f--40d9--ba14--6bd8aadf2957-osd--block--cdd02721--6876--4db8--bdb2--12ac6c70127c
> lrwxrwxrwx 1 ceph ceph 48 Jan 5 20:57 block.db -> /dev/mapper/optane-ceph--db--osd1
>
>
> To continue with the upgrade I converted, one by one, all the block.db LVM
> logical volumes back to linear volumes and fixed the symlinks manually.
> Converting the LVs back to linear was necessary because, even when I fixed
> the symlink manually, after an OSD restart the symlink would be created
> wrong again if the block.db pointed to a raid1 LV.
>
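To spell that workaround out, converting such a db LV back to linear and
re-pointing the symlink might look roughly like this (untested sketch; the
names and the cephadm host path are the ones from this thread, and the
integrity layer has to be dropped first if it is enabled):

lvconvert --raidintegrity n optane/ceph-db-osd1
lvconvert -m0 optane/ceph-db-osd1
ln -sfn /dev/optane/ceph-db-osd1 /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1/block.db
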
> Here's an example of how the symlinks looked before an OSD was touched by
> the 18.2.1 upgrade:
> OSD2:
> lrwxrwxrwx 1 ceph ceph 93 Jan 4 03:38 block -> /dev/ceph-17a894d6-3a64-4e5e-9fa0-8dd3b5f4bf33/osd-block-3cd7a5af-9002-47a7-b4c2-540381d53be7
> lrwxrwxrwx 1 ceph ceph 24 Jan 4 03:38 block.db -> /dev/optane/ceph-db-osd2
>
>
> Here's what the output of lvs -a -o +devices looked like for the OSD1
> block.db device when it was a raid1 LV:
>
> LV                            VG     Attr       LSize    Pool Origin Data% Meta% Move Log Cpy%Sync Convert Devices
> ceph-db-osd1                  optane rwi-a-r---  44.00g  100.00  ceph-db-osd1_rimage_0(0),ceph-db-osd1_rimage_1(0)
> [ceph-db-osd1_rimage_0]       optane gwi-aor---  44.00g  [ceph-db-osd1_rimage_0_iorig]  100.00  ceph-db-osd1_rimage_0_iorig(0)
> [ceph-db-osd1_rimage_0_imeta] optane ewi-ao---- 428.00m  /dev/sdg(55482)
> [ceph-db-osd1_rimage_0_imeta] optane ewi-ao---- 428.00m  /dev/sdg(84566)
> [ceph-db-osd1_rimage_0_iorig] optane -wi-ao----  44.00g  /dev/sdg(9216)
> [ceph-db-osd1_rimage_0_iorig] optane -wi-ao----  44.00g  /dev/sdg(82518)
> [ceph-db-osd1_rimage_1]       optane gwi-aor---  44.00g  [ceph-db-osd1_rimage_1_iorig]  100.00  ceph-db-osd1_rimage_1_iorig(0)
> [ceph-db-osd1_rimage_1_imeta] optane ewi-ao---- 428.00m  /dev/sdj(55392)
> [ceph-db-osd1_rimage_1_imeta] optane ewi-ao---- 428.00m  /dev/sdj(75457)
> [ceph-db-osd1_rimage_1_iorig] optane -wi-ao----  44.00g  /dev/sdj(9218)
> [ceph-db-osd1_rimage_1_iorig] optane -wi-ao----  44.00g  /dev/sdj(73409)
> [ceph-db-osd1_rmeta_0]        optane ewi-aor---   4.00m  /dev/sdg(55388)
> [ceph-db-osd1_rmeta_1]        optane ewi-aor---   4.00m  /dev/sdj(9217)
>
>
>
>
> It would be good if the symlinks were recreated pointing to the correct
> device even when the block.db is on a raid1 LV.
> Not sure if this problem has been reported yet.
>
>
> Cheers
>
> Reto
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx