Re: [v18.2.1] problem with wrong osd device symlinks after upgrade to 18.2.1

Did you find an existing tracker issue for that? I suggest reporting your findings there.

Thanks!
Eugen

Quoting Reto Gysi <rlgysi@xxxxxxxxx>:

Hi Eugen

LV tags seem to look ok to me.

LV_tags:
-------------------------
root@zephir:~# lvs -a -o +devices,tags  | egrep 'osd1|  LV' | grep -v osd12
  LV                                              VG                                         Attr        LSize    Pool Origin Data%  Meta%  Move Log Cpy%Sync Convert Devices   LV Tags
  osd-block-cdd02721-6876-4db8-bdb2-12ac6c70127c  ceph-dec5bd7c-d84f-40d9-ba14-6bd8aadf2957  -wi-ao----  16.37t   /dev/sde(0)
      ceph.block_device=/dev/ceph-dec5bd7c-d84f-40d9-ba14-6bd8aadf2957/osd-block-cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.block_uuid=tHvTi7-nkde-JCvt-USL1-52V7-ejIi-J14rkb,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=27923302-87a5-11ec-ac5b-976d21a49941,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/optane/ceph-db-osd1,ceph.db_uuid=NfBaWG-ZnB2-1RV0-zRqJ-EbUe-v7ds-230OW9,ceph.encrypted=0,ceph.osd_fsid=cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.osd_id=1,ceph.osdspec_affinity=,ceph.type=block,ceph.vdo=0
  ceph-db-osd1                                    optane                                     rwi-aor---  50.00g   100.00  ceph-db-osd1_rimage_0(0),ceph-db-osd1_rimage_1(0)
      ceph.block_device=/dev/ceph-dec5bd7c-d84f-40d9-ba14-6bd8aadf2957/osd-block-cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.block_uuid=tHvTi7-nkde-JCvt-USL1-52V7-ejIi-J14rkb,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=27923302-87a5-11ec-ac5b-976d21a49941,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.db_device=/dev/optane/ceph-db-osd1,ceph.db_uuid=NfBaWG-ZnB2-1RV0-zRqJ-EbUe-v7ds-230OW9,ceph.encrypted=0,ceph.osd_fsid=cdd02721-6876-4db8-bdb2-12ac6c70127c,ceph.osd_id=1,ceph.osdspec_affinity=,ceph.type=db,ceph.vdo=
  [ceph-db-osd1_rimage_0]                         optane                                     iwi-aor---  50.00g   /dev/sdi(9216)
  [ceph-db-osd1_rimage_0]                         optane                                     iwi-aor---  50.00g   /dev/sdi(82518)
  [ceph-db-osd1_rimage_0]                         optane                                     iwi-aor---  50.00g   /dev/sdi(55297)
  [ceph-db-osd1_rimage_1]                         optane                                     iwi-aor---  50.00g   /dev/sdj(1)
  [ceph-db-osd1_rmeta_0]                          optane                                     ewi-aor---   4.00m   /dev/sdi(46080)
  [ceph-db-osd1_rmeta_1]                          optane                                     ewi-aor---   4.00m   /dev/sdj(0)
root@zephir:~#
------------------


root@zephir:~# ceph osd metadata osd.1
{
   "id": 1,
   "arch": "x86_64",
   "back_addr": "[v2:
192.168.0.3:6832/1767446902,v1:192.168.0.3:6833/1767446902]",
   "back_iface": "",
   "bluefs": "1",
   "bluefs_db_access_mode": "blk",
   "bluefs_db_block_size": "4096",
   "bluefs_db_dev_node": "/dev/dm-13",
   "bluefs_db_devices": "sdi,sdj",
   "bluefs_db_driver": "KernelDevice",
   "bluefs_db_optimal_io_size": "0",
   "bluefs_db_partition_path": "/dev/dm-13",
   "bluefs_db_rotational": "0",
   "bluefs_db_size": "53687091200",
   "bluefs_db_support_discard": "1",
   "bluefs_db_type": "ssd",
   "bluefs_dedicated_db": "1",
   "bluefs_dedicated_wal": "0",
   "bluefs_single_shared_device": "0",
   "bluestore_bdev_access_mode": "blk",
   "bluestore_bdev_block_size": "4096",
   "bluestore_bdev_dev_node": "/dev/dm-1",
   "bluestore_bdev_devices": "sde",
   "bluestore_bdev_driver": "KernelDevice",
   "bluestore_bdev_optimal_io_size": "0",
   "bluestore_bdev_partition_path": "/dev/dm-1",
   "bluestore_bdev_rotational": "1",
   "bluestore_bdev_size": "18000203743232",
   "bluestore_bdev_support_discard": "0",
   "bluestore_bdev_type": "hdd",
   "bluestore_min_alloc_size": "4096",
   "ceph_release": "reef",
   "ceph_version": "ceph version 18.2.1
(7fe91d5d5842e04be3b4f514d6dd990c54b29c76) reef (stable)",
   "ceph_version_short": "18.2.1",
   "ceph_version_when_created": "ceph version 17.2.6
(d7ff0d10654d2280e08f1ab989c7cdf3064446a5) quincy (stable)",
   "container_hostname": "zephir",
   "container_image": "quay.io/ceph/ceph:v18.2.1",
   "cpu": "AMD Ryzen Threadripper PRO 3975WX 32-Cores",
   "created_at": "2023-05-08T16:42:46.313648Z",
   "default_device_class": "hdd",
   "device_ids":
"sde=ATA_ST18000NM000J-2T_ZR53Z55N,sdi=NVME_INTEL_SSDPF21Q40_PHAL1505013J400BGN,sdj=NVME_INTEL_SSDPF21Q80_PHAL2165005L800CGN",

   "device_paths":
"sde=/dev/disk/by-path/pci-0000:30:00.0-sas-0x30000d1e0129e385-lun-0,sdi=/dev/disk/by-path/pci-0000:30:00.0-sas-0x5cd2e439fb210500-lun-0,sdj=/dev/disk/by-path/pci-0000:30:00.0-sas-0x5cd2e4abd7530500-lun-0",

   "devices": "sde,sdi,sdj",
   "distro": "centos",
   "distro_description": "CentOS Stream 8",
   "distro_version": "8",
   "front_addr": "[v2:
192.168.1.1:6838/1767446902,v1:192.168.1.1:6839/1767446902]",
   "front_iface": "",
   "hb_back_addr": "[v2:
192.168.0.3:6834/1767446902,v1:192.168.0.3:6835/1767446902]",
   "hb_front_addr": "[v2:
192.168.1.1:6840/1767446902,v1:192.168.1.1:6841/1767446902]",
   "hostname": "zephir",
   "journal_rotational": "0",
   "kernel_description": "#1 SMP PREEMPT_DYNAMIC Debian 6.1.69-1
(2023-12-30)",
   "kernel_version": "6.1.0-17-amd64",
   "mem_swap_kb": "249999356",
   "mem_total_kb": "263932564",
   "network_numa_unknown_ifaces": "back_iface,front_iface",
   "objectstore_numa_unknown_devices": "sde,sdi,sdj",
   "os": "Linux",
   "osd_data": "/var/lib/ceph/osd/ceph-1",
   "osd_objectstore": "bluestore",
   "osdspec_affinity": "",
   "rotational": "1"
}
root@zephir:~#
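The metadata above reports the db dev node as /dev/dm-13. Not shown in this mail, but that dm node can be mapped back to an LV name to check whether it is the top-level optane/ceph-db-osd1 LV or one of its _rimage sub-LVs, for example:

ls -l /dev/mapper | grep 'dm-13$'      # which /dev/mapper name links to dm-13
dmsetup info -c | grep ceph--db--osd1  # name and major:minor of the related dm devices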


As I wrote shortly before I saw your response, the problem happens if the
osd gets activated by RAWActivate instead of LVMActivate.
As a workaround I changed the osd container's unit.run file to use
'ceph-volume lvm activate' instead of 'ceph-volume activate', since the lvm
variant activates the OSD correctly (the original activate line is commented
out below, followed by the replacement):

#/usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/sbin/ceph-volume --privileged --group-add=disk --init --name ceph-27923302-87a5-11ec-ac5b-976d21a49941-osd-1-activate -e CONTAINER_IMAGE=quay.io/ceph/ceph:v18.2.1 -e NODE_NAME=zephir -e CEPH_USE_RANDOM_NONCE=1 -e CEPH_VOLUME_SKIP_RESTORECON=yes -e CEPH_VOLUME_DEBUG=1 -v /var/run/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/run/ceph:z -v /var/log/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/log/ceph:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/crash:/var/lib/ceph/crash:z -v /run/systemd/journal:/run/systemd/journal -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1:/var/lib/ceph/osd/ceph-1:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1/config:/etc/ceph/ceph.conf:z -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /:/rootfs -v /etc/hosts:/etc/hosts:ro quay.io/ceph/ceph:v18.2.1 activate --osd-id 1 --osd-uuid cdd02721-6876-4db8-bdb2-12ac6c70127c --no-systemd --no-tmpfs

/usr/bin/podman run --rm --ipc=host --stop-signal=SIGTERM --net=host --entrypoint /usr/sbin/ceph-volume --privileged --group-add=disk --init --name ceph-27923302-87a5-11ec-ac5b-976d21a49941-osd-1-activate -e CONTAINER_IMAGE=quay.io/ceph/ceph:v18.2.1 -e NODE_NAME=zephir -e CEPH_USE_RANDOM_NONCE=1 -e CEPH_VOLUME_SKIP_RESTORECON=yes -e CEPH_VOLUME_DEBUG=1 -v /var/run/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/run/ceph:z -v /var/log/ceph/27923302-87a5-11ec-ac5b-976d21a49941:/var/log/ceph:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/crash:/var/lib/ceph/crash:z -v /run/systemd/journal:/run/systemd/journal -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1:/var/lib/ceph/osd/ceph-1:z -v /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1/config:/etc/ceph/ceph.conf:z -v /dev:/dev -v /run/udev:/run/udev -v /sys:/sys -v /run/lvm:/run/lvm -v /run/lock/lvm:/run/lock/lvm -v /:/rootfs -v /etc/hosts:/etc/hosts:ro quay.io/ceph/ceph:v18.2.1 lvm activate 1 cdd02721-6876-4db8-bdb2-12ac6c70127c --no-systemd --no-tmpfs

but this will get overwritten if the osd container gets redeployed.
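Until this is fixed, the same edit has to be re-applied whenever cephadm regenerates the file. Assuming the usual cephadm layout on this host (/var/lib/ceph/<fsid>/osd.1/unit.run), something like this would redo the change:

sed -i 's/ activate --osd-id 1 --osd-uuid / lvm activate 1 /' \
    /var/lib/ceph/27923302-87a5-11ec-ac5b-976d21a49941/osd.1/unit.run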

Regards

Reto

On Thu, 11 Jan 2024 at 16:09, Eugen Block <eblock@xxxxxx> wrote:

Hi,

I don't really have any advice, but I'm curious what the LV tags look
like (lvs -o lv_tags). Do they point to the correct LVs for the
block.db? Does 'ceph osd metadata <OSD>' show anything weird? Is
there something useful in the ceph-volume.log
(/var/log/ceph/{FSID}/ceph-volume.log)?
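I.e. something along these lines (fill in your OSD id and cluster FSID):

lvs -o lv_name,vg_name,lv_tags
ceph osd metadata <OSD>
less /var/log/ceph/{FSID}/ceph-volume.log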

Regards,
Eugen

Quoting Reto Gysi <rlgysi@xxxxxxxxx>:

> Hi ceph community
>
> I noticed the following problem after upgrading my ceph instance on
> Debian 12.4 from 17.2.7 to 18.2.1:
>
> I had placed the bluestore block.db for the hdd osds on raid1/mirrored
> logical volumes across 2 nvme devices, so that a single failing block.db
> nvme device would not take down all of the hdd osds.
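> (The creation commands aren't shown in this mail; such a mirrored db LV
> would typically be created with something like the following, where the
> size and PV names are illustrative, optionally with --raidintegrity y,
> which matches the _imeta/_iorig sub-LVs visible further down:)
>
> lvcreate --type raid1 -m 1 -L 50G -n ceph-db-osd1 optane /dev/sdi /dev/sdj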
> That worked fine under 17.2.7, with no problems during host/osd restarts.
> During the upgrade to 18.2.1, the osds with the block.db on a mirrored lv
> wouldn't start anymore, because the block.db symlink was updated to point
> to the wrong device mapper device, and the osd startup failed with the
> error message that the block.db device is busy.
>
> OSD1:
> 2024-01-05T19:56:43.592+0000 7fdde9f43640 -1 bluestore(/var/lib/ceph/osd/ceph-1) _minimal_open_bluefs add block device(/var/lib/ceph/osd/ceph-1/block.db) returned: (16) Device or resource busy
> 2024-01-05T19:56:43.592+0000 7fdde9f43640 -1 bluestore(/var/lib/ceph/osd/ceph-1) _open_db failed to prepare db environment:
> 2024-01-05T19:56:43.592+0000 7fdde9f43640  1 bdev(0x55a2d5014000 /var/lib/ceph/osd/ceph-1/block) close
> 2024-01-05T19:56:43.892+0000 7fdde9f43640 -1 osd.1 0 OSD:init: unable to mount object store
>
> the symlink was updated to point to:
> lrwxrwxrwx 1 ceph ceph  111 Jan  5 20:57 block -> /dev/mapper/ceph--dec5bd7c--d84f--40d9--ba14--6bd8aadf2957-osd--block--cdd02721--6876--4db8--bdb2--12ac6c70127c
> lrwxrwxrwx 1 ceph ceph   48 Jan  5 20:57 block.db -> /dev/mapper/optane-ceph--db--osd1_rimage_1_iorig
>
> the correct symlink would have been:
> lrwxrwxrwx 1 ceph ceph  111 Jan  5 20:57 block -> /dev/mapper/ceph--dec5bd7c--d84f--40d9--ba14--6bd8aadf2957-osd--block--cdd02721--6876--4db8--bdb2--12ac6c70127c
> lrwxrwxrwx 1 ceph ceph   48 Jan  5 20:57 block.db -> /dev/mapper/optane-ceph--db--osd1
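> (Not from the original session, just a quick way to cross-check which LV
> the block.db symlink really resolves to versus the dm paths of the LVs in
> the VG:)
>
> readlink -f /var/lib/ceph/osd/ceph-1/block.db
> lvs --noheadings -o lv_name,lv_dm_path optane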
>
>
> To continue with the upgrade I converted all the block.db lvm logical
> volumes back to linear volumes, one by one, and fixed the symlinks
> manually. Converting the lvs back to linear was necessary because, even
> when I fixed the symlink manually, the symlink would be recreated wrong
> again after an osd restart as long as the block.db pointed to a raid1 lv.
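> (The exact commands aren't shown in this mail; converting such an LV back
> to linear and re-pointing the symlink would look roughly like this, with
> integrity removed first if it is enabled, and with the VG/LV and OSD paths
> adjusted:)
>
> lvconvert --raidintegrity n optane/ceph-db-osd1
> lvconvert -m 0 optane/ceph-db-osd1
> ln -sf /dev/optane/ceph-db-osd1 /var/lib/ceph/osd/ceph-1/block.db
> chown -h ceph:ceph /var/lib/ceph/osd/ceph-1/block.db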
>
> Here's an example of how the symlinks looked before an osd was touched by
> the 18.2.1 upgrade:
> OSD2:
> lrwxrwxrwx 1 ceph ceph   93 Jan  4 03:38 block -> /dev/ceph-17a894d6-3a64-4e5e-9fa0-8dd3b5f4bf33/osd-block-3cd7a5af-9002-47a7-b4c2-540381d53be7
> lrwxrwxrwx 1 ceph ceph   24 Jan  4 03:38 block.db -> /dev/optane/ceph-db-osd2
>
>
> Here's what the output of lvs -a -o +devices looked like for the OSD1
> block.db device when it was a raid1 lv:
>
>   LV                             VG      Attr        LSize    Pool Origin                    Data%  Meta%  Move Log Cpy%Sync Convert Devices
>   ceph-db-osd1                   optane  rwi-a-r---   44.00g  100.00  ceph-db-osd1_rimage_0(0),ceph-db-osd1_rimage_1(0)
>   [ceph-db-osd1_rimage_0]        optane  gwi-aor---   44.00g  [ceph-db-osd1_rimage_0_iorig]  100.00  ceph-db-osd1_rimage_0_iorig(0)
>   [ceph-db-osd1_rimage_0_imeta]  optane  ewi-ao----  428.00m  /dev/sdg(55482)
>   [ceph-db-osd1_rimage_0_imeta]  optane  ewi-ao----  428.00m  /dev/sdg(84566)
>   [ceph-db-osd1_rimage_0_iorig]  optane  -wi-ao----   44.00g  /dev/sdg(9216)
>   [ceph-db-osd1_rimage_0_iorig]  optane  -wi-ao----   44.00g  /dev/sdg(82518)
>   [ceph-db-osd1_rimage_1]        optane  gwi-aor---   44.00g  [ceph-db-osd1_rimage_1_iorig]  100.00  ceph-db-osd1_rimage_1_iorig(0)
>   [ceph-db-osd1_rimage_1_imeta]  optane  ewi-ao----  428.00m  /dev/sdj(55392)
>   [ceph-db-osd1_rimage_1_imeta]  optane  ewi-ao----  428.00m  /dev/sdj(75457)
>   [ceph-db-osd1_rimage_1_iorig]  optane  -wi-ao----   44.00g  /dev/sdj(9218)
>   [ceph-db-osd1_rimage_1_iorig]  optane  -wi-ao----   44.00g  /dev/sdj(73409)
>   [ceph-db-osd1_rmeta_0]         optane  ewi-aor---    4.00m  /dev/sdg(55388)
>   [ceph-db-osd1_rmeta_1]         optane  ewi-aor---    4.00m  /dev/sdj(9217)
>
>
>
> It would be good if the symlinks were recreated to point to the correct
> device even when the block.db sits on a raid1 lv.
> Not sure if this problem has been reported yet.
>
>
> Cheers
>
> Reto





_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx


