Dear Ceph-users,
I have a problem that I'd like your input on.

Preface:
I have a test cluster and a production cluster. Both are set up the same
way and both show the same "issue". I am running Ubuntu 22.04 and
deployed Ceph 17.2.3 via cephadm, later upgraded to 17.2.7, which is the
version we are currently running. Since the issue appears to be exactly
the same on the test cluster, I will post the test-cluster outputs here
for better readability.
The issue:
I have replaced some disks, and since the replacement the affected OSD no
longer shows the NVMe as its WAL device. The WAL LV still exists, but the
OSD's metadata does not list it, the way it does for every other OSD/HDD
that hasn't been replaced.
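The dumps below are the per-OSD metadata (i.e. what "ceph osd metadata
<id>" prints); I only masked our public IPs. To reproduce:
```
# Compare the replaced OSD against an untouched one on the same host
ceph osd metadata 1
ceph osd metadata 0
```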
OSD.1 (incorrect, bluefs_dedicated_wal: "0")
```
{
"id": 1,
"arch": "x86_64",
"back_addr":
"[v2:192.168.6.241:6802/3213655489,v1:192.168.6.241:6803/3213655489]",
"back_iface": "",
"bluefs": "1",
"bluefs_dedicated_db": "0",
"bluefs_dedicated_wal": "0",
"bluefs_single_shared_device": "1",
"bluestore_bdev_access_mode": "blk",
"bluestore_bdev_block_size": "4096",
"bluestore_bdev_dev_node": "/dev/dm-3",
"bluestore_bdev_devices": "sdd",
"bluestore_bdev_driver": "KernelDevice",
"bluestore_bdev_optimal_io_size": "0",
"bluestore_bdev_partition_path": "/dev/dm-3",
"bluestore_bdev_rotational": "1",
"bluestore_bdev_size": "17175674880",
"bluestore_bdev_support_discard": "1",
"bluestore_bdev_type": "hdd",
"bluestore_min_alloc_size": "4096",
"ceph_release": "quincy",
"ceph_version": "ceph version 17.2.7
(b12291d110049b2f35e32e0de30d70e9a4c060d2) quincy (stable)",
"ceph_version_short": "17.2.7",
"ceph_version_when_created": "",
"container_hostname": "bi-ubu-srv-ceph2-01",
"container_image":
"quay.io/ceph/ceph@sha256:28323e41a7d17db238bdcc0a4d7f38d272f75c1a499bc30f59b0b504af132c6b",
"cpu": "AMD EPYC 75F3 32-Core Processor",
"created_at": "",
"default_device_class": "hdd",
"device_ids": "sdd=QEMU_HARDDISK_drive-scsi3",
"device_paths":
"sdd=/dev/disk/by-path/pci-0000:00:05.0-scsi-0:0:3:0",
"devices": "sdd",
"distro": "centos",
"distro_description": "CentOS Stream 8",
"distro_version": "8",
"front_addr":
"[v2:<my_public_ip>.241:6800/3213655489,v1:<my_public_ip>.241:6801/3213655489]",
"front_iface": "",
"hb_back_addr":
"[v2:192.168.6.241:6806/3213655489,v1:192.168.6.241:6807/3213655489]",
"hb_front_addr":
"[v2:<my_public_ip>.241:6804/3213655489,v1:<my_public_ip>.241:6805/3213655489]",
"hostname": "bi-ubu-srv-ceph2-01",
"journal_rotational": "1",
"kernel_description": "#132-Ubuntu SMP Thu Aug 29 13:45:52 UTC 2024",
"kernel_version": "5.15.0-122-generic",
"mem_swap_kb": "4018172",
"mem_total_kb": "5025288",
"network_numa_unknown_ifaces": "back_iface,front_iface",
"objectstore_numa_unknown_devices": "sdd",
"os": "Linux",
"osd_data": "/var/lib/ceph/osd/ceph-1",
"osd_objectstore": "bluestore",
"osdspec_affinity": "dashboard-admin-1661853488642",
"rotational": "1"
}
```
OSD.0 (correct, bluefs_dedicated_wal: "1")
```
{
"id": 0,
"arch": "x86_64",
"back_addr":
"[v2:192.168.6.241:6810/3249286142,v1:192.168.6.241:6811/3249286142]",
"back_iface": "",
"bluefs": "1",
"bluefs_dedicated_db": "0",
"bluefs_dedicated_wal": "1",
"bluefs_single_shared_device": "0",
"bluefs_wal_access_mode": "blk",
"bluefs_wal_block_size": "4096",
"bluefs_wal_dev_node": "/dev/dm-0",
"bluefs_wal_devices": "sdb",
"bluefs_wal_driver": "KernelDevice",
"bluefs_wal_optimal_io_size": "0",
"bluefs_wal_partition_path": "/dev/dm-0",
"bluefs_wal_rotational": "0",
"bluefs_wal_size": "4290772992",
"bluefs_wal_support_discard": "1",
"bluefs_wal_type": "ssd",
"bluestore_bdev_access_mode": "blk",
"bluestore_bdev_block_size": "4096",
"bluestore_bdev_dev_node": "/dev/dm-2",
"bluestore_bdev_devices": "sdc",
"bluestore_bdev_driver": "KernelDevice",
"bluestore_bdev_optimal_io_size": "0",
"bluestore_bdev_partition_path": "/dev/dm-2",
"bluestore_bdev_rotational": "1",
"bluestore_bdev_size": "17175674880",
"bluestore_bdev_support_discard": "1",
"bluestore_bdev_type": "hdd",
"bluestore_min_alloc_size": "4096",
"ceph_release": "quincy",
"ceph_version": "ceph version 17.2.7
(b12291d110049b2f35e32e0de30d70e9a4c060d2) quincy (stable)",
"ceph_version_short": "17.2.7",
"ceph_version_when_created": "",
"container_hostname": "bi-ubu-srv-ceph2-01",
"container_image":
"quay.io/ceph/ceph@sha256:28323e41a7d17db238bdcc0a4d7f38d272f75c1a499bc30f59b0b504af132c6b",
"cpu": "AMD EPYC 75F3 32-Core Processor",
"created_at": "",
"default_device_class": "hdd",
"device_ids":
"sdb=QEMU_HARDDISK_drive-scsi1,sdc=QEMU_HARDDISK_drive-scsi2",
"device_paths":
"sdb=/dev/disk/by-path/pci-0000:00:05.0-scsi-0:0:1:0,sdc=/dev/disk/by-path/pci-0000:00:05.0-scsi-0:0:2:0",
"devices": "sdb,sdc",
"distro": "centos",
"distro_description": "CentOS Stream 8",
"distro_version": "8",
"front_addr":
"[v2:<my_public_ip>.241:6808/3249286142,v1:<my_public_ip>.241:6809/3249286142]",
"front_iface": "",
"hb_back_addr":
"[v2:192.168.6.241:6814/3249286142,v1:192.168.6.241:6815/3249286142]",
"hb_front_addr":
"[v2:<my_public_ip>.241:6812/3249286142,v1:<my_public_ip>.241:6813/3249286142]",
"hostname": "bi-ubu-srv-ceph2-01",
"journal_rotational": "0",
"kernel_description": "#132-Ubuntu SMP Thu Aug 29 13:45:52 UTC 2024",
"kernel_version": "5.15.0-122-generic",
"mem_swap_kb": "4018172",
"mem_total_kb": "5025288",
"network_numa_unknown_ifaces": "back_iface,front_iface",
"objectstore_numa_unknown_devices": "sdb,sdc",
"os": "Linux",
"osd_data": "/var/lib/ceph/osd/ceph-0",
"osd_objectstore": "bluestore",
"osdspec_affinity": "dashboard-admin-1661853488642",
"rotational": "1"
}
```
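To make the relevant difference easier to spot, the interesting fields
can be pulled out of the metadata like this (assuming jq is available on
the host):
```
# Only the BlueFS/WAL-related fields of both OSDs
for id in 0 1; do
  ceph osd metadata "$id" | jq '{id, devices, bluefs_dedicated_db,
    bluefs_dedicated_wal, bluefs_single_shared_device}'
done
```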
The LV that should hold the WAL still exists:
```
:/# lvs
  LV                                             VG                                        Attr       LSize   Pool Origin Data%  Meta%  Move Log Cpy%Sync Convert
  osd-block-d472bf9f-c17d-4939-baf5-514a07db66bc ceph-7cd212d6-29af-4c14-8e0b-bcd1984ba368 -wi-ao---- <16.00g
  osd-block-d94bda82-e59f-4d3d-81cd-28ea69c5e02f ceph-960d0016-077a-4ffa-a60e-32f6efcfcb35 -wi-ao---- <16.00g
  osd-wal-3e18feca-01cf-4c5b-9934-8a30bc242bbd   ceph-a84cbc55-33b5-4c31-8426-66e375fc157b -wi-ao----  <4.00g
  osd-wal-a6202ebe-0959-40c6-8ba2-bc387d550431   ceph-a84cbc55-33b5-4c31-8426-66e375fc157b -wi-a-----  <4.00g
```
We can also see there that the WAL LV of the replaced OSD is the only one
whose attributes lack the 'o' (open) flag, i.e. it is not actually in use.
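For reference, the open state is the 6th character of lv_attr, which can
also be listed explicitly with standard LVM columns:
```
# The 6th lv_attr character is 'o' while the LV is open (in use by an OSD)
lvs -o lv_name,vg_name,lv_attr,devices
```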
For good measure, this is the spec of the orch OSD service:
```
service_type: osd
service_id: dashboard-admin-1661853488642
service_name: osd.dashboard-admin-1661853488642
placement:
  host_pattern: '*'
spec:
  data_devices:
    size: 16GB
  db_devices:
    rotational: false
  filter_logic: AND
  objectstore: bluestore
  wal_devices:
    rotational: false
status:
  created: '2022-08-30T09:58:11.210571Z'
  last_refresh: '2024-09-27T12:19:14.619434Z'
  running: 16
  size: 16
```
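That is the YAML export of the service ("ceph orch ls osd --format yaml"
or similar). I assume a dry run along these lines would at least show
whether the orchestrator still intends to attach a WAL (sketch; the file
name is just my local copy of the spec above):
```
# Re-export the spec and preview what cephadm would do with it
ceph orch ls osd --export > osd-spec.yaml
ceph orch apply -i osd-spec.yaml --dry-run
```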
"ceph-volume lvm list" looks fine, in my opinion, as it should be:
```
{
"0": [
{
"devices": [
"/dev/sdc"
],
"lv_name": "osd-block-d472bf9f-c17d-4939-baf5-514a07db66bc",
"lv_path":
"/dev/ceph-7cd212d6-29af-4c14-8e0b-bcd1984ba368/osd-block-d472bf9f-c17d-4939-baf5-514a07db66bc",
"lv_size": "17175674880",
"lv_tags":
"ceph.block_device=/dev/ceph-7cd212d6-29af-4c14-8e0b-bcd1984ba368/osd-block-d472bf9f-c17d-4939-baf5-514a07db66bc,ceph.block_uuid=3tkcvo-XKmb-03UQ-xMie-1Y1d-6ptW-NugdPk,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=a12b3ade-2849-11ed-9b46-c5b62beb178a,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=d472bf9f-c17d-4939-baf5-514a07db66bc,ceph.osd_id=0,ceph.osdspec_affinity=dashboard-admin-1661853488642,ceph.type=block,ceph.vdo=0,ceph.wal_device=/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-3e18feca-01cf-4c5b-9934-8a30bc242bbd,ceph.wal_uuid=BgRlZX-YX36-SM0q-UiLE-B5xC-eBbO-yxL3et",
"lv_uuid": "3tkcvo-XKmb-03UQ-xMie-1Y1d-6ptW-NugdPk",
"name": "osd-block-d472bf9f-c17d-4939-baf5-514a07db66bc",
"path":
"/dev/ceph-7cd212d6-29af-4c14-8e0b-bcd1984ba368/osd-block-d472bf9f-c17d-4939-baf5-514a07db66bc",
"tags": {
"ceph.block_device":
"/dev/ceph-7cd212d6-29af-4c14-8e0b-bcd1984ba368/osd-block-d472bf9f-c17d-4939-baf5-514a07db66bc",
"ceph.block_uuid":
"3tkcvo-XKmb-03UQ-xMie-1Y1d-6ptW-NugdPk",
"ceph.cephx_lockbox_secret": "",
"ceph.cluster_fsid":
"a12b3ade-2849-11ed-9b46-c5b62beb178a",
"ceph.cluster_name": "ceph",
"ceph.crush_device_class": "",
"ceph.encrypted": "0",
"ceph.osd_fsid": "d472bf9f-c17d-4939-baf5-514a07db66bc",
"ceph.osd_id": "0",
"ceph.osdspec_affinity": "dashboard-admin-1661853488642",
"ceph.type": "block",
"ceph.vdo": "0",
"ceph.wal_device":
"/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-3e18feca-01cf-4c5b-9934-8a30bc242bbd",
"ceph.wal_uuid": "BgRlZX-YX36-SM0q-UiLE-B5xC-eBbO-yxL3et"
},
"type": "block",
"vg_name": "ceph-7cd212d6-29af-4c14-8e0b-bcd1984ba368"
},
{
"devices": [
"/dev/sdb"
],
"lv_name": "osd-wal-3e18feca-01cf-4c5b-9934-8a30bc242bbd",
"lv_path":
"/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-3e18feca-01cf-4c5b-9934-8a30bc242bbd",
"lv_size": "4290772992",
"lv_tags":
"ceph.block_device=/dev/ceph-7cd212d6-29af-4c14-8e0b-bcd1984ba368/osd-block-d472bf9f-c17d-4939-baf5-514a07db66bc,ceph.block_uuid=3tkcvo-XKmb-03UQ-xMie-1Y1d-6ptW-NugdPk,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=a12b3ade-2849-11ed-9b46-c5b62beb178a,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=d472bf9f-c17d-4939-baf5-514a07db66bc,ceph.osd_id=0,ceph.osdspec_affinity=dashboard-admin-1661853488642,ceph.type=wal,ceph.vdo=0,ceph.wal_device=/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-3e18feca-01cf-4c5b-9934-8a30bc242bbd,ceph.wal_uuid=BgRlZX-YX36-SM0q-UiLE-B5xC-eBbO-yxL3et",
"lv_uuid": "BgRlZX-YX36-SM0q-UiLE-B5xC-eBbO-yxL3et",
"name": "osd-wal-3e18feca-01cf-4c5b-9934-8a30bc242bbd",
"path":
"/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-3e18feca-01cf-4c5b-9934-8a30bc242bbd",
"tags": {
"ceph.block_device":
"/dev/ceph-7cd212d6-29af-4c14-8e0b-bcd1984ba368/osd-block-d472bf9f-c17d-4939-baf5-514a07db66bc",
"ceph.block_uuid":
"3tkcvo-XKmb-03UQ-xMie-1Y1d-6ptW-NugdPk",
"ceph.cephx_lockbox_secret": "",
"ceph.cluster_fsid":
"a12b3ade-2849-11ed-9b46-c5b62beb178a",
"ceph.cluster_name": "ceph",
"ceph.crush_device_class": "",
"ceph.encrypted": "0",
"ceph.osd_fsid": "d472bf9f-c17d-4939-baf5-514a07db66bc",
"ceph.osd_id": "0",
"ceph.osdspec_affinity": "dashboard-admin-1661853488642",
"ceph.type": "wal",
"ceph.vdo": "0",
"ceph.wal_device":
"/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-3e18feca-01cf-4c5b-9934-8a30bc242bbd",
"ceph.wal_uuid": "BgRlZX-YX36-SM0q-UiLE-B5xC-eBbO-yxL3et"
},
"type": "wal",
"vg_name": "ceph-a84cbc55-33b5-4c31-8426-66e375fc157b"
}
],
"1": [
{
"devices": [
"/dev/sdd"
],
"lv_name": "osd-block-d94bda82-e59f-4d3d-81cd-28ea69c5e02f",
"lv_path":
"/dev/ceph-960d0016-077a-4ffa-a60e-32f6efcfcb35/osd-block-d94bda82-e59f-4d3d-81cd-28ea69c5e02f",
"lv_size": "17175674880",
"lv_tags":
"ceph.block_device=/dev/ceph-960d0016-077a-4ffa-a60e-32f6efcfcb35/osd-block-d94bda82-e59f-4d3d-81cd-28ea69c5e02f,ceph.block_uuid=QdBLn0-aWXn-Ud7g-k7E3-91Yv-HqqJ-Xh3TuO,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=a12b3ade-2849-11ed-9b46-c5b62beb178a,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=d94bda82-e59f-4d3d-81cd-28ea69c5e02f,ceph.osd_id=1,ceph.osdspec_affinity=dashboard-admin-1661853488642,ceph.type=block,ceph.vdo=0",
"lv_uuid": "QdBLn0-aWXn-Ud7g-k7E3-91Yv-HqqJ-Xh3TuO",
"name": "osd-block-d94bda82-e59f-4d3d-81cd-28ea69c5e02f",
"path":
"/dev/ceph-960d0016-077a-4ffa-a60e-32f6efcfcb35/osd-block-d94bda82-e59f-4d3d-81cd-28ea69c5e02f",
"tags": {
"ceph.block_device":
"/dev/ceph-960d0016-077a-4ffa-a60e-32f6efcfcb35/osd-block-d94bda82-e59f-4d3d-81cd-28ea69c5e02f",
"ceph.block_uuid":
"QdBLn0-aWXn-Ud7g-k7E3-91Yv-HqqJ-Xh3TuO",
"ceph.cephx_lockbox_secret": "",
"ceph.cluster_fsid":
"a12b3ade-2849-11ed-9b46-c5b62beb178a",
"ceph.cluster_name": "ceph",
"ceph.crush_device_class": "",
"ceph.encrypted": "0",
"ceph.osd_fsid": "d94bda82-e59f-4d3d-81cd-28ea69c5e02f",
"ceph.osd_id": "1",
"ceph.osdspec_affinity": "dashboard-admin-1661853488642",
"ceph.type": "block",
"ceph.vdo": "0"
},
"type": "block",
"vg_name": "ceph-960d0016-077a-4ffa-a60e-32f6efcfcb35"
},
{
"devices": [
"/dev/sdb"
],
"lv_name": "osd-wal-a6202ebe-0959-40c6-8ba2-bc387d550431",
"lv_path":
"/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-a6202ebe-0959-40c6-8ba2-bc387d550431",
"lv_size": "4290772992",
"lv_tags":
"ceph.block_device=/dev/ceph-179656b5-6783-4b4e-bb7e-695d4523072f/osd-block-7a1d0007-71ff-4011-8a18-e6de1499cbdf,ceph.block_uuid=gDt63k-v0Qd-89WE-bOGO-cmZv-Sc8v-tYOagJ,ceph.cephx_lockbox_secret=,ceph.cluster_fsid=a12b3ade-2849-11ed-9b46-c5b62beb178a,ceph.cluster_name=ceph,ceph.crush_device_class=,ceph.encrypted=0,ceph.osd_fsid=7a1d0007-71ff-4011-8a18-e6de1499cbdf,ceph.osd_id=1,ceph.osdspec_affinity=dashboard-admin-1661853488642,ceph.type=wal,ceph.vdo=0,ceph.wal_device=/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-a6202ebe-0959-40c6-8ba2-bc387d550431,ceph.wal_uuid=UOffkj-TBZJ-9b66-wIW7-3DhH-xibs-Y5t5Xq",
"lv_uuid": "UOffkj-TBZJ-9b66-wIW7-3DhH-xibs-Y5t5Xq",
"name": "osd-wal-a6202ebe-0959-40c6-8ba2-bc387d550431",
"path":
"/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-a6202ebe-0959-40c6-8ba2-bc387d550431",
"tags": {
"ceph.block_device":
"/dev/ceph-179656b5-6783-4b4e-bb7e-695d4523072f/osd-block-7a1d0007-71ff-4011-8a18-e6de1499cbdf",
"ceph.block_uuid":
"gDt63k-v0Qd-89WE-bOGO-cmZv-Sc8v-tYOagJ",
"ceph.cephx_lockbox_secret": "",
"ceph.cluster_fsid":
"a12b3ade-2849-11ed-9b46-c5b62beb178a",
"ceph.cluster_name": "ceph",
"ceph.crush_device_class": "",
"ceph.encrypted": "0",
"ceph.osd_fsid": "7a1d0007-71ff-4011-8a18-e6de1499cbdf",
"ceph.osd_id": "1",
"ceph.osdspec_affinity": "dashboard-admin-1661853488642",
"ceph.type": "wal",
"ceph.vdo": "0",
"ceph.wal_device":
"/dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-a6202ebe-0959-40c6-8ba2-bc387d550431",
"ceph.wal_uuid": "UOffkj-TBZJ-9b66-wIW7-3DhH-xibs-Y5t5Xq"
},
"type": "wal",
"vg_name": "ceph-a84cbc55-33b5-4c31-8426-66e375fc157b"
}
]
}
```
We can see two devices for osd.1, but the OSD does not seem to use the
WAL device.
How can I tell osd.1 to use its WAL again?
Why does the orch OSD service not take care of this on its own?
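The closest thing I could come up with myself is to attach the WAL back
manually with ceph-bluestore-tool, roughly like the sketch below
(untested, and I am not sure it is the right approach for an LV that is
already tagged as this OSD's WAL):
```
# Sketch only, not yet executed: stop the OSD, attach the existing WAL LV
# as a BlueFS WAL device, then start the OSD again.
ceph orch daemon stop osd.1
cephadm shell --name osd.1 -- ceph-bluestore-tool bluefs-bdev-new-wal \
    --path /var/lib/ceph/osd/ceph-1 \
    --dev-target /dev/ceph-a84cbc55-33b5-4c31-8426-66e375fc157b/osd-wal-a6202ebe-0959-40c6-8ba2-bc387d550431
ceph orch daemon start osd.1
```
Is that the intended way, or is there a cleaner path via the orchestrator?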
When I replaced the disks, I used
```
ceph orch osd rm 1 --replace
```
The OSD came back up and backfilled, but without the WAL.
While digging into this, I also realized that the DB is not on the NVMe
either, but on the HDD itself, even though I specified it in the spec
above:

db_devices:
  rotational: false

Shouldn't the DB then be located on the NVMe/SSD where the WAL is? At
least that is what the dashboard suggests when creating OSDs, and of
course I chose the NVMe/SSD for both DB and WAL there.
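For completeness, a dedicated DB/WAL should also show up as block.db /
block.wal symlinks in the cephadm data dir on the host; that should be an
easy way to double-check (paths assume the cluster FSID from the LV tags
above):
```
# A dedicated DB/WAL appears as block.db / block.wal next to the block symlink
ls -l /var/lib/ceph/a12b3ade-2849-11ed-9b46-c5b62beb178a/osd.0/block*
ls -l /var/lib/ceph/a12b3ade-2849-11ed-9b46-c5b62beb178a/osd.1/block*
```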
I hope this is somewhat readable and coherent.
Thank you in advance!
Best wishes and a nice weekend!