# ceph mon stat
e6: 2 mons at {a001s017=[v2:10.45.128.27:3300/0,v1:10.45.128.27:6789/0],a001s018=[v2:10.45.128.28:3300/0,v1:10.45.128.28:6789/0]}, election epoch 162, leader 0 a001s018, quorum 0,1 a001s018,a001s017
# ceph orch ps | grep mon
mon.a001s016  a001s016  running (3h)   6m ago   3h  527M   2048M  16.2.5  6e73176320aa  39db8cfba7e1
mon.a001s017  a001s017  running (22h)  47s ago  1h  993M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   48s ago  2y  1167M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph mgr stat
{
"epoch": 1130365,
"available": true,
"active_name": "a001s016.ctmoay",
"num_standby": 1
}
# ceph orch ps | grep mgr
mgr.a001s016.ctmoay  a001s016  *:8443  running (18M)  109s ago  23M  518M  -  16.2.5  6e73176320aa  169cafcbbb99
mgr.a001s017.bpygfm  a001s017  *:8443  running (19M)  5m ago    23M  501M  -  16.2.5  6e73176320aa  97257195158c
mgr.a001s018.hcxnef  a001s018  *:8443  running (20M)  5m ago    23M  113M  -  16.2.5  6e73176320aa  21ba5896cee2
# ceph orch ls --service_name=mgr --export
service_type: mgr
service_name: mgr
placement:
  count: 3
  hosts:
  - a001s016
  - a001s017
  - a001s018
# ceph orch ls --service_name=mon --export
service_type: mon
service_name: mon
placement:
  count: 3
  hosts:
  - a001s016
  - a001s017
  - a001s018
-----Original Message-----
From: Adiga, Anantha
Sent: Monday, April 1, 2024 6:06 PM
To: Eugen Block <eblock@xxxxxx>
Cc: ceph-users@xxxxxxx
Subject: RE: Re: ceph status not showing correct monitor services
# ceph tell mon.a001s016 mon_status
Error ENOENT: problem getting command descriptions from mon.a001s016
a001s016 is outside the quorum; see below.
# ceph tell mon.a001s017 mon_status
{
"name": "a001s017",
"rank": 1,
"state": "peon",
"election_epoch": 162,
"quorum": [
0,
1
],
"quorum_age": 79938,
"features": {
"required_con": "2449958747317026820",
"required_mon": [
"kraken",
"luminous",
"mimic",
"osdmap-prune",
"nautilus",
"octopus",
"pacific",
"elector-pinging"
],
"quorum_con": "4540138297136906239",
"quorum_mon": [
"kraken",
"luminous",
"mimic",
"osdmap-prune",
"nautilus",
"octopus",
"pacific",
"elector-pinging"
]
},
"outside_quorum": [],
"extra_probe_peers": [
{
"addrvec": [
{
"type": "v2",
"addr": "10.45.128.26:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "10.45.128.26:6789",
"nonce": 0
}
]
}
],
"sync_provider": [],
"monmap": {
"epoch": 6,
"fsid": "604d56db-2fab-45db-a9ea-c418f9a8cca8",
"modified": "2024-03-31T23:54:18.692983Z",
"created": "2021-09-30T16:15:12.884602Z",
"min_mon_release": 16,
"min_mon_release_name": "pacific",
"election_strategy": 1,
"disallowed_leaders: ": "",
"stretch_mode": false,
"features": {
"persistent": [
"kraken",
"luminous",
"mimic",
"osdmap-prune",
"nautilus",
"octopus",
"pacific",
"elector-pinging"
],
"optional": []
},
"mons": [
{
"rank": 0,
"name": "a001s018",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "10.45.128.28:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "10.45.128.28:6789",
"nonce": 0
}
]
},
"addr": "10.45.128.28:6789/0",
"public_addr": "10.45.128.28:6789/0",
"priority": 0,
"weight": 0,
"crush_location": "{}"
},
{
"rank": 1,
"name": "a001s017",
"public_addrs": {
"addrvec": [
{
"type": "v2",
"addr": "10.45.128.27:3300",
"nonce": 0
},
{
"type": "v1",
"addr": "10.45.128.27:6789",
"nonce": 0
}
]
},
"addr": "10.45.128.27:6789/0",
"public_addr": "10.45.128.27:6789/0",
"priority": 0,
"weight": 0,
"crush_location": "{}"
}
]
},
"feature_map": {
"mon": [
{
"features": "0x3f01cfb9fffdffff",
"release": "luminous",
"num": 1
}
],
"mds": [
{
"features": "0x3f01cfb9fffdffff",
"release": "luminous",
"num": 3
}
],
"osd": [
{
"features": "0x3f01cfb9fffdffff",
"release": "luminous",
"num": 15
}
],
"client": [
{
"features": "0x2f018fb86aa42ada",
"release": "luminous",
"num": 50
},
{
"features": "0x2f018fb87aa4aafe",
"release": "luminous",
"num": 40
},
{
"features": "0x3f01cfb8ffedffff",
"release": "luminous",
"num": 1
},
{
"features": "0x3f01cfb9fffdffff",
"release": "luminous",
"num": 72
}
]
},
"stretch_mode": false
}
# ceph mon dump
dumped monmap epoch 6
epoch 6
fsid 604d56db-2fab-45db-a9ea-c418f9a8cca8
last_changed 2024-03-31T23:54:18.692983+0000
created 2021-09-30T16:15:12.884602+0000
min_mon_release 16 (pacific)
election_strategy: 1
0: [v2:10.45.128.28:3300/0,v1:10.45.128.28:6789/0] mon.a001s018
1: [v2:10.45.128.27:3300/0,v1:10.45.128.27:6789/0] mon.a001s017
root@a001s016:/var/run/ceph/604d56db-2fab-45db-a9ea-c418f9a8cca8#
-----Original Message-----
From: Adiga, Anantha
Sent: Monday, April 1, 2024 3:20 PM
To: Adiga, Anantha <anantha.adiga@xxxxxxxxx>; Eugen Block <eblock@xxxxxx>
Cc: ceph-users@xxxxxxx
Subject: RE: Re: ceph status not showing correct monitor services
Neither method updates the mon map. Is there a way to inject mon.a001s016 into the current mon map?
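(For reference, the manual workaround would be the monmap-injection procedure from the Ceph docs. A rough sketch only — it assumes mon.a001s016's public address is 10.45.128.26, as in the extra_probe_peers output shown above, and that it is run from inside "cephadm shell --name mon.a001s016" on a001s016 while that mon daemon is stopped:)
# ceph mon getmap -o /tmp/monmap          # fetch the current monmap (epoch 6) from the quorum
# monmaptool --print /tmp/monmap          # confirm only a001s018 and a001s017 are listed
# monmaptool --add a001s016 10.45.128.26:6789 /tmp/monmap
# ceph-mon -i a001s016 --inject-monmap /tmp/monmap
Then start mon.a001s016 again and re-check "ceph mon dump".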
# ceph mon dump
dumped monmap epoch 6
epoch 6
fsid 604d56db-2fab-45db-a9ea-c418f9a8cca8
last_changed 2024-03-31T23:54:18.692983+0000
created 2021-09-30T16:15:12.884602+0000
min_mon_release 16 (pacific)
election_strategy: 1
0: [v2:10.45.128.28:3300/0,v1:10.45.128.28:6789/0] mon.a001s018
1: [v2:10.45.128.27:3300/0,v1:10.45.128.27:6789/0] mon.a001s017
# ceph tell mon.a001s016 mon_status
Error ENOENT: problem getting command descriptions from mon.a001s016
# ceph tell mon.a001s016 mon_status
Error ENOENT: problem getting command descriptions from mon.a001s016
# ceph tell mon.a001s017 mon_status
{
"name": "a001s017",
"rank": 1,
"state": "peon",
"election_epoch": 162,
"quorum": [
0,
1
],
"quorum_age": 69551,
"features": {
..............
..............
# ceph orch ls --service_name=mon --export > mon3.yml
service_type: mon
service_name: mon
placement:
  count: 3
  hosts:
  - a001s016
  - a001s017
  - a001s018
# cp mon3.yml mon2.yml
# vi mon2.yml
# cat mon2.yml
service_type: mon
service_name: mon
placement:
  count: 2
  hosts:
  - a001s017
  - a001s018
# ceph orch apply -i mon2.yml --dry-run
WARNING! Dry-Runs are snapshots of a certain point in time and are
bound to the current inventory setup. If any on these conditions
changes, the preview will be invalid. Please make sure to have a
minimal timeframe between planning and applying the specs.
####################
SERVICESPEC PREVIEWS
####################
+---------+------+--------+--------------+
|SERVICE |NAME |ADD_TO |REMOVE_FROM |
+---------+------+--------+--------------+
|mon |mon | |mon.a001s016 |
+---------+------+--------+--------------+
################
OSDSPEC PREVIEWS
################
+---------+------+------+------+----+-----+
|SERVICE |NAME |HOST |DATA |DB |WAL |
+---------+------+------+------+----+-----+
+---------+------+------+------+----+-----+
# ceph orch ls --service_name=mon --refresh
NAME  PORTS  RUNNING  REFRESHED  AGE  PLACEMENT
mon           3/3      5m ago     18h  a001s016;a001s017;a001s018;count:3
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (21h)  2s ago  21h  734M   2048M  16.2.5  6e73176320aa  8484a912f96a
mon.a001s017  a001s017  running (18h)  2s ago  21h  976M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   2s ago  2y   1164M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (21h)  37s ago  21h  734M   2048M  16.2.5  6e73176320aa  8484a912f96a
mon.a001s017  a001s017  running (18h)  37s ago  21h  977M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   38s ago  2y   1166M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch apply -i mon2.yml
Scheduled mon update...
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (21h)  21s ago  21h  734M   2048M  16.2.5  6e73176320aa  8484a912f96a
mon.a001s017  a001s017  running (18h)  20s ago  21h  962M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   21s ago  2y   1156M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s017  a001s017  running (18h)  23s ago  21h  962M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   24s ago  2y   1156M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s017  a001s017  running (18h)  27s ago  21h  962M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   0s ago   2y   1154M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s017  a001s017  running (18h)  2s ago   21h  960M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   3s ago   2y   1154M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s017  a001s017  running (18h)  5s ago   21h  960M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   0s ago   2y   1154M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s017  a001s017  running (18h)  2s ago   21h  962M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   2s ago   2y   1154M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ls /var/lib/ceph/604d56db-2fab-45db-a9ea-c418f9a8cca8/
cephadm.d4237e4639c108308fe13147b1c08af93c3d5724d9ff21ae797eb4b78fea3931  keepalived.nfs.nfs.a001s016.ofqyjl  osd.1   osd.22  osd.4
crash                            mds.cephfs.a001s016.vtyivn   osd.10  osd.24  osd.7
crash.a001s016                   mgr.a001s016.ctmoay          osd.13  osd.27  removed
haproxy.nfs.nfs.a001s016.vpjtxt  nfs.nfs.0.0.a001s016.wumnjs  osd.15  osd.30  rgw.ceph.a001s016.gjyanl
home                             node-exporter.a001s016       osd.18  osd.33
# ceph mon dump
dumped monmap epoch 6
epoch 6
fsid 604d56db-2fab-45db-a9ea-c418f9a8cca8
last_changed 2024-03-31T23:54:18.692983+0000
created 2021-09-30T16:15:12.884602+0000
min_mon_release 16 (pacific)
election_strategy: 1
0: [v2:10.45.128.28:3300/0,v1:10.45.128.28:6789/0] mon.a001s018
1: [v2:10.45.128.27:3300/0,v1:10.45.128.27:6789/0] mon.a001s017
# cat mon3.yml
service_type: mon
service_name: mon
placement:
  count: 3
  hosts:
  - a001s016
  - a001s017
  - a001s018
# ceph orch apply -i mon3.yml --dry-run
WARNING! Dry-Runs are snapshots of a certain point in time and are
bound to the current inventory setup. If any on these conditions
changes, the preview will be invalid. Please make sure to have a
minimal timeframe between planning and applying the specs.
####################
SERVICESPEC PREVIEWS
####################
+---------+------+----------+-------------+
|SERVICE |NAME |ADD_TO |REMOVE_FROM |
+---------+------+----------+-------------+
|mon |mon |a001s016 | |
+---------+------+----------+-------------+
################
OSDSPEC PREVIEWS
################
+---------+------+------+------+----+-----+
|SERVICE |NAME |HOST |DATA |DB |WAL |
+---------+------+------+------+----+-----+
+---------+------+------+------+----+-----+
# ceph orch apply -i mon3.yml
Scheduled mon update...
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  starting       -       -    -      2048M  <unknown>  <unknown>  <unknown>
mon.a001s017  a001s017  running (18h)  2s ago  21h  960M   2048M  16.2.5     6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   3s ago  2y   1152M  2048M  16.2.5     6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (6s)   0s ago  6s   14.4M  2048M  16.2.5  6e73176320aa  dcd4705c2069
mon.a001s017  a001s017  running (18h)  8s ago  21h  960M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   9s ago  2y   1152M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (10s)  4s ago   10s  14.4M  2048M  16.2.5  6e73176320aa  dcd4705c2069
mon.a001s017  a001s017  running (18h)  12s ago  21h  960M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   13s ago  2y   1152M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ls /var/lib/ceph/604d56db-2fab-45db-a9ea-c418f9a8cca8/
cephadm.d4237e4639c108308fe13147b1c08af93c3d5724d9ff21ae797eb4b78fea3931  mgr.a001s016.ctmoay  osd.15  osd.4
crash                               mon.a001s016                 osd.18  osd.7
crash.a001s016                      nfs.nfs.0.0.a001s016.wumnjs  osd.22  removed
haproxy.nfs.nfs.a001s016.vpjtxt     node-exporter.a001s016       osd.24  rgw.ceph.a001s016.gjyanl
home                                osd.1                        osd.27
keepalived.nfs.nfs.a001s016.ofqyjl  osd.10                       osd.30
mds.cephfs.a001s016.vtyivn          osd.13                       osd.33
# ceph mon dump
dumped monmap epoch 6
epoch 6
fsid 604d56db-2fab-45db-a9ea-c418f9a8cca8
last_changed 2024-03-31T23:54:18.692983+0000
created 2021-09-30T16:15:12.884602+0000
min_mon_release 16 (pacific)
election_strategy: 1
0: [v2:10.45.128.28:3300/0,v1:10.45.128.28:6789/0] mon.a001s018
1: [v2:10.45.128.27:3300/0,v1:10.45.128.27:6789/0] mon.a001s017
# ceph -s
cluster:
id: 604d56db-2fab-45db-a9ea-c418f9a8cca8
health: HEALTH_OK
services:
mon: 2 daemons, quorum a001s018,a001s017 (age 18h)
mgr: a001s016.ctmoay(active, since 109m), standbys: a001s017.bpygfm
mds: 1/1 daemons up, 2 standby
osd: 36 osds: 36 up (since 2h), 36 in (since 2y)
rgw: 3 daemons active (3 hosts, 1 zones)
data:
volumes: 1/1 healthy
pools: 43 pools, 1633 pgs
objects: 51.64M objects, 77 TiB
usage: 119 TiB used, 132 TiB / 252 TiB avail
pgs: 1633 active+clean
io:
client: 356 MiB/s rd, 633 MiB/s wr, 506 op/s rd, 755 op/s wr
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (91s)  7s ago  91s  14.5M  2048M  16.2.5  6e73176320aa  dcd4705c2069
mon.a001s017  a001s017  running (18h)  7s ago  21h  978M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   8s ago  2y   1163M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ls | grep mon
mon           3/3  9s ago  114s  a001s016;a001s017;a001s018;count:3
# ceph log last cephadm
2024-04-01T18:56:29.631337+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014760 : cephadm [INF] Schedule restart daemon osd.6
2024-04-01T18:57:05.983577+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014780 : cephadm [INF] Schedule restart daemon osd.9
2024-04-01T18:57:17.919236+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014788 : cephadm [INF] Schedule restart daemon osd.12
2024-04-01T18:57:59.823635+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014811 : cephadm [INF] Schedule restart daemon osd.16
2024-04-01T18:58:26.039310+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014826 : cephadm [INF] Schedule restart daemon osd.19
2024-04-01T18:58:57.278955+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014844 : cephadm [INF] Schedule restart daemon osd.21
2024-04-01T18:59:36.656834+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014867 : cephadm [INF] Schedule restart daemon osd.25
2024-04-01T19:00:03.272925+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014882 : cephadm [INF] Schedule restart daemon osd.28
2024-04-01T19:00:28.348138+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014897 : cephadm [INF] Schedule restart daemon osd.31
2024-04-01T19:00:56.297517+0000 mgr.a001s017.bpygfm (mgr.60881489) 18014913 : cephadm [INF] Schedule restart daemon osd.34
2024-04-01T19:38:21.552347+0000 mgr.a001s017.bpygfm (mgr.60881489) 18016056 : cephadm [INF] Schedule restart daemon mgr.a001s017.bpygfm
2024-04-01T19:38:21.603564+0000 mgr.a001s017.bpygfm (mgr.60881489) 18016057 : cephadm [INF] Failing over to other MGR
2024-04-01T21:21:24.414308+0000 mgr.a001s016.ctmoay (mgr.61827136) 3096 : cephadm [INF] Saving service mon spec with placement a001s017;a001s018;count:2
2024-04-01T21:25:02.815151+0000 mgr.a001s016.ctmoay (mgr.61827136) 3211 : cephadm [INF] Saving service mon spec with placement a001s017;a001s018;count:2
2024-04-01T21:25:02.844721+0000 mgr.a001s016.ctmoay (mgr.61827136) 3212 : cephadm [INF] Safe to remove mon.a001s016: not in monmap (['a001s018', 'a001s017'])
2024-04-01T21:25:02.845059+0000 mgr.a001s016.ctmoay (mgr.61827136) 3213 : cephadm [INF] Removing monitor a001s016 from monmap...
2024-04-01T21:25:02.846401+0000 mgr.a001s016.ctmoay (mgr.61827136) 3214 : cephadm [INF] Removing daemon mon.a001s016 from a001s016
2024-04-01T21:25:06.870895+0000 mgr.a001s016.ctmoay (mgr.61827136) 3218 : cephadm [INF] Removing key for mon.
2024-04-01T21:26:27.861600+0000 mgr.a001s016.ctmoay (mgr.61827136) 3266 : cephadm [INF] Saving service mon spec with placement a001s016;a001s017;a001s018;count:3
2024-04-01T21:26:38.247516+0000 mgr.a001s016.ctmoay (mgr.61827136) 3273 : cephadm [INF] Saving service mon spec with placement a001s016;a001s017;a001s018;count:3
2024-04-01T21:26:44.494080+0000 mgr.a001s016.ctmoay (mgr.61827136) 3277 : cephadm [INF] Deploying daemon mon.a001s016 on a001s016
# cat /var/lib/ceph/604d56db-2fab-45db-a9ea-c418f9a8cca8/mon.a001s016/unit.image
docker.io/ceph/daemon@sha256:261bbe628f4b438f5bf10de5a8ee05282f2697a5a2cb7ff7668f776b61b9d586
# ceph orch ps --daemon_type=mon
NAME          HOST      PORTS  STATUS         REFRESHED  AGE  MEM USE  MEM LIM  VERSION  IMAGE ID      CONTAINER ID
mon.a001s016  a001s016         running (16m)  3m ago     16m  16.3M    2048M    16.2.5   6e73176320aa  dcd4705c2069
mon.a001s017  a001s017         running (18h)  4m ago     21h  960M     2048M    16.2.5   6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018         running (5w)   4m ago     2y   1155M    2048M    16.2.5   6e73176320aa  7d2bb6d41f54
# ceph orch daemon redeploy mon.a001s016 docker.io/ceph/daemon:latest-pacific
Scheduled to redeploy mon.a001s016 on host 'a001s016'
# ceph orch ps --daemon_type=mon --refresh
NAME          HOST      PORTS  STATUS         REFRESHED  AGE  MEM USE  MEM LIM  VERSION  IMAGE ID      CONTAINER ID
mon.a001s016  a001s016         running (13s)  7s ago     18m  14.1M    2048M    16.2.5   6e73176320aa  1917b1edf4ab
mon.a001s017  a001s017         running (18h)  6m ago     21h  960M     2048M    16.2.5   6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018         running (5w)   6m ago     2y   1155M    2048M    16.2.5   6e73176320aa  7d2bb6d41f54
=============================================================================================
The second method also does not update the ceph mon map:
# ceph mon dump
dumped monmap epoch 6
epoch 6
fsid 604d56db-2fab-45db-a9ea-c418f9a8cca8
last_changed 2024-03-31T23:54:18.692983+0000
created 2021-09-30T16:15:12.884602+0000
min_mon_release 16 (pacific)
election_strategy: 1
0: [v2:10.45.128.28:3300/0,v1:10.45.128.28:6789/0] mon.a001s018
1: [v2:10.45.128.27:3300/0,v1:10.45.128.27:6789/0] mon.a001s017
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (81s)  63s ago  81s  14.2M  2048M  16.2.5  6e73176320aa  0c0ec88139e1
mon.a001s017  a001s017  running (19h)  62s ago  22h  965M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   63s ago  2y   1157M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch daemon stop mon.a001s016
Scheduled to stop mon.a001s016 on host 'a001s016'
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (96s)  6s ago  96s  14.4M  2048M  16.2.5  6e73176320aa  0c0ec88139e1
mon.a001s017  a001s017  running (19h)  5s ago  22h  968M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   6s ago  2y   1157M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (98s)  7s ago  98s  14.4M  2048M  16.2.5  6e73176320aa  0c0ec88139e1
mon.a001s017  a001s017  running (19h)  7s ago  22h  968M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   8s ago  2y   1157M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (100s)  1s ago   100s  14.4M  2048M  16.2.5  6e73176320aa  0c0ec88139e1
mon.a001s017  a001s017  running (19h)   9s ago   22h   968M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)    10s ago  2y    1157M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (106s)  7s ago   106s  14.4M  2048M  16.2.5  6e73176320aa  0c0ec88139e1
mon.a001s017  a001s017  running (19h)   15s ago  22h   968M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)    16s ago  2y    1157M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  stopped        0s ago   108s  -      2048M  <unknown>  <unknown>  <unknown>
mon.a001s017  a001s017  running (19h)  17s ago  22h   968M   2048M  16.2.5     6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   0s ago   2y    1160M  2048M  16.2.5     6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  stopped        2s ago  111s  -      2048M  <unknown>  <unknown>  <unknown>
mon.a001s017  a001s017  running (19h)  2s ago  22h   972M   2048M  16.2.5     6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   3s ago  2y    1160M  2048M  16.2.5     6e73176320aa  7d2bb6d41f54
# ceph orch daemon rm mon.a001s016
Error EINVAL: must pass --force to REMOVE daemon with potentially PRECIOUS DATA for mon.a001s016
# ceph orch daemon rm mon.a001s016 --force
Removed mon.a001s016 from host 'a001s016'
# ceph orch ps --refresh | grep mon
mon.a001s017  a001s017  running (19h)  30s ago  22h  976M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   0s ago   2y   1166M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s017  a001s017  running (19h)  2s ago  22h  982M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   3s ago  2y   1166M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  starting       -       -    -      2048M  <unknown>  <unknown>  <unknown>
mon.a001s017  a001s017  running (19h)  6s ago  22h  982M   2048M  16.2.5     6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   7s ago  2y   1166M  2048M  16.2.5     6e73176320aa  7d2bb6d41f54
# ceph orch ps --refresh | grep mon
mon.a001s016  a001s016  running (8s)   2s ago  8s   14.4M  2048M  16.2.5  6e73176320aa  39db8cfba7e1
mon.a001s017  a001s017  running (19h)  1s ago  22h  987M   2048M  16.2.5  6e73176320aa  e5e5cb6c256c
mon.a001s018  a001s018  running (5w)   2s ago  2y   1171M  2048M  16.2.5  6e73176320aa  7d2bb6d41f54
# ceph mon dump
dumped monmap epoch 6
epoch 6
fsid 604d56db-2fab-45db-a9ea-c418f9a8cca8
last_changed 2024-03-31T23:54:18.692983+0000
created 2021-09-30T16:15:12.884602+0000
min_mon_release 16 (pacific)
election_strategy: 1
0: [v2:10.45.128.28:3300/0,v1:10.45.128.28:6789/0] mon.a001s018
1: [v2:10.45.128.27:3300/0,v1:10.45.128.27:6789/0] mon.a001s017
#
-----Original Message-----
From: Adiga, Anantha <anantha.adiga@xxxxxxxxx>
Sent: Monday, April 1, 2024 2:01 PM
To: Eugen Block <eblock@xxxxxx>
Cc: ceph-users@xxxxxxx
Subject: Re: ceph status not showing correct monitor services
Thank you. I will try the export and import method first.
Thank you,
Anantha
-----Original Message-----
From: Eugen Block <eblock@xxxxxx>
Sent: Monday, April 1, 2024 1:57 PM
To: Adiga, Anantha <anantha.adiga@xxxxxxxxx>
Cc: ceph-users@xxxxxxx
Subject: Re: Re: ceph status not showing correct monitor services
I have two approaches in mind, first one (and preferred) would be to
edit the mon spec to first remove mon.a001s016 and have a clean state.
Get the current spec with:
ceph orch ls mon --export > mon-edit.yaml
Edit the spec file so that mon.a001s016 is not part of it, then apply:
ceph orch apply -i mon-edit.yaml
This should remove the mon.a001s016 daemon. Then wait a few minutes
or so (until the daemon is actually gone, check locally on the node
with 'cephadm ls' and in /var/lib/ceph/<FSID>/removed) and add it
back to the spec file, then apply again. I would expect a third MON
to be deployed. If that doesn't work for some reason you'll need to
inspect logs to find the root cause.
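For example (roughly), on the node itself:
# cephadm ls | grep mon.a001s016     # should return nothing once the daemon is really gone
# ls /var/lib/ceph/<FSID>/removed/   # removed daemon data dirs end up here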
The second approach would be to remove and add the daemon manually:
ceph orch daemon rm mon.a001s016
Wait until it's really gone, then add it:
ceph orch daemon add mon a001s016
Not entirely sure about the daemon add mon command, you might need
to provide something else, I'm typing this by heart.
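If I remember the docs correctly, it takes the host plus its IP (or a CIDR network), so roughly:
# ceph orch daemon add mon a001s016:<public-ip-of-a001s016>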
Quoting "Adiga, Anantha" <anantha.adiga@xxxxxxxxx>:
Hi Eugen,
Yes, that is it. The OSDs were restarted because mon a001s017 was reporting that it is low on available space. How do I update the mon map to add mon.a001s016, as it is already online?
And how do I update the mgr map to include the standby mgr.a001s018, as it is also running? (Sketch below, after the monmap dump.)
ceph mon dump
dumped monmap epoch 6
epoch 6
fsid 604d56db-2fab-45db-a9ea-c418f9a8cca8
last_changed 2024-03-31T23:54:18.692983+0000
created 2021-09-30T16:15:12.884602+0000
min_mon_release 16 (pacific)
election_strategy: 1
0: [v2:10.45.128.28:3300/0,v1:10.45.128.28:6789/0] mon.a001s018
1: [v2:10.45.128.27:3300/0,v1:10.45.128.27:6789/0] mon.a001s017
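(For the mgr side, I am only guessing that restarting the standby daemon would make it re-register in the mgr map — something along these lines, to be confirmed:)
# ceph orch daemon restart mgr.a001s018.hcxnef
# ceph mgr stat        # num_standby should then show 2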
Thank you,
Anantha
-----Original Message-----
From: Eugen Block <eblock@xxxxxx>
Sent: Monday, April 1, 2024 1:10 PM
To: ceph-users@xxxxxxx
Subject: Re: ceph status not showing correct monitor services
Maybe it’s just not in the monmap? Can you show the output of:
ceph mon dump
Did you do any maintenance (apparently OSDs restarted recently) and
maybe accidentally removed a MON from the monmap?
Quoting "Adiga, Anantha" <anantha.adiga@xxxxxxxxx>:
Hi Anthony,
Seeing it since last afternoon. It is the same with the mgr services: "ceph -s" is reporting only TWO instead of THREE.
Also, mon and mgr show "is_active": false; see below.
# ceph orch ps --daemon_type=mgr
NAME                 HOST      PORTS   STATUS         REFRESHED  AGE  MEM USE  MEM LIM  VERSION  IMAGE ID      CONTAINER ID
mgr.a001s016.ctmoay  a001s016  *:8443  running (18M)  3m ago     23M  206M     -        16.2.5   6e73176320aa  169cafcbbb99
mgr.a001s017.bpygfm  a001s017  *:8443  running (19M)  3m ago     23M  332M     -        16.2.5   6e73176320aa  97257195158c
mgr.a001s018.hcxnef  a001s018  *:8443  running (20M)  3m ago     23M  113M     -        16.2.5   6e73176320aa  21ba5896cee2
# ceph orch ls --service_name=mgr
NAME PORTS RUNNING REFRESHED AGE PLACEMENT
mgr 3/3 3m ago 23M a001s016;a001s017;a001s018;count:3
# ceph orch ps --daemon_type=mon --format=json-pretty
[
{
"container_id": "8484a912f96a",
"container_image_digests": [
"docker.io/ceph/daemon@sha256:261bbe628f4b438f5bf10de5a8ee05282f2697a5a2cb7ff7668f776b61b9d586"
],
"container_image_id":
"6e73176320aaccf3b3fb660b9945d0514222bd7a83e28b96e8440c630ba6891f",
"container_image_name":
"docker.io/ceph/daemon@sha256:261bbe628f4b438f5bf10de5a8ee05282f2697a5a2cb7ff7668f776b61b9d586",
"created": "2024-03-31T23:55:16.164155Z",
"daemon_id": "a001s016",
"daemon_type": "mon",
"hostname": "a001s016",
"is_active": false,
<== why is it false
"last_refresh": "2024-04-01T19:38:30.929014Z",
"memory_request": 2147483648,
"memory_usage": 761685606,
"ports": [],
"service_name": "mon",
"started": "2024-03-31T23:55:16.268266Z",
"status": 1,
"status_desc": "running",
"version": "16.2.5"
},
Thank you,
Anantha
From: Anthony D'Atri <aad@xxxxxxxxxxxxxx>
Sent: Monday, April 1, 2024 12:25 PM
To: Adiga, Anantha <anantha.adiga@xxxxxxxxx>
Cc: ceph-users@xxxxxxx
Subject: Re: ceph status not showing correct monitor services
a001s017.bpygfm(active, since 13M), standbys: a001s016.ctmoay
Looks like you just had an mgr failover? Could be that the secondary
mgr hasn't caught up with current events.
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx