Dear all,
I have a Ceph installation (dev site) with two nodes, each running a mon daemon and an osd daemon.
(Yes, I know running a cluster with only two mons is bad, but I have no choice since I only have two nodes.)
Now the two nodes have been migrated to another datacenter, but after they were booted up the mon daemons are
unable to reach quorum. How can I proceed? (If there is no way to recover, I can accept the loss, but
I wish to know how to prevent this from happening again.)
Here is the mon_status output of the two nodes:
-----------------------
root@openstack003:/var/log/ceph# ceph daemon mon.openstack003 mon_status
{
"name": "openstack003",
"rank": 0,
"state": "electing",
"election_epoch": 45,
"quorum": [],
"outside_quorum": [],
"extra_probe_peers": [],
"sync_provider": [
754974721,
"mon.1 10.41.41.4:6789\/0",
"2017-01-18 02:53:49.425917",
5654786,
","
],
"monmap": {
"epoch": 10,
"fsid": "71861477-db77-4fab-a8f8-10d3b16e1722",
"modified": "2016-10-19 06:41:29.202924",
"created": "2016-10-19 06:26:24.911408",
"mons": [
{
"rank": 0,
"name": "openstack003",
"addr": "10.41.41.3:6789\/0"
},
{
"rank": 1,
"name": "openstack004",
"addr": "10.41.41.4:6789\/0"
}
]
}
}
root@openstack004:/var/log/ceph# ceph daemon mon.openstack004 mon_status
{
"name": "openstack004",
"rank": 1,
"state": "probing",
"election_epoch": 0,
"quorum": [],
"outside_quorum": [
"openstack004"
],
"extra_probe_peers": [],
"sync_provider": [],
"monmap": {
"epoch": 10,
"fsid": "71861477-db77-4fab-a8f8-10d3b16e1722",
"modified": "2016-10-19 06:41:29.202924",
"created": "2016-10-19 06:26:24.911408",
"mons": [
{
"rank": 0,
"name": "openstack003",
"addr": "10.41.41.3:6789\/0"
},
{
"rank": 1,
"name": "openstack004",
"addr": "10.41.41.4:6789\/0"
}
]
}
}
----------------------------
Here are the logs of the two mons:
2017-01-18 03:15:04.675296 7fc892173700 5 mon.openstack003@0(electing).elector(45) start -- can i
be leader?
2017-01-18 03:15:04.675355 7fc892173700 1 mon.openstack003@0(electing).elector(45) init, last seen
epoch 45
2017-01-18 03:15:04.675932 7fc892173700 1 -- 10.41.41.3:6789/0 --> mon.1 10.41.41.4:6789/0 --
election(71861477-db77-4fab-a8f8-10d3b16e1722 propose 45) v5 -- ?+0 0x55b488984700
2017-01-18 03:15:05.515390 7fc891972700 1 -- 10.41.41.3:6789/0 <== mon.1 10.41.41.4:6789/0 675 ====
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 ==== 69+0+0 (72044430 0
0) 0x55b4889bf600 con 0x55b487666900
2017-01-18 03:15:05.515458 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6
2017-01-18 03:15:05.515463 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe_probe mon.1
10.41.41.4:6789/0mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 features
576460752032874495
2017-01-18 03:15:05.515500 7fc891972700 1 -- 10.41.41.3:6789/0 --> 10.41.41.4:6789/0 --
mon_probe(reply 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack003 paxos( fc 5654529 lc 5655078
)) v6 -- ?+0 0x55b4889bf340 con 0x55b487666900
2017-01-18 03:15:07.515552 7fc891972700 1 -- 10.41.41.3:6789/0 <== mon.1 10.41.41.4:6789/0 676 ====
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 ==== 69+0+0 (72044430 0
0) 0x55b4889bf8c0 con 0x55b487666900
2017-01-18 03:15:07.515620 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6
2017-01-18 03:15:07.515625 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe_probe mon.1
10.41.41.4:6789/0mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 features
576460752032874495
2017-01-18 03:15:07.515652 7fc891972700 1 -- 10.41.41.3:6789/0 --> 10.41.41.4:6789/0 --
mon_probe(reply 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack003 paxos( fc 5654529 lc 5655078
)) v6 -- ?+0 0x55b4889bf600 con 0x55b487666900
2017-01-18 03:15:09.515709 7fc891972700 1 -- 10.41.41.3:6789/0 <== mon.1 10.41.41.4:6789/0 677 ====
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 ==== 69+0+0 (72044430 0
0) 0x55b4889bfb80 con 0x55b487666900
2017-01-18 03:15:09.515777 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6
2017-01-18 03:15:09.515782 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe_probe mon.1
10.41.41.4:6789/0mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 features
576460752032874495
2017-01-18 03:15:09.515797 7fc891972700 1 -- 10.41.41.3:6789/0 --> 10.41.41.4:6789/0 --
mon_probe(reply 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack003 paxos( fc 5654529 lc 5655078
)) v6 -- ?+0 0x55b4889bf8c0 con 0x55b487666900
2017-01-18 03:15:09.676118 7fc892173700 5 mon.openstack003@0(electing).elector(45) election timer
expired
------------------------------
2017-01-18 03:15:04.675296 7fc892173700 5 mon.openstack003@0(electing).elector(45) start -- can i
be leader?
2017-01-18 03:15:04.675355 7fc892173700 1 mon.openstack003@0(electing).elector(45) init, last seen
epoch 45
2017-01-18 03:15:04.675932 7fc892173700 1 -- 10.41.41.3:6789/0 --> mon.1 10.41.41.4:6789/0 --
election(71861477-db77-4fab-a8f8-10d3b16e1722 propose 45) v5 -- ?+0 0x55b488984700
2017-01-18 03:15:05.515390 7fc891972700 1 -- 10.41.41.3:6789/0 <== mon.1 10.41.41.4:6789/0 675 ====
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 ==== 69+0+0 (72044430 0
0) 0x55b4889bf600 con 0x55b487666900
2017-01-18 03:15:05.515458 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6
2017-01-18 03:15:05.515463 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe_probe mon.1
10.41.41.4:6789/0mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 features
576460752032874495
2017-01-18 03:15:05.515500 7fc891972700 1 -- 10.41.41.3:6789/0 --> 10.41.41.4:6789/0 --
mon_probe(reply 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack003 paxos( fc 5654529 lc 5655078
)) v6 -- ?+0 0x55b4889bf340 con 0x55b487666900
2017-01-18 03:15:07.515552 7fc891972700 1 -- 10.41.41.3:6789/0 <== mon.1 10.41.41.4:6789/0 676 ====
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 ==== 69+0+0 (72044430 0
0) 0x55b4889bf8c0 con 0x55b487666900
2017-01-18 03:15:07.515620 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6
2017-01-18 03:15:07.515625 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe_probe mon.1
10.41.41.4:6789/0mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 features
576460752032874495
2017-01-18 03:15:07.515652 7fc891972700 1 -- 10.41.41.3:6789/0 --> 10.41.41.4:6789/0 --
mon_probe(reply 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack003 paxos( fc 5654529 lc 5655078
)) v6 -- ?+0 0x55b4889bf600 con 0x55b487666900
2017-01-18 03:15:09.515709 7fc891972700 1 -- 10.41.41.3:6789/0 <== mon.1 10.41.41.4:6789/0 677 ====
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 ==== 69+0+0 (72044430 0
0) 0x55b4889bfb80 con 0x55b487666900
2017-01-18 03:15:09.515777 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe
mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6
2017-01-18 03:15:09.515782 7fc891972700 10 mon.openstack003@0(electing) e10 handle_probe_probe mon.1
10.41.41.4:6789/0mon_probe(probe 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack004) v6 features
576460752032874495
2017-01-18 03:15:09.515797 7fc891972700 1 -- 10.41.41.3:6789/0 --> 10.41.41.4:6789/0 --
mon_probe(reply 71861477-db77-4fab-a8f8-10d3b16e1722 name openstack003 paxos( fc 5654529 lc 5655078
)) v6 -- ?+0 0x55b4889bf8c0 con 0x55b487666900
2017-01-18 03:15:09.676118 7fc892173700 5 mon.openstack003@0(electing).elector(45) election timer
expired
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com