Failing to create monitor in a working cluster.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello, I've been running a 3-node Proxmox cluster with 4 Ceph OSDs for 3 years as a production cluster.
As a test in preparation for moving the Ceph cluster network, I destroyed one of the 3 working monitors and tried to recreate it.
After destroying it, the new monitor refuses to join the cluster, even on the old network. I've tried all the steps in the documentation's "Troubleshooting monitors" section.

The new monitor reports the following status, extracted with ceph --admin-daemon file.asok mon_status:
{
    "name": "n3ceph",
    "rank": -1,
    "state": "probing",
    "election_epoch": 0,
    "quorum": [],
    "features": {
        "required_con": "2449958197560098820",
        "required_mon": [
            "kraken",
            "luminous",
            "mimic",
            "osdmap-prune",
            "nautilus",
            "octopus",
            "pacific",
            "elector-pinging"
        ],
        "quorum_con": "0",
        "quorum_mon": []
    },
    "outside_quorum": [],
    "extra_probe_peers": [],
    "sync_provider": [],
    "monmap": {
        "epoch": 6,
        "fsid": "5e60d0bb-33b4-42db-bbe7-7032c35ee605",
        "modified": "2023-03-31T11:54:44.616569Z",
        "created": "2019-12-02T13:50:38.097448Z",
        "min_mon_release": 16,
        "min_mon_release_name": "pacific",
        "election_strategy": 1,
        "disallowed_leaders: ": "",
        "stretch_mode": false,
        "tiebreaker_mon": "",
        "removed_ranks: ": "1",
        "features": {
            "persistent": [
                "kraken",
                "luminous",
                "mimic",
                "osdmap-prune",
                "nautilus",
                "octopus",
                "pacific",
                "elector-pinging"
            ],
            "optional": []
        },
        "mons": [
            {
                "rank": 0,
                "name": "node1",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "10.100.100.1:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "10.100.100.1:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "10.100.100.1:6789/0",
                "public_addr": "10.100.100.1:6789/0",
                "priority": 0,
                "weight": 0,
                "crush_location": "{}"
            },
            {
                "rank": 1,
                "name": "node2",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "10.100.100.2:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "10.100.100.2:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "10.100.100.2:6789/0",
                "public_addr": "10.100.100.2:6789/0",
                "priority": 0,
                "weight": 0,
                "crush_location": "{}"
            }
        ]
    },
    "feature_map": {
        "mon": [
            {
                "features": "0x3f01cfbdfffdffff",
                "release": "luminous",
                "num": 1
            }
        ]
    },
    "stretch_mode": false
}

The mon_status of the quorum leader is as follows:
{
    "name": "node1",
    "rank": 0,
    "state": "leader",
    "election_epoch": 340,
    "quorum": [
        0,
        1
    ],
    "quorum_age": 13090,
    "features": {
        "required_con": "2449958747317026820",
        "required_mon": [
            "kraken",
            "luminous",
            "mimic",
            "osdmap-prune",
            "nautilus",
            "octopus",
            "pacific",
            "elector-pinging"
        ],
        "quorum_con": "4540138314316775423",
        "quorum_mon": [
            "kraken",
            "luminous",
            "mimic",
            "osdmap-prune",
            "nautilus",
            "octopus",
            "pacific",
            "elector-pinging"
        ]
    },
    "outside_quorum": [],
    "extra_probe_peers": [],
    "sync_provider": [],
    "monmap": {
        "epoch": 6,
        "fsid": "5e60d0bb-33b4-42db-bbe7-7032c35ee605",
        "modified": "2023-03-31T11:54:44.616569Z",
        "created": "2019-12-02T13:50:38.097448Z",
        "min_mon_release": 16,
        "min_mon_release_name": "pacific",
        "election_strategy": 1,
        "disallowed_leaders: ": "",
        "stretch_mode": false,
        "tiebreaker_mon": "",
        "removed_ranks: ": "1",
        "features": {
            "persistent": [
                "kraken",
                "luminous",
                "mimic",
                "osdmap-prune",
                "nautilus",
                "octopus",
                "pacific",
                "elector-pinging"
            ],
            "optional": []
        },
        "mons": [
            {
                "rank": 0,
                "name": "node1",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "10.100.100.1:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "10.100.100.1:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "10.100.100.1:6789/0",
                "public_addr": "10.100.100.1:6789/0",
                "priority": 0,
                "weight": 0,
                "crush_location": "{}"
            },
            {
                "rank": 1,
                "name": "node2",
                "public_addrs": {
                    "addrvec": [
                        {
                            "type": "v2",
                            "addr": "10.100.100.2:3300",
                            "nonce": 0
                        },
                        {
                            "type": "v1",
                            "addr": "10.100.100.2:6789",
                            "nonce": 0
                        }
                    ]
                },
                "addr": "10.100.100.2:6789/0",
                "public_addr": "10.100.100.2:6789/0",
                "priority": 0,
                "weight": 0,
                "crush_location": "{}"
            }
        ]
    },
    "feature_map": {
        "mon": [
            {
                "features": "0x3f01cfbdfffdffff",
                "release": "luminous",
                "num": 1
            }
        ],
        "osd": [
            {
                "features": "0x3f01cfbdfffdffff",
                "release": "luminous",
                "num": 5
            }
        ],
        "client": [
            {
                "features": "0x2f018fb87aa4aafe",
                "release": "luminous",
                "num": 1
            },
            {
                "features": "0x3f01cfbdfffdffff",
                "release": "luminous",
                "num": 12
            }
        ],
        "mgr": [
            {
                "features": "0x3f01cfbdfffdffff",
                "release": "luminous",
                "num": 1
            }
        ]
    },
    "stretch_mode": false
}
I tried to get a debug log with ceph daemon mon.n3ceph config set debug_mon 10/10
and restarting the service, but the Ceph log file stopped being written after I tried that setting.

journalctl -u tells me:
mar 31 17:35:22 node3 ceph-mon[240916]: 2023-03-31T17:35:22.926+0200 7f49e0699700 -1 mon.n3ceph@-1(probing) e6 get_health_metrics reporting 4 slow ops, oldest is log(1 entries from seq 1 at 2023-03-31T17:30:19.347379+0200)
mar 31 17:35:27 node3 ceph-mon[240916]: 2023-03-31T17:35:27.926+0200 7f49e0699700 -1 mon.n3ceph@-1(probing) e6 get_health_metrics reporting 4 slow ops, oldest is log(1 entries from seq 1 at 2023-03-31T17:30:19.347379+0200)
mar 31 17:35:32 node3 ceph-mon[240916]: 2023-03-31T17:35:32.926+0200 7f49e0699700 -1 mon.n3ceph@-1(probing) e6 get_health_metrics reporting 4 slow ops, oldest is log(1 entries from seq 1 at 2023-03-31T17:30:19.347379+0200).

Any ideas? The cluster is running fine with two monitors, but a reboot of one of the nodes could be a big problem.
Kind regards and many thanks.
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx



[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux