Hi,

Same problem here too, with 'orch apply osd --all-available-devices'. The mgr log shows:

cephadm 2021-09-07T09:12:34.256134+0000 mgr.POC-568.iozqlk (mgr.44107) 499 : cephadm [ERR] executing create_from_spec_one(([('POC-569', <ceph

and the journal on JAU-POC-569 has this traceback:

Sep 07 11:12:35 JAU-POC-569 bash[840]: Traceback (most recent call last):
Sep 07 11:12:35 JAU-POC-569 bash[840]:   File "/usr/share/ceph/mgr/cephadm/utils.py", line 73, in do_work
Sep 07 11:12:35 JAU-POC-569 bash[840]:     return f(*arg)
Sep 07 11:12:35 JAU-POC-569 bash[840]:   File "/usr/share/ceph/mgr/cephadm/services/osd.py", line 65, in create_from_spec_one
Sep 07 11:12:35 JAU-POC-569 bash[840]:     replace_osd_ids=osd_id_claims_for_host, env_vars=env_vars
Sep 07 11:12:35 JAU-POC-569 bash[840]:   File "/usr/share/ceph/mgr/cephadm/services/osd.py", line 93, in create_single_host
Sep 07 11:12:35 JAU-POC-569 bash[840]:     replace_osd_ids)
Sep 07 11:12:35 JAU-POC-569 bash[840]:   File "/usr/share/ceph/mgr/cephadm/services/osd.py", line 140, in deploy_osd_daemons_for_existing_osds
Sep 07 11:12:35 JAU-POC-569 bash[840]:     daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
Sep 07 11:12:35 JAU-POC-569 bash[840]:   File "/usr/share/ceph/mgr/cephadm/services/cephadmservice.py", line 425, in generate_config
Sep 07 11:12:35 JAU-POC-569 bash[840]:     extra_ceph_config=daemon_spec.ceph_conf)
Sep 07 11:12:35 JAU-POC-569 bash[840]:   File "/usr/share/ceph/mgr/cephadm/services/cephadmservice.py", line 467, in get_config_and_keyring
Sep 07 11:12:35 JAU-POC-569 bash[840]:     'entity': entity,
Sep 07 11:12:35 JAU-POC-569 bash[840]:   File "/usr/share/ceph/mgr/mgr_module.py", line 1262, in check_mon_command
Sep 07 11:12:35 JAU-POC-569 bash[840]:     raise MonCommandFailed(f'{cmd_dict["prefix"]} failed: {r.stderr} retval: {r.retval}')
Sep 07 11:12:35 JAU-POC-569 bash[840]: mgr_module.MonCommandFailed: auth get failed: failed to find osd.0 in keyring retval: -2

I don't know what the problem is.

'Jof
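In case it is useful for comparing notes, these are the sorts of checks that show whether the mon keyring really is missing the osd entities and whether the OSDs were partially prepared on the hosts. This is only a sketch; osd.0 and POC-569 below are just the examples from this thread, so substitute your own ids and hostnames:

  # Does the mon have auth entities for the failing OSDs at all?
  ceph auth ls | grep '^osd\.'
  ceph auth get osd.0    # should reproduce "failed to find osd.0 in keyring" if the entity is missing

  # Were OSD ids allocated / devices prepared even though deployment failed?
  ceph osd tree
  ceph orch device ls POC-569
  cephadm ceph-volume lvm list    # run on the affected host; lists LVs ceph-volume already prepared
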
On Thu, Sep 2, 2021 at 19:30, Matthew Pounsett <matt@xxxxxxxxxxxxx> wrote:

> I'm trying to bring up a new cluster, just installed, and I'm getting
> errors while trying to deploy OSDs. Of the 85 candidates found, I've
> got 63 in and 0 up.
>
> All of the hosts were successfully added to the cluster using 'ceph
> orch host add ...', but I'm seeing things in the logs like the large
> traceback pasted below. I'm also seeing one-line errors for a few of
> the OSDs that look like this:
>
> mgr_module.MonCommandFailed: auth get failed: failed to find osd.17 in
> keyring retval: -2
>
> I'm seeing those for OSDs 0, 1, 10, 11, 12, 14, 15, and 17.
>
> `ceph auth ls` does not show osd.15 (or any of the OSDs above) in its
> output, but this would have been auto-discovered on one of the 8 OSD
> systems added to the cluster, so I'm not sure why it should be missing
> from the auth info.
>
> What can I look at here to find the issue? I'm quite new with Ceph,
> so I'm still flailing around a bit trying to find the troubleshooting
> steps.
>
> Sep 2, 2021, 1:09:58 PM [ERR] Failed to apply osd.all-available-devices spec
> DriveGroupSpec(name=all-available-devices->placement=PlacementSpec(host_pattern='*'),
> service_id='all-available-devices', service_type='osd',
> data_devices=DeviceSelection(all=True), osd_id_claims={},
> unmanaged=False, filter_logic='AND', preview_only=False): auth get
> failed: failed to find osd.15 in keyring retval: -2
> Traceback (most recent call last):
>   File "/usr/share/ceph/mgr/cephadm/serve.py", line 582, in _apply_all_services
>     if self._apply_service(spec):
>   File "/usr/share/ceph/mgr/cephadm/serve.py", line 639, in _apply_service
>     self.mgr.osd_service.create_from_spec(cast(DriveGroupSpec, spec))
>   File "/usr/share/ceph/mgr/cephadm/services/osd.py", line 73, in create_from_spec
>     ret = create_from_spec_one(self.prepare_drivegroup(drive_group))
>   File "/usr/share/ceph/mgr/cephadm/utils.py", line 79, in forall_hosts_wrapper
>     return CephadmOrchestrator.instance._worker_pool.map(do_work, vals)
>   File "/lib64/python3.6/multiprocessing/pool.py", line 266, in map
>     return self._map_async(func, iterable, mapstar, chunksize).get()
>   File "/lib64/python3.6/multiprocessing/pool.py", line 644, in get
>     raise self._value
>   File "/lib64/python3.6/multiprocessing/pool.py", line 119, in worker
>     result = (True, func(*args, **kwds))
>   File "/lib64/python3.6/multiprocessing/pool.py", line 44, in mapstar
>     return list(map(*args))
>   File "/usr/share/ceph/mgr/cephadm/utils.py", line 73, in do_work
>     return f(*arg)
>   File "/usr/share/ceph/mgr/cephadm/services/osd.py", line 65, in create_from_spec_one
>     replace_osd_ids=osd_id_claims_for_host, env_vars=env_vars
>   File "/usr/share/ceph/mgr/cephadm/services/osd.py", line 93, in create_single_host
>     replace_osd_ids)
>   File "/usr/share/ceph/mgr/cephadm/services/osd.py", line 140, in deploy_osd_daemons_for_existing_osds
>     daemon_spec.final_config, daemon_spec.deps = self.generate_config(daemon_spec)
>   File "/usr/share/ceph/mgr/cephadm/services/cephadmservice.py", line 425, in generate_config
>     extra_ceph_config=daemon_spec.ceph_conf)
>   File "/usr/share/ceph/mgr/cephadm/services/cephadmservice.py", line 467, in get_config_and_keyring
>     'entity': entity,
>   File "/usr/share/ceph/mgr/mgr_module.py", line 1262, in check_mon_command
>     raise MonCommandFailed(f'{cmd_dict["prefix"]} failed: {r.stderr} retval: {r.retval}')
> mgr_module.MonCommandFailed: auth get failed: failed to find osd.15 in
> keyring retval: -2
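For anyone hitting the same state (OSD ids allocated and devices prepared, but no matching osd.N entity in the mon keyring, so the daemons never come up), one possible way to unwind it on a brand-new cluster with no data is to remove the half-created OSDs and let the orchestrator retry against clean devices. This is only a sketch; osd 15, POC-569 and /dev/sdb are placeholders, and zapping destroys whatever is on the device, so skip it if there is any doubt:

  # 1. Remove the half-created id from the cluster (it is down/out and holds no data on a fresh cluster)
  ceph osd purge 15 --yes-i-really-mean-it

  # 2. Wipe the device it was prepared on so the orchestrator treats it as available again
  ceph orch device zap POC-569 /dev/sdb --force

  # 3. The existing spec should then redeploy it, or re-apply it explicitly
  ceph orch apply osd --all-available-devices
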
_______________________________________________
ceph-users mailing list -- ceph-users@xxxxxxx
To unsubscribe send an email to ceph-users-leave@xxxxxxx