I set 3 OSDs to "out"; all three were on the same host, so this should not impact the pool, since it uses 3x replication and the CRUSH rule places one replica per host.
However, one PG is now stuck in the "unknown" state. I'm not sure why; I did have background writes going on at the time the OSDs were marked out. Thoughts?
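For reference, this is roughly what I ran to take the OSDs out (the three on hostosd3), plus the CRUSH rule dump I'd use to double-check the one-replica-per-host placement (rule name assumed to be the default replicated_rule):

ceph osd out 1 2 7
ceph osd crush rule dump replicated_rule    # expecting a chooseleaf step with type "host"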
ceph osd tree
ID CLASS WEIGHT  TYPE NAME          STATUS REWEIGHT PRI-AFF
-1       0.08817 root default
-5       0.02939     host hostosd1
 3   hdd 0.00980         osd.3          up  1.00000 1.00000
 4   hdd 0.00980         osd.4          up  1.00000 1.00000
 5   hdd 0.00980         osd.5          up  1.00000 1.00000
-7       0.02939     host hostosd2
 0   hdd 0.00980         osd.0          up  1.00000 1.00000
 6   hdd 0.00980         osd.6          up  1.00000 1.00000
 8   hdd 0.00980         osd.8          up  1.00000 1.00000
-3       0.02939     host hostosd3
 1   hdd 0.00980         osd.1          up        0 1.00000
 2   hdd 0.00980         osd.2          up        0 1.00000
 7   hdd 0.00980         osd.7          up        0 1.00000
ceph health detail
PG_AVAILABILITY Reduced data availability: 1 pg inactive
pg 1.e2 is stuck inactive for 1885.728547, current state unknown, last acting [4,0]
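Since only [4,0] show up in the acting set, I assume the pool's min_size matters here; for reference, size/min_size can be checked with (pool name is a placeholder):

ceph osd pool get <pool> size
ceph osd pool get <pool> min_size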
ceph pg 1.e2 query
{
"state": "unknown",
"snap_trimq": "[]",
"snap_trimq_len": 0,
"epoch": 132,
"up": [
4,
0
],
"acting": [
4,
0
],
"info": {
"pgid": "1.e2",
"last_update": "34'3072",
"last_complete": "34'3072",
"log_tail": "0'0",
"last_user_version": 3072,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": [],
"history": {
"epoch_created": 29,
"epoch_pool_created": 29,
"last_epoch_started": 30,
"last_interval_started": 29,
"last_epoch_clean": 30,
"last_interval_clean": 29,
"last_epoch_split": 0,
"last_epoch_marked_full": 0,
"same_up_since": 70,
"same_interval_since": 70,
"same_primary_since": 70,
"last_scrub": "0'0",
"last_scrub_stamp": "2019-05-20 21:15:42.448125",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "2019-05-20 21:15:42.448125",
"last_clean_scrub_stamp": "2019-05-20 21:15:42.448125"
},
"stats": {
"version": "34'3072",
"reported_seq": "3131",
"reported_epoch": "132",
"state": "unknown",
"last_fresh": "2019-05-20 22:52:07.898135",
"last_change": "2019-05-20 22:50:46.711730",
"last_active": "2019-05-20 22:50:26.109185",
"last_peered": "2019-05-20 22:02:01.008787",
"last_clean": "2019-05-20 22:02:01.008787",
"last_became_active": "2019-05-20 21:15:43.662550",
"last_became_peered": "2019-05-20 21:15:43.662550",
"last_unstale": "2019-05-20 22:52:07.898135",
"last_undegraded": "2019-05-20 22:52:07.898135",
"last_fullsized": "2019-05-20 22:52:07.898135",
"mapping_epoch": 70,
"log_start": "0'0",
"ondisk_log_start": "0'0",
"created": 29,
"last_epoch_clean": 30,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "0'0",
"last_scrub_stamp": "2019-05-20 21:15:42.448125",
"last_deep_scrub": "0'0",
"last_deep_scrub_stamp": "2019-05-20 21:15:42.448125",
"last_clean_scrub_stamp": "2019-05-20 21:15:42.448125",
"log_size": 3072,
"ondisk_log_size": 3072,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"manifest_stats_invalid": false,
"snaptrimq_len": 0,
"stat_sum": {
"num_bytes": 12582912,
"num_objects": 3,
"num_object_clones": 0,
"num_object_copies": 9,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 0,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 3,
"num_whiteouts": 0,
"num_read": 0,
"num_read_kb": 0,
"num_write": 3072,
"num_write_kb": 12288,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 0,
"num_bytes_recovered": 0,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0,
"num_legacy_snapsets": 0,
"num_large_omap_objects": 0,
"num_objects_manifest": 0
},
"up": [
4,
0
],
"acting": [
4,
0
],
"blocked_by": [],
"up_primary": 4,
"acting_primary": 4,
"purged_snaps": []
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 30,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
"peer_info": [],
"recovery_state": [
{
"name": "Started/Primary/WaitActingChange",
"enter_time": "2019-05-20 22:50:46.355495",
"comment": "waiting for pg acting set to change"
},
{
"name": "Started",
"enter_time": "2019-05-20 22:50:26.109724"
}
],
"agent_state": {}
}
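The recovery_state at the end is what puzzles me most: the PG has been sitting in Started/Primary/WaitActingChange ("waiting for pg acting set to change") since shortly after the OSDs went out. Would kicking peering along these lines be a reasonable next step, or does that just hide the real problem?

ceph pg repeer 1.e2    # if available on this release
ceph osd down 4        # mark the current primary down so it re-asserts itself and re-peers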