Hi,
--
I have two OSD and Mon nodes.
I'm going to add a third OSD and mon to this cluster, but first I want to fix this error:
```
# ceph -s
cluster 8461e3b5-abda-4471-98c0-913e56aec890
health HEALTH_WARN
64 pgs degraded
64 pgs stuck unclean
64 pgs undersized
recovery 8261/16522 objects degraded (50.000%)
monmap e1: 2 mons at {ceph-rbx-1=172.29.20.10:6789/0,ceph-rbx-2=172.29.20.11:6789/0}
election epoch 22, quorum 0,1 ceph-rbx-1,ceph-rbx-2
osdmap e57: 2 osds: 1 up, 1 in; 64 remapped pgs
flags sortbitwise,require_jewel_osds
pgmap v784695: 64 pgs, 1 pools, 31719 MB data, 8261 objects
31539 MB used, 65692 MB / 97231 MB avail
8261/16522 objects degraded (50.000%)
64 active+undersized+degraded
client io 22038 B/s wr, 0 op/s rd, 0 op/s wr
```
I have executed this command:
```
# ceph pg ls degraded | tail -n +2 | awk '{print $1}' | xargs -n 1 ceph pg force_create_pg
```
after which I have:
```
# ceph health
HEALTH_ERR 45 pgs are stuck inactive for more than 300 seconds; 19 pgs degraded; 45 pgs stuck inactive; 19 pgs stuck unclean; 19 pgs undersized; recovery 2514/5028 objects degraded (50.000%)
```
If I look at the pg detail, as explained here http://docs.ceph.com/docs/infernalis/rados/troubleshooting/troubleshooting-pg/#placement-group-down-peering-failure
I have:
```
# ceph pg 0.1 query
{
"state": "active+undersized+degraded",
"snap_trimq": "[]",
"epoch": 57,
"up": [
1
],
"acting": [
1
],
"actingbackfill": [
"1"
],
"info": {
"pgid": "0.1",
"last_update": "57'32353",
"last_complete": "57'32353",
"log_tail": "42'25917",
"last_user_version": 32353,
"last_backfill": "MAX",
"last_backfill_bitwise": 0,
"purged_snaps": "[1~3]",
"history": {
"epoch_created": 1,
"last_epoch_started": 52,
"last_epoch_clean": 52,
"last_epoch_split": 0,
"last_epoch_marked_full": 0,
"same_up_since": 51,
"same_interval_since": 51,
"same_primary_since": 34,
"last_scrub": "50'28863",
"last_scrub_stamp": "2017-01-14 07:12:27.930427",
"last_deep_scrub": "42'23417",
"last_deep_scrub_stamp": "2017-01-10 20:31:12.351497",
"last_clean_scrub_stamp": "2017-01-14 07:12:27.930427"
},
"stats": {
"version": "57'32353",
"reported_seq": "31704",
"reported_epoch": "57",
"state": "active+undersized+degraded",
"last_fresh": "2017-01-16 10:47:07.330850",
"last_change": "2017-01-14 13:42:42.104820",
"last_active": "2017-01-16 10:47:07.330850",
"last_peered": "2017-01-16 10:47:07.330850",
"last_clean": "2017-01-14 11:29:21.619183",
"last_became_active": "2017-01-14 13:42:42.104820",
"last_became_peered": "2017-01-14 13:42:42.104820",
"last_unstale": "2017-01-16 10:47:07.330850",
"last_undegraded": "2017-01-14 13:42:41.066061",
"last_fullsized": "2017-01-14 13:42:41.066061",
"mapping_epoch": 37,
"log_start": "42'25917",
"ondisk_log_start": "42'25917",
"created": 1,
"last_epoch_clean": 52,
"parent": "0.0",
"parent_split_bits": 0,
"last_scrub": "50'28863",
"last_scrub_stamp": "2017-01-14 07:12:27.930427",
"last_deep_scrub": "42'23417",
"last_deep_scrub_stamp": "2017-01-10 20:31:12.351497",
"last_clean_scrub_stamp": "2017-01-14 07:12:27.930427",
"log_size": 6436,
"ondisk_log_size": 6436,
"stats_invalid": false,
"dirty_stats_invalid": false,
"omap_stats_invalid": false,
"hitset_stats_invalid": false,
"hitset_bytes_stats_invalid": false,
"pin_stats_invalid": false,
"stat_sum": {
"num_bytes": 567734272,
"num_objects": 140,
"num_object_clones": 0,
"num_object_copies": 280,
"num_objects_missing_on_primary": 0,
"num_objects_missing": 0,
"num_objects_degraded": 140,
"num_objects_misplaced": 0,
"num_objects_unfound": 0,
"num_objects_dirty": 140,
"num_whiteouts": 0,
"num_read": 5801,
"num_read_kb": 176032,
"num_write": 64516,
"num_write_kb": 1211660,
"num_scrub_errors": 0,
"num_shallow_scrub_errors": 0,
"num_deep_scrub_errors": 0,
"num_objects_recovered": 2,
"num_bytes_recovered": 8388608,
"num_keys_recovered": 0,
"num_objects_omap": 0,
"num_objects_hit_set_archive": 0,
"num_bytes_hit_set_archive": 0,
"num_flush": 0,
"num_flush_kb": 0,
"num_evict": 0,
"num_evict_kb": 0,
"num_promote": 0,
"num_flush_mode_high": 0,
"num_flush_mode_low": 0,
"num_evict_mode_some": 0,
"num_evict_mode_full": 0,
"num_objects_pinned": 0
},
"up": [
1
],
"acting": [
1
],
"blocked_by": [],
"up_primary": 1,
"acting_primary": 1
},
"empty": 0,
"dne": 0,
"incomplete": 0,
"last_epoch_started": 52,
"hit_set_history": {
"current_last_update": "0'0",
"history": []
}
},
"peer_info": [],
"recovery_state": [
{
"name": "Started\/Primary\/Active",
"enter_time": "2017-01-14 13:42:42.084021",
"might_have_unfound": [],
"recovery_progress": {
"backfill_targets": [],
"waiting_on_backfill": [],
"last_backfill_started": "MIN",
"backfill_info": {
"begin": "MIN",
"end": "MIN",
"objects": []
},
"peer_backfill_info": [],
"backfills_in_flight": [],
"recovering": [],
"pg_backend": {
"pull_from_peer": [],
"pushing": []
}
},
"scrub": {
"scrubber.epoch_start": "37",
"scrubber.active": 0,
"scrubber.state": "INACTIVE",
"scrubber.start": "MIN",
"scrubber.end": "MIN",
"scrubber.subset_last_update": "0'0",
"scrubber.deep": false,
"scrubber.seed": 0,
"scrubber.waiting_on": 0,
"scrubber.waiting_on_whom": []
}
},
{
"name": "Started",
"enter_time": "2017-01-14 13:42:41.065959"
}
],
"agent_state": {}
}
```
I don't understand what it means.
Now, I don't know what I need to do to fix it.
Some tips?
Best regards,
Stéphane
Stéphane Klein <contact@xxxxxxxxxxxxxxxxxxx>
blog: http://stephane-klein.info
cv : http://cv.stephane-klein.info
Twitter: http://twitter.com/klein_stephane
blog: http://stephane-klein.info
cv : http://cv.stephane-klein.info
Twitter: http://twitter.com/klein_stephane
_______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com