How to fix: HEALTH_ERR 45 pgs are stuck inactive for more than 300 seconds; 19 pgs degraded; 45 pgs stuck inactive; 19 pgs stuck unclean; 19 pgs undersized; recovery 2514/5028 objects degraded (50.000%)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

I have two OSD and Mon nodes.

I'm going to add a third OSD and MON to this cluster, but first I want to fix this error:

```
# ceph -s
    cluster 8461e3b5-abda-4471-98c0-913e56aec890
     health HEALTH_WARN
            64 pgs degraded
            64 pgs stuck unclean
            64 pgs undersized
            recovery 8261/16522 objects degraded (50.000%)
     monmap e1: 2 mons at {ceph-rbx-1=172.29.20.10:6789/0,ceph-rbx-2=172.29.20.11:6789/0}
            election epoch 22, quorum 0,1 ceph-rbx-1,ceph-rbx-2
     osdmap e57: 2 osds: 1 up, 1 in; 64 remapped pgs
            flags sortbitwise,require_jewel_osds
      pgmap v784695: 64 pgs, 1 pools, 31719 MB data, 8261 objects
            31539 MB used, 65692 MB / 97231 MB avail
            8261/16522 objects degraded (50.000%)
                  64 active+undersized+degraded
  client io 22038 B/s wr, 0 op/s rd, 0 op/s wr
```

I have executed this command:

```
# ceph pg ls degraded | tail -n +2 | awk '{print $1}' | xargs -n 1 ceph pg force_create_pg
```

after which I have:

```
# ceph health
HEALTH_ERR 45 pgs are stuck inactive for more than 300 seconds; 19 pgs degraded; 45 pgs stuck inactive; 19 pgs stuck unclean; 19 pgs undersized; recovery 2514/5028 objects degraded (50.000%)
```

If I look at the PG details as explained here: http://docs.ceph.com/docs/infernalis/rados/troubleshooting/troubleshooting-pg/#placement-group-down-peering-failure
I get:

```
# ceph pg 0.1 query
{
    "state": "active+undersized+degraded",
    "snap_trimq": "[]",
    "epoch": 57,
    "up": [
        1
    ],
    "acting": [
        1
    ],
    "actingbackfill": [
        "1"
    ],
    "info": {
        "pgid": "0.1",
        "last_update": "57'32353",
        "last_complete": "57'32353",
        "log_tail": "42'25917",
        "last_user_version": 32353,
        "last_backfill": "MAX",
        "last_backfill_bitwise": 0,
        "purged_snaps": "[1~3]",
        "history": {
            "epoch_created": 1,
            "last_epoch_started": 52,
            "last_epoch_clean": 52,
            "last_epoch_split": 0,
            "last_epoch_marked_full": 0,
            "same_up_since": 51,
            "same_interval_since": 51,
            "same_primary_since": 34,
            "last_scrub": "50'28863",
            "last_scrub_stamp": "2017-01-14 07:12:27.930427",
            "last_deep_scrub": "42'23417",
            "last_deep_scrub_stamp": "2017-01-10 20:31:12.351497",
            "last_clean_scrub_stamp": "2017-01-14 07:12:27.930427"
        },
        "stats": {
            "version": "57'32353",
            "reported_seq": "31704",
            "reported_epoch": "57",
            "state": "active+undersized+degraded",
            "last_fresh": "2017-01-16 10:47:07.330850",
            "last_change": "2017-01-14 13:42:42.104820",
            "last_active": "2017-01-16 10:47:07.330850",
            "last_peered": "2017-01-16 10:47:07.330850",
            "last_clean": "2017-01-14 11:29:21.619183",
            "last_became_active": "2017-01-14 13:42:42.104820",
            "last_became_peered": "2017-01-14 13:42:42.104820",
            "last_unstale": "2017-01-16 10:47:07.330850",
            "last_undegraded": "2017-01-14 13:42:41.066061",
            "last_fullsized": "2017-01-14 13:42:41.066061",
            "mapping_epoch": 37,
            "log_start": "42'25917",
            "ondisk_log_start": "42'25917",
            "created": 1,
            "last_epoch_clean": 52,
            "parent": "0.0",
            "parent_split_bits": 0,
            "last_scrub": "50'28863",
            "last_scrub_stamp": "2017-01-14 07:12:27.930427",
            "last_deep_scrub": "42'23417",
            "last_deep_scrub_stamp": "2017-01-10 20:31:12.351497",
            "last_clean_scrub_stamp": "2017-01-14 07:12:27.930427",
            "log_size": 6436,
            "ondisk_log_size": 6436,
            "stats_invalid": false,
            "dirty_stats_invalid": false,
            "omap_stats_invalid": false,
            "hitset_stats_invalid": false,
            "hitset_bytes_stats_invalid": false,
            "pin_stats_invalid": false,
            "stat_sum": {
                "num_bytes": 567734272,
                "num_objects": 140,
                "num_object_clones": 0,
                "num_object_copies": 280,
                "num_objects_missing_on_primary": 0,
                "num_objects_missing": 0,
                "num_objects_degraded": 140,
                "num_objects_misplaced": 0,
                "num_objects_unfound": 0,
                "num_objects_dirty": 140,
                "num_whiteouts": 0,
                "num_read": 5801,
                "num_read_kb": 176032,
                "num_write": 64516,
                "num_write_kb": 1211660,
                "num_scrub_errors": 0,
                "num_shallow_scrub_errors": 0,
                "num_deep_scrub_errors": 0,
                "num_objects_recovered": 2,
                "num_bytes_recovered": 8388608,
                "num_keys_recovered": 0,
                "num_objects_omap": 0,
                "num_objects_hit_set_archive": 0,
                "num_bytes_hit_set_archive": 0,
                "num_flush": 0,
                "num_flush_kb": 0,
                "num_evict": 0,
                "num_evict_kb": 0,
                "num_promote": 0,
                "num_flush_mode_high": 0,
                "num_flush_mode_low": 0,
                "num_evict_mode_some": 0,
                "num_evict_mode_full": 0,
                "num_objects_pinned": 0
            },
            "up": [
                1
            ],
            "acting": [
                1
            ],
            "blocked_by": [],
            "up_primary": 1,
            "acting_primary": 1
        },
        "empty": 0,
        "dne": 0,
        "incomplete": 0,
        "last_epoch_started": 52,
        "hit_set_history": {
            "current_last_update": "0'0",
            "history": []
        }
    },
    "peer_info": [],
    "recovery_state": [
        {
            "name": "Started\/Primary\/Active",
            "enter_time": "2017-01-14 13:42:42.084021",
            "might_have_unfound": [],
            "recovery_progress": {
                "backfill_targets": [],
                "waiting_on_backfill": [],
                "last_backfill_started": "MIN",
                "backfill_info": {
                    "begin": "MIN",
                    "end": "MIN",
                    "objects": []
                },
                "peer_backfill_info": [],
                "backfills_in_flight": [],
                "recovering": [],
                "pg_backend": {
                    "pull_from_peer": [],
                    "pushing": []
                }
            },
            "scrub": {
                "scrubber.epoch_start": "37",
                "scrubber.active": 0,
                "scrubber.state": "INACTIVE",
                "scrubber.start": "MIN",
                "scrubber.end": "MIN",
                "scrubber.subset_last_update": "0'0",
                "scrubber.deep": false,
                "scrubber.seed": 0,
                "scrubber.waiting_on": 0,
                "scrubber.waiting_on_whom": []
            }
        },
        {
            "name": "Started",
            "enter_time": "2017-01-14 13:42:41.065959"
        }
    ],
    "agent_state": {}
}
```

I don't understand what it means.

Now, I don't know what I need to do to fix it.

Any tips?

Best regards,
Stéphane

--
_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com

[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux