Fixing mark_unfound_lost revert failure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Ceph,

In a mixed dumpling / emperor cluster, osd 2 has been removed but is still listed in

          "might_have_unfound": [
                { "osd": 2,
                  "status": "osd is down"},
                { "osd": 6,
                  "status": "already probed"}],

and because of that mark_unfound_lost fails with

# ceph pg 4.46 mark_unfound_lost revert
Error EINVAL: pg has 1 objects but we haven't probed all sources, not marking lost

What would be the recommended way to fix this?

FWIW the missing object is caused by an XFS read error

# cp '/var/lib/ceph/osd/ceph-2/current/4.46_head/DIR_6/DIR_C/DIR_D/rbd\udata.9ad9d26b8b4567.00000000000007b1__head_0BC0BDC6__4' .
cp: reading `/var/lib/ceph/osd/ceph-2/current/4.46_head/DIR_6/DIR_C/DIR_D/rbd\\udata.9ad9d26b8b4567.00000000000007b1__head_0BC0BDC6__4': Input/output error

that is not caught by xfs_repair and I expect the older version of the object on the remaining OSD to be OK.

Cheers

osd 6 is running
# ceph --version
ceph version 0.72.2 (a913ded2ff138aefb8cb84d347d72164099cfd60)

and
osd 1 is running
# ceph --version
ceph version 0.67.4 (ad85b8bfafea6232d64cb7ba76a8b6e8252fa0c7)

# ceph pg 4.46 mark_unfound_lost revert
Error EINVAL: pg has 1 objects but we haven't probed all sources, not marking lost
# ceph pg 4.46 list_missing
{ "offset": { "oid": "",
      "key": "",
      "snapid": 0,
      "hash": 0,
      "max": 0,
      "pool": -1,
      "namespace": ""},
  "num_missing": 1,
  "num_unfound": 1,
  "objects": [
        { "oid": { "oid": "rbd_data.9ad9d26b8b4567.00000000000007b1",
              "key": "",
              "snapid": -2,
              "hash": 197180870,
              "max": 0,
              "pool": 4,
              "namespace": ""},
          "need": "328685'1233912",
          "have": "328683'1233904",
          "locations": []}],
  "more": 0}
# ceph pg 4.46 query
{ "state": "active+recovering+degraded+remapped",
  "epoch": 346424,
  "up": [
        6,
        1],
  "acting": [
        1,
        6],
  "info": { "pgid": "4.46",
      "last_update": "346424'1288927",
      "last_complete": "0'0",
      "log_tail": "328683'1233911",
      "last_backfill": "MAX",
      "purged_snaps": "[1~3]",
      "history": { "epoch_created": 195,
          "last_epoch_started": 346424,
          "last_epoch_clean": 328685,
          "last_epoch_split": 0,
          "same_up_since": 346423,
          "same_interval_since": 346423,
          "same_primary_since": 346423,
          "last_scrub": "328664'1230185",
          "last_scrub_stamp": "2014-08-23 09:34:01.524854",
          "last_deep_scrub": "328604'1208887",
          "last_deep_scrub_stamp": "2014-08-20 09:33:19.073523",
          "last_clean_scrub_stamp": "2014-08-23 09:34:01.524854"},
      "stats": { "version": "346424'1288927",
          "reported_seq": "2553234",
          "reported_epoch": "346424",
          "state": "active+recovering+degraded+remapped",
          "last_fresh": "2014-08-31 23:47:49.866548",
          "last_change": "2014-08-31 23:44:22.571492",
          "last_active": "2014-08-31 23:47:49.866548",
          "last_clean": "2014-08-23 22:27:23.391412",
          "last_became_active": "0.000000",
          "last_unstale": "2014-08-31 23:47:49.866548",
          "mapping_epoch": 346421,
          "log_start": "328683'1233911",
          "ondisk_log_start": "328683'1233911",
          "created": 195,
          "last_epoch_clean": 328685,
          "parent": "0.0",
          "parent_split_bits": 0,
          "last_scrub": "328664'1230185",
          "last_scrub_stamp": "2014-08-23 09:34:01.524854",
          "last_deep_scrub": "328604'1208887",
          "last_deep_scrub_stamp": "2014-08-20 09:33:19.073523",
          "last_clean_scrub_stamp": "2014-08-23 09:34:01.524854",
          "log_size": 55016,
          "ondisk_log_size": 55016,
          "stats_invalid": "0",
          "stat_sum": { "num_bytes": 12584300544,
              "num_objects": 3035,
              "num_object_clones": 1,
              "num_object_copies": 0,
              "num_objects_missing_on_primary": 0,
              "num_objects_degraded": 0,
              "num_objects_unfound": 0,
              "num_read": 51123,
              "num_read_kb": 1525186,
              "num_write": 1288927,
              "num_write_kb": 19076876,
              "num_scrub_errors": 0,
              "num_shallow_scrub_errors": 0,
              "num_deep_scrub_errors": 0,
              "num_objects_recovered": 20047,
              "num_bytes_recovered": 78532055040,
              "num_keys_recovered": 0},
          "stat_cat_sum": {},
          "up": [
                6,
                1],
          "acting": [
                1,
                6]},
      "empty": 0,
      "dne": 0,
      "incomplete": 0,
      "last_epoch_started": 346424},
  "recovery_state": [
        { "name": "Started\/Primary\/Active",
          "enter_time": "2014-08-31 23:44:22.435483",
          "might_have_unfound": [
                { "osd": 2,
                  "status": "osd is down"},
                { "osd": 6,
                  "status": "already probed"}],
          "recovery_progress": { "backfill_target": 6,
              "waiting_on_backfill": 0,
              "backfill_pos": "0\/\/0\/\/-1",
              "backfill_info": { "begin": "0\/\/0\/\/-1",
                  "end": "0\/\/0\/\/-1",
                  "objects": []},
              "peer_backfill_info": { "begin": "0\/\/0\/\/-1",
                  "end": "0\/\/0\/\/-1",
                  "objects": []},
              "backfills_in_flight": [],
              "pull_from_peer": [],
              "pushing": []},
          "scrub": { "scrubber.epoch_start": "0",
              "scrubber.active": 0,
              "scrubber.block_writes": 0,
              "scrubber.finalizing": 0,
              "scrubber.waiting_on": 0,
              "scrubber.waiting_on_whom": []}},
        { "name": "Started",
          "enter_time": "2014-08-31 23:44:21.177460"}]}

-- 
Loïc Dachary, Artisan Logiciel Libre

-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 263 bytes
Desc: OpenPGP digital signature
URL: <http://lists.ceph.com/pipermail/ceph-users-ceph.com/attachments/20140901/70e33899/attachment.pgp>


[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux