Inconsistent pgs after update 0.73 -> 0.74

I've noticed this on two (development) clusters that I have, both with pools of size 1. I guess my first question is: is this expected?

Here's some detail from one of the clusters:

$ ceph -v
ceph version 0.74-621-g6fac2ac (6fac2acc5e6f77651ffcd7dc7aa833713517d8a6)

$ ceph osd dump
epoch 104
fsid 4e8548e8-dfe4-46d0-a2e8-fb4a9beadff2
created 2013-11-08 11:01:57.051773
modified 2014-01-10 14:55:07.353514
flags

pool 0 'data' replicated size 1 min_size 1 crush_ruleset 0 object_hash rjenkins pg_num 64 pgp_num 64 last_change 47 owner 0 crash_replay_interval 45
pool 1 'metadata' replicated size 1 min_size 1 crush_ruleset 1 object_hash rjenkins pg_num 64 pgp_num 64 last_change 49 owner 0
pool 2 'rbd' replicated size 1 min_size 1 crush_ruleset 2 object_hash rjenkins pg_num 64 pgp_num 64 last_change 51 owner 0

max_osd 2
osd.0 up in weight 1 up_from 102 up_thru 102 down_at 101 last_clean_interval [97,100) 192.168.2.63:6800/27931 192.168.2.63:6801/27931 192.168.2.63:6802/27931 192.168.2.63:6803/27931 exists,up 3c23570a-8a46-4ff2-9dab-cbbe82138bf7
osd.1 up in weight 1 up_from 103 up_thru 103 down_at 100 last_clean_interval [98,99) 192.168.2.63:6805/28070 192.168.2.63:6806/28070 192.168.2.63:6807/28070 192.168.2.63:6808/28070 exists,up 73470b19-cd55-4881-a070-55efa06f3df3

$ ceph -s
    cluster 4e8548e8-dfe4-46d0-a2e8-fb4a9beadff2
     health HEALTH_ERR 62 pgs inconsistent; 62 scrub errors; crush map has non-optimal tunables
     monmap e1: 1 mons at {vedavec=192.168.2.63:6789/0}, election epoch 1, quorum 0 vedavec
     osdmap e104: 2 osds: 2 up, 2 in
      pgmap v4101: 192 pgs, 3 pools, 9691 MB data, 2439 objects
            9833 MB used, 179 GB / 198 GB avail
                 130 active+clean
                  62 active+clean+inconsistent


There are no dmesg errors, and performing a repair on each inconsistent pg clears the inconsistent state. I note that the cluster above has non-optimal tunables; however, I (think I) sorted that on the other cluster and it had no effect on the inconsistent pgs.
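
For reference, "repair" here just means the standard per-pg repair, roughly along these lines (the health detail parsing is approximate; how the pg ids are pulled out may need adjusting for your version):

$ # repair every pg currently reported as inconsistent
$ ceph health detail | grep 'is active+clean+inconsistent' | awk '{print $2}' | \
      while read pg; do ceph pg repair $pg; done

and the tunables warning can presumably be cleared with:

$ ceph osd crush tunables optimal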

Here's a query from one inconsistent pg:

$ ceph pg 2.3f query
{ "state": "active+clean+inconsistent",
  "epoch": 104,
  "up": [
        0],
  "acting": [
        0],
  "actingbackfill": [
        0],
  "info": { "pgid": "2.3f",
      "last_update": "57'195468",
      "last_complete": "57'195468",
      "log_tail": "57'192467",
      "last_user_version": 195468,
      "last_backfill": "MAX",
      "purged_snaps": "[]",
      "history": { "epoch_created": 1,
          "last_epoch_started": 103,
          "last_epoch_clean": 103,
          "last_epoch_split": 0,
          "same_up_since": 102,
          "same_interval_since": 102,
          "same_primary_since": 102,
          "last_scrub": "57'195468",
          "last_scrub_stamp": "2014-01-10 14:59:23.195403",
          "last_deep_scrub": "57'195468",
          "last_deep_scrub_stamp": "2014-01-08 11:48:47.559227",
          "last_clean_scrub_stamp": "2013-12-13 15:38:13.283741"},
      "stats": { "version": "57'195468",
          "reported_seq": "531759",
          "reported_epoch": "104",
          "state": "active+clean+inconsistent",
          "last_fresh": "2014-01-10 14:59:23.195452",
          "last_change": "2014-01-10 14:59:23.195452",
          "last_active": "2014-01-10 14:59:23.195452",
          "last_clean": "2014-01-10 14:59:23.195452",
          "last_became_active": "0.000000",
          "last_unstale": "2014-01-10 14:59:23.195452",
          "mapping_epoch": 101,
          "log_start": "57'192467",
          "ondisk_log_start": "57'192467",
          "created": 1,
          "last_epoch_clean": 103,
          "parent": "0.0",
          "parent_split_bits": 0,
          "last_scrub": "57'195468",
          "last_scrub_stamp": "2014-01-10 14:59:23.195403",
          "last_deep_scrub": "57'195468",
          "last_deep_scrub_stamp": "2014-01-08 11:48:47.559227",
          "last_clean_scrub_stamp": "2013-12-13 15:38:13.283741",
          "log_size": 3001,
          "ondisk_log_size": 3001,
          "stats_invalid": "0",
          "stat_sum": { "num_bytes": 180355088,
              "num_objects": 44,
              "num_object_clones": 0,
              "num_object_copies": 44,
              "num_objects_missing_on_primary": 0,
              "num_objects_degraded": 0,
              "num_objects_unfound": 0,
              "num_objects_dirty": 0,
              "num_whiteouts": 0,
              "num_read": 143836,
              "num_read_kb": 3135807,
              "num_write": 195467,
              "num_write_kb": 2994821,
              "num_scrub_errors": 1,
              "num_shallow_scrub_errors": 1,
              "num_deep_scrub_errors": 0,
              "num_objects_recovered": 0,
              "num_bytes_recovered": 0,
              "num_keys_recovered": 0},
          "stat_cat_sum": {},
          "up": [
                0],
          "acting": [
                0]},
      "empty": 0,
      "dne": 0,
      "incomplete": 0,
      "last_epoch_started": 103,
      "hit_set_history": { "current_last_update": "0'0",
          "current_last_stamp": "0.000000",
          "current_info": { "begin": "0.000000",
              "end": "0.000000",
              "version": "0'0"},
          "history": []}},
  "peer_info": [],
  "recovery_state": [
        { "name": "Started\/Primary\/Active",
          "enter_time": "2014-01-10 14:55:06.318015",
          "might_have_unfound": [],
          "recovery_progress": { "backfill_target": -1,
              "waiting_on_backfill": 0,
              "last_backfill_started": "0\/\/0\/\/-1",
              "backfill_info": { "begin": "0\/\/0\/\/-1",
                  "end": "0\/\/0\/\/-1",
                  "objects": []},
              "peer_backfill_info": { "begin": "0\/\/0\/\/-1",
                  "end": "0\/\/0\/\/-1",
                  "objects": []},
              "backfills_in_flight": [],
              "recovering": [],
              "pg_backend": { "pull_from_peer": [],
                  "pushing": []}},
          "scrub": { "scrubber.epoch_start": "102",
              "scrubber.active": 0,
              "scrubber.block_writes": 0,
              "scrubber.finalizing": 0,
              "scrubber.waiting_on": 0,
              "scrubber.waiting_on_whom": []}},
        { "name": "Started",
          "enter_time": "2014-01-10 14:55:05.255392"}]}


regards

Mark

_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com



