Cluster with pgs in active (unclean) status

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi all,

I have a small ceph cluster with just 2 OSDs, latest firefly.

Default data, metadata and rbd pools were created with size=3 and min_size=1
An additional pool rbd2 was created with size=2 and min_size=1

This gave me a warning status saying that 64 pgs were active+clean and 192 were active+degraded (there are 64 pgs per pool).

I realized it was due to the size=3 in the three pools, so I changed that value to 2:
# ceph osd pool set data size 2
# ceph osd pool set metadata size 2
# ceph osd pool set rbd size 2

Those 3 pools are empty. After those commands, the status reported 64 pgs active+clean and 192 pgs active, with a warning saying 192 pgs were unclean.

I have created an RBD image with:
rbd create -p rbd --image test --size 1024

And now the status is:
# ceph status
    cluster 3e91b908-2af3-4288-98a5-dbb77056ecc7
     health HEALTH_WARN 192 pgs stuck unclean; recovery 2/99640 objects degraded (0.002%)
     monmap e3: 3 mons at {0=10.0.3.3:6789/0,1=10.0.3.1:6789/0,2=10.0.3.2:6789/0}, election epoch 32, quorum 0,1,2 1,2,0
     osdmap e263: 2 osds: 2 up, 2 in
      pgmap v393763: 256 pgs, 4 pools, 194 GB data, 49820 objects
            388 GB used, 116 GB / 505 GB avail
            2/99640 objects degraded (0.002%)
                 192 active
                  64 active+clean

Looking at an unclean, non-empty pg:
# ceph pg 2.14 query
{ "state": "active",
  "epoch": 263,
  "up": [
        0,
        1],
  "acting": [
        0,
        1],
  "actingbackfill": [
        "0",
        "1"],
  "info": { "pgid": "2.14",
      "last_update": "263'1",
      "last_complete": "263'1",
      "log_tail": "0'0",
      "last_user_version": 1,
      "last_backfill": "MAX",
      "purged_snaps": "[]",
      "history": { "epoch_created": 1,
          "last_epoch_started": 136,
          "last_epoch_clean": 136,
          "last_epoch_split": 0,
          "same_up_since": 135,
          "same_interval_since": 135,
          "same_primary_since": 11,
          "last_scrub": "0'0",
          "last_scrub_stamp": "2014-11-26 12:23:57.023493",
          "last_deep_scrub": "0'0",
          "last_deep_scrub_stamp": "2014-11-26 12:23:57.023493",
          "last_clean_scrub_stamp": "0.000000"},
      "stats": { "version": "263'1",
          "reported_seq": "306",
          "reported_epoch": "263",
          "state": "active",
          "last_fresh": "2014-12-10 12:53:37.766465",
          "last_change": "2014-12-10 10:32:24.189000",
          "last_active": "2014-12-10 12:53:37.766465",
          "last_clean": "0.000000",
          "last_became_active": "0.000000",
          "last_unstale": "2014-12-10 12:53:37.766465",
          "mapping_epoch": 128,
          "log_start": "0'0",
          "ondisk_log_start": "0'0",
          "created": 1,
          "last_epoch_clean": 136,
          "parent": "0.0",
          "parent_split_bits": 0,
          "last_scrub": "0'0",
          "last_scrub_stamp": "2014-11-26 12:23:57.023493",
          "last_deep_scrub": "0'0",
          "last_deep_scrub_stamp": "2014-11-26 12:23:57.023493",
          "last_clean_scrub_stamp": "0.000000",
          "log_size": 1,
          "ondisk_log_size": 1,
          "stats_invalid": "0",
          "stat_sum": { "num_bytes": 112,
              "num_objects": 1,
              "num_object_clones": 0,
              "num_object_copies": 2,
              "num_objects_missing_on_primary": 0,
              "num_objects_degraded": 1,
              "num_objects_unfound": 0,
              "num_objects_dirty": 1,
              "num_whiteouts": 0,
              "num_read": 0,
              "num_read_kb": 0,
              "num_write": 1,
              "num_write_kb": 1,
              "num_scrub_errors": 0,
              "num_shallow_scrub_errors": 0,
              "num_deep_scrub_errors": 0,
              "num_objects_recovered": 0,
              "num_bytes_recovered": 0,
              "num_keys_recovered": 0,
              "num_objects_omap": 0,
              "num_objects_hit_set_archive": 0},
          "stat_cat_sum": {},
          "up": [
                0,
                1],
          "acting": [
                0,
                1],
          "up_primary": 0,
          "acting_primary": 0},
      "empty": 0,
      "dne": 0,
      "incomplete": 0,
      "last_epoch_started": 136,
      "hit_set_history": { "current_last_update": "0'0",
          "current_last_stamp": "0.000000",
          "current_info": { "begin": "0.000000",
              "end": "0.000000",
              "version": "0'0"},
          "history": []}},
  "peer_info": [
        { "peer": "1",
          "pgid": "2.14",
          "last_update": "263'1",
          "last_complete": "263'1",
          "log_tail": "0'0",
          "last_user_version": 0,
          "last_backfill": "MAX",
          "purged_snaps": "[]",
          "history": { "epoch_created": 1,
              "last_epoch_started": 136,
              "last_epoch_clean": 136,
              "last_epoch_split": 0,
              "same_up_since": 0,
              "same_interval_since": 0,
              "same_primary_since": 0,
              "last_scrub": "0'0",
              "last_scrub_stamp": "2014-11-26 12:23:57.023493",
              "last_deep_scrub": "0'0",
              "last_deep_scrub_stamp": "2014-11-26 12:23:57.023493",
              "last_clean_scrub_stamp": "0.000000"},
          "stats": { "version": "0'0",
              "reported_seq": "0",
              "reported_epoch": "0",
              "state": "inactive",
              "last_fresh": "0.000000",
              "last_change": "0.000000",
              "last_active": "0.000000",
              "last_clean": "0.000000",
              "last_became_active": "0.000000",
              "last_unstale": "0.000000",
              "mapping_epoch": 0,
              "log_start": "0'0",
              "ondisk_log_start": "0'0",
              "created": 0,
              "last_epoch_clean": 0,
              "parent": "0.0",
              "parent_split_bits": 0,
              "last_scrub": "0'0",
              "last_scrub_stamp": "0.000000",
              "last_deep_scrub": "0'0",
              "last_deep_scrub_stamp": "0.000000",
              "last_clean_scrub_stamp": "0.000000",
              "log_size": 0,
              "ondisk_log_size": 0,
              "stats_invalid": "0",
              "stat_sum": { "num_bytes": 0,
                  "num_objects": 0,
                  "num_object_clones": 0,
                  "num_object_copies": 0,
                  "num_objects_missing_on_primary": 0,
                  "num_objects_degraded": 0,
                  "num_objects_unfound": 0,
                  "num_objects_dirty": 0,
                  "num_whiteouts": 0,
                  "num_read": 0,
                  "num_read_kb": 0,
                  "num_write": 0,
                  "num_write_kb": 0,
                  "num_scrub_errors": 0,
                  "num_shallow_scrub_errors": 0,
                  "num_deep_scrub_errors": 0,
                  "num_objects_recovered": 0,
                  "num_bytes_recovered": 0,
                  "num_keys_recovered": 0,
                  "num_objects_omap": 0,
                  "num_objects_hit_set_archive": 0},
              "stat_cat_sum": {},
              "up": [],
              "acting": [],
              "up_primary": -1,
              "acting_primary": -1},
          "empty": 0,
          "dne": 0,
          "incomplete": 0,
          "last_epoch_started": 136,
          "hit_set_history": { "current_last_update": "0'0",
              "current_last_stamp": "0.000000",
              "current_info": { "begin": "0.000000",
                  "end": "0.000000",
                  "version": "0'0"},
              "history": []}}],
  "recovery_state": [
        { "name": "Started\/Primary\/Active",
          "enter_time": "2014-12-01 17:14:33.709442",
          "might_have_unfound": [],
          "recovery_progress": { "backfill_targets": [],
              "waiting_on_backfill": [],
              "last_backfill_started": "0\/\/0\/\/-1",
              "backfill_info": { "begin": "0\/\/0\/\/-1",
                  "end": "0\/\/0\/\/-1",
                  "objects": []},
              "peer_backfill_info": [],
              "backfills_in_flight": [],
              "recovering": [],
              "pg_backend": { "pull_from_peer": [],
                  "pushing": []}},
          "scrub": { "scrubber.epoch_start": "0",
              "scrubber.active": 0,
              "scrubber.block_writes": 0,
              "scrubber.finalizing": 0,
              "scrubber.waiting_on": 0,
              "scrubber.waiting_on_whom": []}},
        { "name": "Started",
          "enter_time": "2014-12-01 17:14:32.723733"}],
  "agent_state": {}}


It seems like those 192 pgs didn't realize there's no need for a 3rd copy?

How can I fix this so that I get active+clean for all pgs?

Thanks a lot.
Eneko

--
Zuzendari Teknikoa / Director Técnico
Binovo IT Human Project, S.L.
Telf. 943575997
      943493611
Astigarraga bidea 2, planta 6 dcha., ofi. 3-2; 20180 Oiartzun (Gipuzkoa)
www.binovo.es

_______________________________________________
ceph-users mailing list
ceph-users@xxxxxxxxxxxxxx
http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com





[Index of Archives]     [Information on CEPH]     [Linux Filesystem Development]     [Ceph Development]     [Ceph Large]     [Ceph Dev]     [Linux USB Development]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [xfs]


  Powered by Linux