Hi All... Today we had a warning regarding 8 near-full OSDs. Looking at the OSDs' occupancy, 3 of them were above 90%. In order to solve the situation, I decided to reweight those first using:
ceph osd crush reweight osd.1 2.67719
ceph osd crush reweight osd.26 2.67719
ceph osd crush reweight osd.53 2.67719
Please note that I started with a very conservative step, since the original weight for all OSDs was 2.72710. After some rebalancing (which has now stopped), I've seen that the
cluster is currently in the following state:
# ceph health detail
crush map has legacy tunables (require bobtail, min is firefly); see http://ceph.com/docs/master/rados/operations/crush-map/#tunables
Not sure if it is worthwhile to mention, but after upgrading to
Jewel, our cluster shows the warnings regarding tunables. We still
have not migrated to the optimal tunables because the cluster will
be very actively used during the next 3 weeks (due to one of the
main conferences in our area) and we prefer to do that migration
after this peak period. I am unsure what happened during the rebalancing, but the mapping of these 4 stuck PGs seems strange; namely, the up and acting OSDs are different.
# ceph pg dump_stuck unclean
To complete this information, I am also sending the output of pg query for one of these problematic PGs (ceph pg 5.306 query) after this email. What should be the procedure to try to recover those PGs before continuing with the reweighting? Thank you in advance, Goncalo
# ceph pg 5.306 query
{ "state": "active+remapped+backfill_toofull", "snap_trimq": "[]", "epoch": 1054, "up": [ 44, 60, 26 ], "acting": [ 44, 7, 59 ], "backfill_targets": [ "26", "60" ], "actingbackfill": [ "7", "26", "44", "59", "60" ], "info": { "pgid": "5.306", "last_update": "1005'55174", "last_complete": "1005'55174", "log_tail": "1005'52106", "last_user_version": 55174, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": "[]", "history": { "epoch_created": 339, "last_epoch_started": 1016, "last_epoch_clean": 996, "last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 1015, "same_interval_since": 1015, "same_primary_since": 928, "last_scrub": "1005'55169", "last_scrub_stamp": "2016-07-19 14:31:45.790871", "last_deep_scrub": "1005'55169", "last_deep_scrub_stamp": "2016-07-19 14:31:45.790871", "last_clean_scrub_stamp": "2016-07-19 14:31:45.790871" }, "stats": { "version": "1005'55174", "reported_seq": "39726", "reported_epoch": "1049", "state": "active+remapped+backfill_toofull", "last_fresh": "2016-07-20 01:55:43.224525", "last_change": "2016-07-20 00:47:02.045700", "last_active": "2016-07-20 01:55:43.224525", "last_peered": "2016-07-20 01:55:43.224525", "last_clean": "2016-07-20 00:34:36.197721", "last_became_active": "2016-07-20 00:36:21.224010", "last_became_peered": "2016-07-20 00:36:21.224010", "last_unstale": "2016-07-20 01:55:43.224525", "last_undegraded": "2016-07-20 01:55:43.224525", "last_fullsized": "2016-07-20 01:55:43.224525", "mapping_epoch": 995, "log_start": 
"1005'52106", "ondisk_log_start": "1005'52106", "created": 339, "last_epoch_clean": 996, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "1005'55169", "last_scrub_stamp": "2016-07-19 14:31:45.790871", "last_deep_scrub": "1005'55169", "last_deep_scrub_stamp": "2016-07-19 14:31:45.790871", "last_clean_scrub_stamp": "2016-07-19 14:31:45.790871", "log_size": 3068, "ondisk_log_size": 3068, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": true, "stat_sum": { "num_bytes": 0, "num_objects": 230, "num_object_clones": 0, "num_object_copies": 1150, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 920, "num_objects_unfound": 0, "num_objects_dirty": 230, "num_whiteouts": 0, "num_read": 12454, "num_read_kb": 217518, "num_write": 55524, "num_write_kb": 228743, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 230, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0 }, "up": [ 44, 60, 26 ], "acting": [ 44, 7, 59 ], "blocked_by": [], "up_primary": 44, "acting_primary": 44 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 1016, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, "peer_info": [ { "peer": "7", "pgid": "5.306", "last_update": "1005'55174", "last_complete": "1005'55174", "log_tail": "1005'52106", "last_user_version": 55174, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": "[]", "history": { "epoch_created": 339, "last_epoch_started": 1016, "last_epoch_clean": 996, 
"last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 1015, "same_interval_since": 1015, "same_primary_since": 928, "last_scrub": "1005'55169", "last_scrub_stamp": "2016-07-19 14:31:45.790871", "last_deep_scrub": "1005'55169", "last_deep_scrub_stamp": "2016-07-19 14:31:45.790871", "last_clean_scrub_stamp": "2016-07-19 14:31:45.790871" }, "stats": { "version": "1005'55173", "reported_seq": "39711", "reported_epoch": "1005", "state": "active+clean", "last_fresh": "2016-07-19 16:44:02.213143", "last_change": "2016-07-19 14:31:45.791194", "last_active": "2016-07-19 16:44:02.213143", "last_peered": "2016-07-19 16:44:02.213143", "last_clean": "2016-07-19 16:44:02.213143", "last_became_active": "2016-06-27 04:57:38.897948", "last_became_peered": "2016-06-27 04:57:38.897948", "last_unstale": "2016-07-19 16:44:02.213143", "last_undegraded": "2016-07-19 16:44:02.213143", "last_fullsized": "2016-07-19 16:44:02.213143", "mapping_epoch": 995, "log_start": "1005'52106", "ondisk_log_start": "1005'52106", "created": 339, "last_epoch_clean": 996, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "1005'55169", "last_scrub_stamp": "2016-07-19 14:31:45.790871", "last_deep_scrub": "1005'55169", "last_deep_scrub_stamp": "2016-07-19 14:31:45.790871", "last_clean_scrub_stamp": "2016-07-19 14:31:45.790871", "log_size": 3067, "ondisk_log_size": 3067, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": true, "stat_sum": { "num_bytes": 0, "num_objects": 230, "num_object_clones": 0, "num_object_copies": 693, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 230, "num_whiteouts": 0, "num_read": 12454, "num_read_kb": 217518, "num_write": 55524, "num_write_kb": 228743, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 
0, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 230, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0 }, "up": [ 44, 60, 26 ], "acting": [ 44, 7, 59 ], "blocked_by": [], "up_primary": 44, "acting_primary": 44 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 1016, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "26", "pgid": "5.306", "last_update": "1005'55174", "last_complete": "1005'55174", "log_tail": "1005'52174", "last_user_version": 0, "last_backfill": "MIN", "last_backfill_bitwise": 1, "purged_snaps": "[]", "history": { "epoch_created": 339, "last_epoch_started": 1016, "last_epoch_clean": 996, "last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 1015, "same_interval_since": 1015, "same_primary_since": 928, "last_scrub": "1005'55169", "last_scrub_stamp": "2016-07-19 14:31:45.790871", "last_deep_scrub": "1005'55169", "last_deep_scrub_stamp": "2016-07-19 14:31:45.790871", "last_clean_scrub_stamp": "2016-07-19 14:31:45.790871" }, "stats": { "version": "0'0", "reported_seq": "0", "reported_epoch": "0", "state": "inactive", "last_fresh": "0.000000", "last_change": "0.000000", "last_active": "0.000000", "last_peered": "0.000000", "last_clean": "0.000000", "last_became_active": "0.000000", "last_became_peered": "0.000000", "last_unstale": "0.000000", "last_undegraded": "0.000000", "last_fullsized": "0.000000", "mapping_epoch": 0, "log_start": "0'0", "ondisk_log_start": "0'0", "created": 0, "last_epoch_clean": 0, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "0'0", "last_scrub_stamp": "0.000000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "0.000000", "last_clean_scrub_stamp": "0.000000", "log_size": 0, 
"ondisk_log_size": 0, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "stat_sum": { "num_bytes": 0, "num_objects": 0, "num_object_clones": 0, "num_object_copies": 0, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 0, "num_whiteouts": 0, "num_read": 0, "num_read_kb": 0, "num_write": 0, "num_write_kb": 0, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0 }, "up": [], "acting": [], "blocked_by": [], "up_primary": -1, "acting_primary": -1 }, "empty": 0, "dne": 0, "incomplete": 1, "last_epoch_started": 1016, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "59", "pgid": "5.306", "last_update": "1005'55174", "last_complete": "1005'55174", "log_tail": "1005'52106", "last_user_version": 55174, "last_backfill": "MAX", "last_backfill_bitwise": 0, "purged_snaps": "[]", "history": { "epoch_created": 339, "last_epoch_started": 1016, "last_epoch_clean": 996, "last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 1015, "same_interval_since": 1015, "same_primary_since": 928, "last_scrub": "1005'55169", "last_scrub_stamp": "2016-07-19 14:31:45.790871", "last_deep_scrub": "1005'55169", "last_deep_scrub_stamp": "2016-07-19 14:31:45.790871", "last_clean_scrub_stamp": "2016-07-19 14:31:45.790871" }, "stats": { "version": "1005'55173", "reported_seq": "39711", "reported_epoch": "1005", 
"state": "active+clean", "last_fresh": "2016-07-19 16:44:02.213143", "last_change": "2016-07-19 14:31:45.791194", "last_active": "2016-07-19 16:44:02.213143", "last_peered": "2016-07-19 16:44:02.213143", "last_clean": "2016-07-19 16:44:02.213143", "last_became_active": "2016-06-27 04:57:38.897948", "last_became_peered": "2016-06-27 04:57:38.897948", "last_unstale": "2016-07-19 16:44:02.213143", "last_undegraded": "2016-07-19 16:44:02.213143", "last_fullsized": "2016-07-19 16:44:02.213143", "mapping_epoch": 995, "log_start": "1005'52106", "ondisk_log_start": "1005'52106", "created": 339, "last_epoch_clean": 996, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "1005'55169", "last_scrub_stamp": "2016-07-19 14:31:45.790871", "last_deep_scrub": "1005'55169", "last_deep_scrub_stamp": "2016-07-19 14:31:45.790871", "last_clean_scrub_stamp": "2016-07-19 14:31:45.790871", "log_size": 3067, "ondisk_log_size": 3067, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": true, "stat_sum": { "num_bytes": 0, "num_objects": 230, "num_object_clones": 0, "num_object_copies": 693, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, "num_objects_dirty": 230, "num_whiteouts": 0, "num_read": 12454, "num_read_kb": 217518, "num_write": 55524, "num_write_kb": 228743, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 230, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0 }, "up": [ 44, 60, 26 ], "acting": [ 44, 7, 59 ], 
"blocked_by": [], "up_primary": 44, "acting_primary": 44 }, "empty": 0, "dne": 0, "incomplete": 0, "last_epoch_started": 1016, "hit_set_history": { "current_last_update": "0'0", "history": [] } }, { "peer": "60", "pgid": "5.306", "last_update": "1005'55174", "last_complete": "1005'55174", "log_tail": "1005'52174", "last_user_version": 0, "last_backfill": "MIN", "last_backfill_bitwise": 1, "purged_snaps": "[]", "history": { "epoch_created": 339, "last_epoch_started": 1016, "last_epoch_clean": 996, "last_epoch_split": 0, "last_epoch_marked_full": 0, "same_up_since": 1015, "same_interval_since": 1015, "same_primary_since": 928, "last_scrub": "1005'55169", "last_scrub_stamp": "2016-07-19 14:31:45.790871", "last_deep_scrub": "1005'55169", "last_deep_scrub_stamp": "2016-07-19 14:31:45.790871", "last_clean_scrub_stamp": "2016-07-19 14:31:45.790871" }, "stats": { "version": "0'0", "reported_seq": "0", "reported_epoch": "0", "state": "inactive", "last_fresh": "0.000000", "last_change": "0.000000", "last_active": "0.000000", "last_peered": "0.000000", "last_clean": "0.000000", "last_became_active": "0.000000", "last_became_peered": "0.000000", "last_unstale": "0.000000", "last_undegraded": "0.000000", "last_fullsized": "0.000000", "mapping_epoch": 0, "log_start": "0'0", "ondisk_log_start": "0'0", "created": 0, "last_epoch_clean": 0, "parent": "0.0", "parent_split_bits": 0, "last_scrub": "0'0", "last_scrub_stamp": "0.000000", "last_deep_scrub": "0'0", "last_deep_scrub_stamp": "0.000000", "last_clean_scrub_stamp": "0.000000", "log_size": 0, "ondisk_log_size": 0, "stats_invalid": false, "dirty_stats_invalid": false, "omap_stats_invalid": false, "hitset_stats_invalid": false, "hitset_bytes_stats_invalid": false, "pin_stats_invalid": false, "stat_sum": { "num_bytes": 0, "num_objects": 0, "num_object_clones": 0, "num_object_copies": 0, "num_objects_missing_on_primary": 0, "num_objects_missing": 0, "num_objects_degraded": 0, "num_objects_misplaced": 0, "num_objects_unfound": 0, 
"num_objects_dirty": 0, "num_whiteouts": 0, "num_read": 0, "num_read_kb": 0, "num_write": 0, "num_write_kb": 0, "num_scrub_errors": 0, "num_shallow_scrub_errors": 0, "num_deep_scrub_errors": 0, "num_objects_recovered": 0, "num_bytes_recovered": 0, "num_keys_recovered": 0, "num_objects_omap": 0, "num_objects_hit_set_archive": 0, "num_bytes_hit_set_archive": 0, "num_flush": 0, "num_flush_kb": 0, "num_evict": 0, "num_evict_kb": 0, "num_promote": 0, "num_flush_mode_high": 0, "num_flush_mode_low": 0, "num_evict_mode_some": 0, "num_evict_mode_full": 0, "num_objects_pinned": 0 }, "up": [], "acting": [], "blocked_by": [], "up_primary": -1, "acting_primary": -1 }, "empty": 0, "dne": 0, "incomplete": 1, "last_epoch_started": 1016, "hit_set_history": { "current_last_update": "0'0", "history": [] } } ], "recovery_state": [ { "name": "Started\/Primary\/Active", "enter_time": "2016-07-20 00:36:21.091565", "might_have_unfound": [], "recovery_progress": { "backfill_targets": [ "26", "60" ], "waiting_on_backfill": [], "last_backfill_started": "MIN", "backfill_info": { "begin": "MIN", "end": "MIN", "objects": [] }, "peer_backfill_info": [], "backfills_in_flight": [], "recovering": [], "pg_backend": { "pull_from_peer": [], "pushing": [] } }, "scrub": { "scrubber.epoch_start": "995", "scrubber.active": 0, "scrubber.state": "INACTIVE", "scrubber.start": "MIN", "scrubber.end": "MIN", "scrubber.subset_last_update": "0'0", "scrubber.deep": false, "scrubber.seed": 0, "scrubber.waiting_on": 0, "scrubber.waiting_on_whom": [] } }, { "name": "Started", "enter_time": "2016-07-20 00:36:19.742892" } ], "agent_state": {} } -- Goncalo Borges Research Computing ARC Centre of Excellence for Particle Physics at the Terascale School of Physics A28 | University of Sydney, NSW 2006 T: +61 2 93511937 |
_______________________________________________ ceph-users mailing list ceph-users@xxxxxxxxxxxxxx http://lists.ceph.com/listinfo.cgi/ceph-users-ceph.com