The STRIPE_OP_CHECK_* flags are used to trigger parity verification: STRIPE_OP_CHECK_PP - check P-parity; STRIPE_OP_CHECK_QP - check Q-parity. The result of the check operation is stored in the zero_sum_result (for P-parity) and zero_qsum_result (for Q-parity) fields of <sh>. A zero value corresponds to correct parity, a non-zero value to incorrect parity. This patch also removes the spare page used for the RAID-6 Q-parity check, since that handling has moved into async_pqxor() [the spare page is needed only in the synchronous CPU case; if the check operation is performed by DMA, no spare page is needed]. Signed-off-by: Yuri Tikhonov <yur@xxxxxxxxxxx> Signed-off-by: Mikhail Cherkashin <mike@xxxxxxxxxxx> -- diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index f0f8d7f..9856a91 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3327,62 +3327,129 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh, static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, struct stripe_head_state *s, - struct r6_state *r6s, struct page *tmp_page, + struct r6_state *r6s, int disks) { - int update_p = 0, update_q = 0; struct stripe_queue *sq = sh->sq; - struct r5dev *dev; int pd_idx = sq->pd_idx; int qd_idx = r6s->qd_idx; set_bit(STRIPE_HANDLE, &sh->state); BUG_ON(s->failed > 2); - BUG_ON(s->uptodate < disks); + /* Want to check and possibly repair P and Q. * However there could be one 'failed' device, in which * case we can only check one of them, possibly using the * other to generate missing data */ - - /* If !tmp_page, we cannot do the calculations, - * but as we have set STRIPE_HANDLE, we will soon be called - * by stripe_handle with a tmp_page - just wait until then. - */ - if (tmp_page) { - if (s->failed == r6s->q_failed) { - /* The only possible failed device holds 'Q', so it - * makes sense to check P (If anything else were failed, - * we would have used P to recreate it). 
- */ - compute_block_1(sh, pd_idx, 1); - if (!page_is_zero(sh->dev[pd_idx].page)) { - compute_block_1(sh, pd_idx, 0); - update_p = 1; + if (s->failed <= 1 && !test_bit(STRIPE_OP_MOD_REPAIR_PD, + &sh->ops.pending)) { + /* If one or no disks failed */ + if (!test_and_set_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { + /* Run check operation */ + pr_debug("run check with uptodate = %d of %d\n", + s->uptodate, disks); + BUG_ON(s->uptodate != disks); + if ( s->failed == r6s->q_failed ) { + /* no or only q-disk failed - check p */ + clear_bit(R5_UPTODATE, &sh->dev[pd_idx].flags); + set_bit(STRIPE_OP_CHECK_PP, &sh->ops.pending); + s->uptodate--; } - } - if (!r6s->q_failed && s->failed < 2) { - /* q is not failed, and we didn't use it to generate - * anything, so it makes sense to check it - */ - memcpy(page_address(tmp_page), - page_address(sh->dev[qd_idx].page), - STRIPE_SIZE); - compute_parity6(sh, UPDATE_PARITY); - if (memcmp(page_address(tmp_page), - page_address(sh->dev[qd_idx].page), - STRIPE_SIZE) != 0) { - clear_bit(STRIPE_INSYNC, &sh->state); - update_q = 1; + if ( !r6s->q_failed ) { + /* Q-disk is OK - then check Q-parity also */ + clear_bit(R5_UPTODATE, &sh->dev[qd_idx].flags); + set_bit(STRIPE_OP_CHECK_QP, &sh->ops.pending); + s->uptodate--; + } + sh->ops.count++; + } else if (test_and_clear_bit(STRIPE_OP_CHECK, + &sh->ops.complete)) { + /* Check operation has been completed */ + clear_bit(STRIPE_OP_CHECK, &sh->ops.ack); + clear_bit(STRIPE_OP_CHECK, &sh->ops.pending); + /* See what we've got */ + if (test_and_clear_bit(STRIPE_OP_CHECK_PP, + &sh->ops.pending) && sh->ops.zero_sum_result != 0) { + /* P-parity is wrong */ + set_bit(STRIPE_OP_UPDATE_PP, &sh->ops.pending); + } + if (test_and_clear_bit(STRIPE_OP_CHECK_QP, &sh-> + ops.pending) && sh->ops.zero_qsum_result != 0) { + /* Q-parity is wrong */ + set_bit(STRIPE_OP_UPDATE_QP, &sh->ops.pending); + } + if (!test_bit(STRIPE_OP_UPDATE_PP, &sh->ops.pending) && + !test_bit(STRIPE_OP_UPDATE_QP, &sh->ops.pending)) { + /* 
Both parities are correct */ + set_bit(STRIPE_INSYNC, &sh->state); + } else { + /* One or both parities are wrong */ + conf->mddev->resync_mismatches += + STRIPE_SECTORS; + if (test_bit(MD_RECOVERY_CHECK, + &conf->mddev->recovery)) { + /* Don't try to repair */ + clear_bit(STRIPE_OP_UPDATE_PP, + &sh->ops.pending); + clear_bit(STRIPE_OP_UPDATE_QP, + &sh->ops.pending); + set_bit(STRIPE_INSYNC, &sh->state); + } else { + /* + * One or both parities have to be + * updated + */ + pr_debug("Computing ... "); + BUG_ON(test_and_set_bit( + STRIPE_OP_COMPUTE_BLK, + &sh->ops.pending)); + set_bit(STRIPE_OP_MOD_REPAIR_PD, + &sh->ops.pending); + sh->ops.count++; + if (test_bit(STRIPE_OP_UPDATE_PP, + &sh->ops.pending)) { + pr_debug("P "); + BUG_ON(test_and_set_bit( + R5_Wantcompute, + &sh->dev[pd_idx].flags)); + sh->ops.target = pd_idx; + s->uptodate++; + } else + sh->ops.target = -1; + if (test_bit(STRIPE_OP_UPDATE_QP, + &sh->ops.pending)) { + pr_debug("Q "); + BUG_ON(test_and_set_bit( + R5_Wantcompute, + &sh->dev[qd_idx].flags)); + sh->ops.target2 = qd_idx; + s->uptodate++; + } else + sh->ops.target2 = -1; + pr_debug("disk(s)\n"); + } } } - if (update_p || update_q) { - conf->mddev->resync_mismatches += STRIPE_SECTORS; - if (test_bit(MD_RECOVERY_CHECK, &conf->mddev->recovery)) - /* don't try to repair!! 
*/ - update_p = update_q = 0; - } + } + + /* check if we can clear a parity disk reconstruct */ + if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) && + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) { + clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack); + clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); + } + + /* Wait for check parity and compute block operations to complete + * before write-back + */ + if (!test_bit(STRIPE_INSYNC, &sh->state) && + !test_bit(STRIPE_OP_CHECK, &sh->ops.pending) && + !test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending)) { + struct r5dev *dev; /* now write out any block on a failed drive, * or P or Q if they need it @@ -3393,25 +3460,29 @@ static void handle_parity_checks6(raid5_conf_t *conf, struct stripe_head *sh, s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); + BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); } if (s->failed >= 1) { dev = &sh->dev[r6s->failed_num[0]]; s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); + BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); } - if (update_p) { + if (test_and_clear_bit(STRIPE_OP_UPDATE_PP, &sh->ops.pending)) { dev = &sh->dev[pd_idx]; s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); + BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); } - if (update_q) { + if (test_and_clear_bit(STRIPE_OP_UPDATE_QP, &sh->ops.pending)) { dev = &sh->dev[qd_idx]; s->locked++; set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantwrite, &dev->flags); + BUG_ON(!test_bit(R5_UPTODATE, &dev->flags)); } clear_bit(STRIPE_DEGRADED, &sh->state); @@ -3757,7 +3828,7 @@ static void handle_stripe5(struct stripe_head *sh) } -static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) +static void handle_stripe6(struct stripe_head *sh) { struct stripe_queue *sq = sh->sq; raid6_conf_t *conf = sq->raid_conf; @@ 
-3918,12 +3989,18 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) handle_issuing_new_write_requests6(conf, sh, &s, &r6s, disks); - /* maybe we need to check and possibly fix the parity for this stripe - * Any reads will already have been scheduled, so we just see if enough - * data is available + /* 1/ Maybe we need to check and possibly fix the parity for this stripe + * Any reads will already have been scheduled, so we just see + * if enough data is available + * 2/ Hold off parity checks while parity dependent operations are + * in flight (conflicting writes are protected by the 'locked' variable) */ - if (s.syncing && s.locked == 0 && !test_bit(STRIPE_INSYNC, &sh->state)) - handle_parity_checks6(conf, sh, &s, &r6s, tmp_page, disks); + if ((s.syncing && s.locked == 0 && + !test_bit(STRIPE_OP_COMPUTE_BLK,&sh->ops.pending) && + !test_bit(STRIPE_INSYNC, &sh->state)) || + test_bit(STRIPE_OP_CHECK, &sh->ops.pending) || + test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) + handle_parity_checks6(conf, sh, &s, &r6s, disks); if (s.syncing && s.locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { md_done_sync(conf->mddev, STRIPE_SECTORS,1); @@ -4044,10 +4121,10 @@ static void handle_stripe6(struct stripe_head *sh, struct page *tmp_page) } } -static void handle_stripe(struct stripe_head *sh, struct page *tmp_page) +static void handle_stripe(struct stripe_head *sh) { if (sh->sq->raid_conf->level == 6) - handle_stripe6(sh, tmp_page); + handle_stripe6(sh); else handle_stripe5(sh); } @@ -4068,7 +4145,7 @@ static void handle_queue(struct stripe_queue *sq, int disks, int data_disks) (to_write && test_bit(STRIPE_QUEUE_PREREAD_ACTIVE, &sq->state))) { struct stripe_head *sh = get_active_stripe(sq, disks, 1); if (sh) { - handle_stripe(sh, NULL); + handle_stripe(sh); release_stripe(sh); } } @@ -4747,7 +4824,7 @@ static inline sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *ski 
clear_bit(STRIPE_INSYNC, &sh->state); spin_unlock(&sq->lock); - handle_stripe(sh, NULL); + handle_stripe(sh); release_stripe(sh); release_queue(sq); @@ -4942,7 +5019,7 @@ static void raid5d (mddev_t *mddev) spin_unlock_irq(&conf->device_lock); handled++; - handle_stripe(sh, conf->spare_page); + handle_stripe(sh); release_stripe(sh); spin_lock_irq(&conf->device_lock); @@ -5140,12 +5217,6 @@ static int run(mddev_t *mddev) if ((conf->stripe_hashtbl = kzalloc(PAGE_SIZE, GFP_KERNEL)) == NULL) goto abort; - if (mddev->level == 6) { - conf->spare_page = alloc_page(GFP_KERNEL); - if (!conf->spare_page) - goto abort; - } - sprintf(conf->workqueue_name, "%s_cache_arb", mddev->gendisk->disk_name); conf->workqueue = create_singlethread_workqueue(conf->workqueue_name); @@ -5326,7 +5397,6 @@ abort: print_raid5_conf(conf); if (conf->workqueue) destroy_workqueue(conf->workqueue); - safe_put_page(conf->spare_page); kfree(conf->disks); kfree(conf->stripe_hashtbl); kfree(conf); diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 8bffac5..c84bfbd 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -299,6 +299,8 @@ struct stripe_queue { #define STRIPE_OP_CHECK_PP 9 #define STRIPE_OP_CHECK_QP 10 +#define STRIPE_OP_UPDATE_PP 11 +#define STRIPE_OP_UPDATE_QP 12 /* * Stripe-queue state @@ -390,8 +392,6 @@ struct raid5_private_data { * Cleared when a sync completes. */ - struct page *spare_page; /* Used when checking P/Q in raid6 */ - /* * Free queue pool */ -- Yuri Tikhonov, Senior Software Engineer Emcraft Systems, www.emcraft.com - To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html