>On Tue, 29 Oct 2013 09:30:14 +0800 majianpeng <majianpeng@xxxxxxxxx> wrote: > > >Nearly there!! Just a few more details. See below. > > >> Signed-off-by: Jianpeng Ma <majianpeng@xxxxxxxxx> >> --- >> drivers/md/raid1.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++------ >> drivers/md/raid1.h | 14 ++++++ >> 2 files changed, 124 insertions(+), 12 deletions(-) >> >> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c >> index b4a6dcd..5b311c0 100644 >> --- a/drivers/md/raid1.c >> +++ b/drivers/md/raid1.c >> @@ -66,7 +66,8 @@ >> */ >> static int max_queued_requests = 1024; >> >> -static void allow_barrier(struct r1conf *conf); >> +static void allow_barrier(struct r1conf *conf, sector_t start_next_window, >> + sector_t bi_sector); >> static void lower_barrier(struct r1conf *conf); >> >> static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) >> @@ -227,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio) >> struct bio *bio = r1_bio->master_bio; >> int done; >> struct r1conf *conf = r1_bio->mddev->private; >> + sector_t start_next_window = r1_bio->start_next_window; >> + sector_t bi_sector = bio->bi_sector; > >This should be r1_bio->sector, not bio->bi_sector. >They are often the same but if multiple r1_bios are needed for some reason >(e.g. bad blocks) they may not be. > No, allow_barrier() only for bio not for r1bio.It only do when bio->bi_phys_segments == 0. If this value is r1_bio->sector, the value may has different value as you said. So in allow_barrier(): if (start_next_window) { if (start_next_window == conf->start_next_window) { if (conf->start_next_window + NEXT_NORMALIO_DISTANCE <= bi_sector) conf->next_window_requests--; else conf->current_window_requests--; } else It will has differnt result. >> >> if (bio->bi_phys_segments) { >> unsigned long flags; >> @@ -234,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio) >> bio->bi_phys_segments--; >> done = (bio->bi_phys_segments == 0); >> spin_unlock_irqrestore(&conf->device_lock, flags); >> + /* >> + * make_request() might be waiting for >> + * bi_phys_segments to decrease >> + */ >> + wake_up(&conf->wait_barrier); >> } else >> done = 1; >> >> @@ -245,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio) >> * Wake up any possible resync thread that waits for the device >> * to go idle. >> */ >> - allow_barrier(conf); >> + allow_barrier(conf, start_next_window, bi_sector); >> } >> } >> >> @@ -827,10 +835,18 @@ static void raise_barrier(struct r1conf *conf) >> /* block any new IO from starting */ >> conf->barrier++; >> >> - /* Now wait for all pending IO to complete */ >> + /* For those conditions we must wait: >> + * A:while the array is in frozen state >> + * B:while barrier >= RESYNC_DEPTH, meaning resync reach >> + * the max count which allowed. >> + * C:next_resync + RESYNC_SECTORS > start_next_window, meaning >> + * next resync will reach to window which normal bios are handling. >> + */ >> wait_event_lock_irq(conf->wait_barrier, >> !conf->array_frozen && >> - !conf->nr_pending && conf->barrier < RESYNC_DEPTH, >> + conf->barrier < RESYNC_DEPTH && >> + (conf->start_next_window >= >> + conf->next_resync + RESYNC_SECTORS), >> conf->resync_lock); > >You've removed the test on conf->nr_pending here, which I think is correct. >It counts 'read' requests as well. Testing start_next_window serves the same >purpose as it is increased whenever current_window_requests reaches zero. > In func wait_barrier(): if (need_to_wait_for_sync(conf, bio)) { [snip] } else if (bio_data_dir(bio) == WRITE) { if (conf->next_resync + NEXT_NORMALIO_DISTANCE <= bio->bi_sector) { if (conf->start_next_window == MaxSector) conf->start_next_window = conf->next_resync + NEXT_NORMALIO_DISTANCE; if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE) <= bio->bi_sector) conf->next_window_requests++; else conf->current_window_requests++; } if (bio->bi_sector >= conf->start_next_window) sector = conf->start_next_window; } start_next_window only for bio_data_dir(bio) == WRITE. So resync can't wait read io to complete. >However you having modified as similar test on nr_pending in wait_barrier(). >That worries me a bit. Should that be changed to a test on start_next_window >to match the above change? For this condition, i only copy it.For this condition, i'm not know clearly. Can you give me more details about this? > > >> >> spin_unlock_irq(&conf->resync_lock); >> @@ -846,10 +862,33 @@ static void lower_barrier(struct r1conf *conf) >> wake_up(&conf->wait_barrier); >> } >> >> -static void wait_barrier(struct r1conf *conf) >> +static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio) >> +{ >> + bool wait = false; >> + >> + if (conf->array_frozen || !bio) >> + wait = true; >> + else if (conf->barrier && bio_data_dir(bio) == WRITE) { >> + if (conf->next_resync < RESYNC_WINDOW_SECTORS) >> + wait = true; >> + else if ((conf->next_resync - RESYNC_WINDOW_SECTORS >> + >= bio_end_sector(bio)) || >> + (conf->next_resync + NEXT_NORMALIO_DISTANCE >> + <= bio->bi_sector)) >> + wait = false; >> + else >> + wait = true; >> + } >> + >> + return wait; >> +} >> + >> +static sector_t wait_barrier(struct r1conf *conf, struct bio *bio) >> { >> + sector_t sector = 0; >> + >> spin_lock_irq(&conf->resync_lock); >> - if (conf->barrier) { >> + if (need_to_wait_for_sync(conf, bio)) { >> conf->nr_waiting++; >> /* Wait for the barrier to drop. >> * However if there are already pending >> @@ -868,16 +907,57 @@ static void wait_barrier(struct r1conf *conf) >> !bio_list_empty(current->bio_list))), >> conf->resync_lock); >> conf->nr_waiting--; >> + } else if (bio_data_dir(bio) == WRITE) { >> + if (conf->next_resync + NEXT_NORMALIO_DISTANCE >> + <= bio->bi_sector) { >> + if (conf->start_next_window == MaxSector) >> + conf->start_next_window = >> + conf->next_resync + >> + NEXT_NORMALIO_DISTANCE; >> + >> + if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE) >> + <= bio->bi_sector) >> + conf->next_window_requests++; >> + else >> + conf->current_window_requests++; >> + } >> + if (bio->bi_sector >= conf->start_next_window) >> + sector = conf->start_next_window; > >You aren't setting 'sector' if we needed to wait. I don't think that is >correct, is it? > Yes.How about those code: spin_lock_irq(&conf->resync_lock); retry_check: if (need_to_wait_for_sync(conf, bio)) { conf->nr_waiting++; /* Wait for the barrier to drop. * However if there are already pending * requests (preventing the barrier from * rising completely), and the * pre-process bio queue isn't empty, * then don't wait, as we need to empty * that queue to get the nr_pending * count down. */ wait_event_lock_irq(conf->wait_barrier, !conf->array_frozen && (!conf->barrier || (conf->nr_pending && current->bio_list && !bio_list_empty(current->bio_list))), conf->resync_lock); conf->nr_waiting--; goto retry_check; > >> } >> + >> conf->nr_pending++; >> spin_unlock_irq(&conf->resync_lock); >> + return sector; >> } >> >> -static void allow_barrier(struct r1conf *conf) >> +static void allow_barrier(struct r1conf *conf, sector_t start_next_window, >> + sector_t bi_sector) >> { >> unsigned long flags; >> + >> spin_lock_irqsave(&conf->resync_lock, flags); >> conf->nr_pending--; >> + if (start_next_window) { >> + if (start_next_window == conf->start_next_window) { >> + if (conf->start_next_window + NEXT_NORMALIO_DISTANCE >> + <= bi_sector) >> + conf->next_window_requests--; >> + else >> + conf->current_window_requests--; >> + } else >> + conf->current_window_requests--; >> + >> + if (!conf->current_window_requests) { >> + if (conf->next_window_requests) { >> + conf->current_window_requests = >> + conf->next_window_requests; >> + conf->next_window_requests = 0; >> + conf->start_next_window += >> + NEXT_NORMALIO_DISTANCE; >> + } else >> + conf->start_next_window = MaxSector; >> + } >> + } >> spin_unlock_irqrestore(&conf->resync_lock, flags); >> wake_up(&conf->wait_barrier); >> } >> @@ -1012,6 +1092,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) >> int first_clone; >> int sectors_handled; >> int max_sectors; >> + sector_t start_next_window; >> >> /* >> * Register the new request and wait if the reconstruction >> @@ -1041,7 +1122,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) >> finish_wait(&conf->wait_barrier, &w); >> } >> >> - wait_barrier(conf); >> + start_next_window = wait_barrier(conf, bio); >> >> bitmap = mddev->bitmap; >> >> @@ -1162,6 +1243,7 @@ read_again: >> >> disks = conf->raid_disks * 2; >> retry_write: >> + r1_bio->start_next_window = start_next_window; >> blocked_rdev = NULL; >> rcu_read_lock(); >> max_sectors = r1_bio->sectors; >> @@ -1230,14 +1312,24 @@ read_again: >> if (unlikely(blocked_rdev)) { >> /* Wait for this device to become unblocked */ >> int j; >> + sector_t old = start_next_window; >> >> for (j = 0; j < i; j++) >> if (r1_bio->bios[j]) >> rdev_dec_pending(conf->mirrors[j].rdev, mddev); >> r1_bio->state = 0; >> - allow_barrier(conf); >> + allow_barrier(conf, start_next_window, bio->bi_sector); > >I think this should be r1_bio->sector, not bio->bi_sector, for the same >reason as earlier. > The exaplaination is the same at above. Thanks! Jianpeng Ma > >> md_wait_for_blocked_rdev(blocked_rdev, mddev); >> - wait_barrier(conf); >> + start_next_window = wait_barrier(conf, bio); >> + /* >> + * We must make sure the multi r1bios of bio have >> + * the same value of bi_phys_segments >> + */ >> + if (bio->bi_phys_segments && old && >> + old != start_next_window) >> + /*wait the former r1bio(s) completed*/ >> + wait_event(conf->wait_barrier, >> + bio->bi_phys_segments == 1); >> goto retry_write; >> } >> >> @@ -1437,11 +1529,14 @@ static void print_conf(struct r1conf *conf) >> >> static void close_sync(struct r1conf *conf) >> { >> - wait_barrier(conf); >> - allow_barrier(conf); >> + wait_barrier(conf, NULL); >> + allow_barrier(conf, 0, 0); >> >> mempool_destroy(conf->r1buf_pool); >> conf->r1buf_pool = NULL; >> + >> + conf->next_resync = 0; >> + conf->start_next_window = MaxSector; >> } >> >> static int raid1_spare_active(struct mddev *mddev) >> @@ -2713,6 +2808,9 @@ static struct r1conf *setup_conf(struct mddev *mddev) >> conf->pending_count = 0; >> conf->recovery_disabled = mddev->recovery_disabled - 1; >> >> + conf->start_next_window = MaxSector; >> + conf->current_window_requests = conf->next_window_requests = 0; >> + >> err = -EIO; >> for (i = 0; i < conf->raid_disks * 2; i++) { >> >> diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h >> index 331a98a..07425a1 100644 >> --- a/drivers/md/raid1.h >> +++ b/drivers/md/raid1.h >> @@ -41,6 +41,19 @@ struct r1conf { >> */ >> sector_t next_resync; >> >> + /*when raid1 start resync,we divide raid into four partitions >> + * |---------|--------------|---------------------|-------------| >> + * next_resync start_next_window Pc >> + * Now start_next_window = next_resync + NEXT_NORMALIO_DISTANCE >> + * Pc = start_next_window + NEXT_NORMALIO_DISTANCE >> + * current_window_requests means the count of normalIO between >> + * start_next_window and Pc. >> + * next_window_requests means the count of nornalIO after Pc. >> + * */ >> + sector_t start_next_window; >> + int current_window_requests; >> + int next_window_requests; >> + >> spinlock_t device_lock; >> >> /* list of 'struct r1bio' that need to be processed by raid1d, >> @@ -112,6 +125,7 @@ struct r1bio { >> * in this BehindIO request >> */ >> sector_t sector; >> + sector_t start_next_window; >> int sectors; >> unsigned long state; >> struct mddev *mddev; > > >Thanks, >NeilBrown >?韬{.n?????%??檩??w?{.n???{炳盯w???塄}?财??j:+v??????2??璀??摺?囤??z夸z罐?+?????w棹f