On Tue, 29 Oct 2013 09:30:14 +0800 majianpeng <majianpeng@xxxxxxxxx> wrote: Nearly there!! Just a few more details. See below. > Signed-off-by: Jianpeng Ma <majianpeng@xxxxxxxxx> > --- > drivers/md/raid1.c | 122 +++++++++++++++++++++++++++++++++++++++++++++++------ > drivers/md/raid1.h | 14 ++++++ > 2 files changed, 124 insertions(+), 12 deletions(-) > > diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c > index b4a6dcd..5b311c0 100644 > --- a/drivers/md/raid1.c > +++ b/drivers/md/raid1.c > @@ -66,7 +66,8 @@ > */ > static int max_queued_requests = 1024; > > -static void allow_barrier(struct r1conf *conf); > +static void allow_barrier(struct r1conf *conf, sector_t start_next_window, > + sector_t bi_sector); > static void lower_barrier(struct r1conf *conf); > > static void * r1bio_pool_alloc(gfp_t gfp_flags, void *data) > @@ -227,6 +228,8 @@ static void call_bio_endio(struct r1bio *r1_bio) > struct bio *bio = r1_bio->master_bio; > int done; > struct r1conf *conf = r1_bio->mddev->private; > + sector_t start_next_window = r1_bio->start_next_window; > + sector_t bi_sector = bio->bi_sector; This should be r1_bio->sector, not bio->bi_sector. They are often the same but if multiple r1_bios are needed for some reason (e.g. bad blocks) they may not be. > > if (bio->bi_phys_segments) { > unsigned long flags; > @@ -234,6 +237,11 @@ static void call_bio_endio(struct r1bio *r1_bio) > bio->bi_phys_segments--; > done = (bio->bi_phys_segments == 0); > spin_unlock_irqrestore(&conf->device_lock, flags); > + /* > + * make_request() might be waiting for > + * bi_phys_segments to decrease > + */ > + wake_up(&conf->wait_barrier); > } else > done = 1; > > @@ -245,7 +253,7 @@ static void call_bio_endio(struct r1bio *r1_bio) > * Wake up any possible resync thread that waits for the device > * to go idle. > */ > - allow_barrier(conf); > + allow_barrier(conf, start_next_window, bi_sector); > } > } > > @@ -827,10 +835,18 @@ static void raise_barrier(struct r1conf *conf) > /* block any new IO from starting */ > conf->barrier++; > > - /* Now wait for all pending IO to complete */ > + /* For those conditions we must wait: > + * A:while the array is in frozen state > + * B:while barrier >= RESYNC_DEPTH, meaning resync reach > + * the max count which allowed. > + * C:next_resync + RESYNC_SECTORS > start_next_window, meaning > + * next resync will reach to window which normal bios are handling. > + */ > wait_event_lock_irq(conf->wait_barrier, > !conf->array_frozen && > - !conf->nr_pending && conf->barrier < RESYNC_DEPTH, > + conf->barrier < RESYNC_DEPTH && > + (conf->start_next_window >= > + conf->next_resync + RESYNC_SECTORS), > conf->resync_lock); You've removed the test on conf->nr_pending here, which I think is correct. It counts 'read' requests as well. Testing start_next_window serves the same purpose as it is increased whenever current_window_requests reaches zero. However you having modified as similar test on nr_pending in wait_barrier(). That worries me a bit. Should that be changed to a test on start_next_window to match the above change? > > spin_unlock_irq(&conf->resync_lock); > @@ -846,10 +862,33 @@ static void lower_barrier(struct r1conf *conf) > wake_up(&conf->wait_barrier); > } > > -static void wait_barrier(struct r1conf *conf) > +static bool need_to_wait_for_sync(struct r1conf *conf, struct bio *bio) > +{ > + bool wait = false; > + > + if (conf->array_frozen || !bio) > + wait = true; > + else if (conf->barrier && bio_data_dir(bio) == WRITE) { > + if (conf->next_resync < RESYNC_WINDOW_SECTORS) > + wait = true; > + else if ((conf->next_resync - RESYNC_WINDOW_SECTORS > + >= bio_end_sector(bio)) || > + (conf->next_resync + NEXT_NORMALIO_DISTANCE > + <= bio->bi_sector)) > + wait = false; > + else > + wait = true; > + } > + > + return wait; > +} > + > +static sector_t wait_barrier(struct r1conf *conf, struct bio *bio) > { > + sector_t sector = 0; > + > spin_lock_irq(&conf->resync_lock); > - if (conf->barrier) { > + if (need_to_wait_for_sync(conf, bio)) { > conf->nr_waiting++; > /* Wait for the barrier to drop. > * However if there are already pending > @@ -868,16 +907,57 @@ static void wait_barrier(struct r1conf *conf) > !bio_list_empty(current->bio_list))), > conf->resync_lock); > conf->nr_waiting--; > + } else if (bio_data_dir(bio) == WRITE) { > + if (conf->next_resync + NEXT_NORMALIO_DISTANCE > + <= bio->bi_sector) { > + if (conf->start_next_window == MaxSector) > + conf->start_next_window = > + conf->next_resync + > + NEXT_NORMALIO_DISTANCE; > + > + if ((conf->start_next_window + NEXT_NORMALIO_DISTANCE) > + <= bio->bi_sector) > + conf->next_window_requests++; > + else > + conf->current_window_requests++; > + } > + if (bio->bi_sector >= conf->start_next_window) > + sector = conf->start_next_window; You aren't setting 'sector' if we needed to wait. I don't think that is correct, is it? > } > + > conf->nr_pending++; > spin_unlock_irq(&conf->resync_lock); > + return sector; > } > > -static void allow_barrier(struct r1conf *conf) > +static void allow_barrier(struct r1conf *conf, sector_t start_next_window, > + sector_t bi_sector) > { > unsigned long flags; > + > spin_lock_irqsave(&conf->resync_lock, flags); > conf->nr_pending--; > + if (start_next_window) { > + if (start_next_window == conf->start_next_window) { > + if (conf->start_next_window + NEXT_NORMALIO_DISTANCE > + <= bi_sector) > + conf->next_window_requests--; > + else > + conf->current_window_requests--; > + } else > + conf->current_window_requests--; > + > + if (!conf->current_window_requests) { > + if (conf->next_window_requests) { > + conf->current_window_requests = > + conf->next_window_requests; > + conf->next_window_requests = 0; > + conf->start_next_window += > + NEXT_NORMALIO_DISTANCE; > + } else > + conf->start_next_window = MaxSector; > + } > + } > spin_unlock_irqrestore(&conf->resync_lock, flags); > wake_up(&conf->wait_barrier); > } > @@ -1012,6 +1092,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) > int first_clone; > int sectors_handled; > int max_sectors; > + sector_t start_next_window; > > /* > * Register the new request and wait if the reconstruction > @@ -1041,7 +1122,7 @@ static void make_request(struct mddev *mddev, struct bio * bio) > finish_wait(&conf->wait_barrier, &w); > } > > - wait_barrier(conf); > + start_next_window = wait_barrier(conf, bio); > > bitmap = mddev->bitmap; > > @@ -1162,6 +1243,7 @@ read_again: > > disks = conf->raid_disks * 2; > retry_write: > + r1_bio->start_next_window = start_next_window; > blocked_rdev = NULL; > rcu_read_lock(); > max_sectors = r1_bio->sectors; > @@ -1230,14 +1312,24 @@ read_again: > if (unlikely(blocked_rdev)) { > /* Wait for this device to become unblocked */ > int j; > + sector_t old = start_next_window; > > for (j = 0; j < i; j++) > if (r1_bio->bios[j]) > rdev_dec_pending(conf->mirrors[j].rdev, mddev); > r1_bio->state = 0; > - allow_barrier(conf); > + allow_barrier(conf, start_next_window, bio->bi_sector); I think this should be r1_bio->sector, not bio->bi_sector, for the same reason as earlier. > md_wait_for_blocked_rdev(blocked_rdev, mddev); > - wait_barrier(conf); > + start_next_window = wait_barrier(conf, bio); > + /* > + * We must make sure the multi r1bios of bio have > + * the same value of bi_phys_segments > + */ > + if (bio->bi_phys_segments && old && > + old != start_next_window) > + /*wait the former r1bio(s) completed*/ > + wait_event(conf->wait_barrier, > + bio->bi_phys_segments == 1); > goto retry_write; > } > > @@ -1437,11 +1529,14 @@ static void print_conf(struct r1conf *conf) > > static void close_sync(struct r1conf *conf) > { > - wait_barrier(conf); > - allow_barrier(conf); > + wait_barrier(conf, NULL); > + allow_barrier(conf, 0, 0); > > mempool_destroy(conf->r1buf_pool); > conf->r1buf_pool = NULL; > + > + conf->next_resync = 0; > + conf->start_next_window = MaxSector; > } > > static int raid1_spare_active(struct mddev *mddev) > @@ -2713,6 +2808,9 @@ static struct r1conf *setup_conf(struct mddev *mddev) > conf->pending_count = 0; > conf->recovery_disabled = mddev->recovery_disabled - 1; > > + conf->start_next_window = MaxSector; > + conf->current_window_requests = conf->next_window_requests = 0; > + > err = -EIO; > for (i = 0; i < conf->raid_disks * 2; i++) { > > diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h > index 331a98a..07425a1 100644 > --- a/drivers/md/raid1.h > +++ b/drivers/md/raid1.h > @@ -41,6 +41,19 @@ struct r1conf { > */ > sector_t next_resync; > > + /*when raid1 start resync,we divide raid into four partitions > + * |---------|--------------|---------------------|-------------| > + * next_resync start_next_window Pc > + * Now start_next_window = next_resync + NEXT_NORMALIO_DISTANCE > + * Pc = start_next_window + NEXT_NORMALIO_DISTANCE > + * current_window_requests means the count of normalIO between > + * start_next_window and Pc. > + * next_window_requests means the count of nornalIO after Pc. > + * */ > + sector_t start_next_window; > + int current_window_requests; > + int next_window_requests; > + > spinlock_t device_lock; > > /* list of 'struct r1bio' that need to be processed by raid1d, > @@ -112,6 +125,7 @@ struct r1bio { > * in this BehindIO request > */ > sector_t sector; > + sector_t start_next_window; > int sectors; > unsigned long state; > struct mddev *mddev; Thanks, NeilBrown
Attachment:
signature.asc
Description: PGP signature