On Wed, 7 Dec 2011 15:37:30 -0800 Yucong Sun (叶雨飞) <sunyucong@xxxxxxxxx> wrote: > Neil, I can't compile latest MD against 2.6.32, and that commit can't > be patched into 2.6.32 directly either, can you help me on this? > This should do it. NeilBrown commit ef54b7cf955dc3b7d33248e8591b1a00b4fa998c Author: NeilBrown <neilb@xxxxxxx> Date: Tue Oct 11 16:50:01 2011 +1100 md: add proper write-congestion reporting to RAID1 and RAID10. RAID1 and RAID10 handle write requests by queuing them for handling by a separate thread. This is because when a write-intent-bitmap is active we might need to update the bitmap first, so it is good to queue a lot of writes, then do one big bitmap update for them all. However writeback requires devices to appear to be congested after a while so it can make some guesstimate of throughput. The infinite queue defeats that (note that RAID5 already has a finite queue so it doesn't suffer from this problem). So impose a limit on the number of pending write requests. By default it is 1024 which seems to be generally suitable. Make it configurable via module option just in case someone finds a regression. Signed-off-by: NeilBrown <neilb@xxxxxxx> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index e07ce2e..fe7ae3c 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -50,6 +50,11 @@ */ #define NR_RAID1_BIOS 256 +/* When there are this many requests queued to be written by + * the raid1 thread, we become 'congested' to provide back-pressure + * for writeback. 
+ */ +static int max_queued_requests = 1024; static void unplug_slaves(mddev_t *mddev); @@ -576,7 +581,8 @@ static int raid1_congested(void *data, int bits) conf_t *conf = mddev->private; int i, ret = 0; - if (mddev_congested(mddev, bits)) + if (mddev_congested(mddev, bits) || + conf->pending_count >= max_queued_requests) return 1; rcu_read_lock(); @@ -613,10 +619,12 @@ static int flush_pending_writes(conf_t *conf) struct bio *bio; bio = bio_list_get(&conf->pending_bio_list); blk_remove_plug(conf->mddev->queue); + conf->pending_count = 0; spin_unlock_irq(&conf->device_lock); /* flush any pending bitmap writes to * disk before proceeding w/ I/O */ bitmap_unplug(conf->mddev->bitmap); + wake_up(&conf->wait_barrier); while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; @@ -789,6 +797,7 @@ static int make_request(struct request_queue *q, struct bio * bio) int cpu; bool do_barriers; mdk_rdev_t *blocked_rdev; + int cnt = 0; /* * Register the new request and wait if the reconstruction @@ -864,6 +873,11 @@ static int make_request(struct request_queue *q, struct bio * bio) /* * WRITE: */ + if (conf->pending_count >= max_queued_requests) { + md_wakeup_thread(mddev->thread); + wait_event(conf->wait_barrier, + conf->pending_count < max_queued_requests); + } /* first select target devices under spinlock and * inc refcount on their rdev. 
Record them by setting * bios[x] to bio @@ -970,6 +984,7 @@ static int make_request(struct request_queue *q, struct bio * bio) atomic_inc(&r1_bio->remaining); bio_list_add(&bl, mbio); + cnt++; } kfree(behind_pages); /* the behind pages are attached to the bios now */ @@ -978,6 +993,7 @@ static int make_request(struct request_queue *q, struct bio * bio) spin_lock_irqsave(&conf->device_lock, flags); bio_list_merge(&conf->pending_bio_list, &bl); bio_list_init(&bl); + conf->pending_count += cnt; blk_plug_device(mddev->queue); spin_unlock_irqrestore(&conf->device_lock, flags); @@ -2021,7 +2037,7 @@ static int run(mddev_t *mddev) bio_list_init(&conf->pending_bio_list); bio_list_init(&conf->flushing_bio_list); - + conf->pending_count = 0; mddev->degraded = 0; for (i = 0; i < conf->raid_disks; i++) { @@ -2317,3 +2333,5 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-3"); /* RAID1 */ MODULE_ALIAS("md-raid1"); MODULE_ALIAS("md-level-1"); + +module_param(max_queued_requests, int, S_IRUGO|S_IWUSR); diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h index e87b84d..520288c 100644 --- a/drivers/md/raid1.h +++ b/drivers/md/raid1.h @@ -38,6 +38,7 @@ struct r1_private_data_s { /* queue of writes that have been unplugged */ struct bio_list flushing_bio_list; + int pending_count; /* for use when syncing mirrors: */ spinlock_t resync_lock; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index c2cb7b8..4c7d9b5 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -59,6 +59,11 @@ static void unplug_slaves(mddev_t *mddev); static void allow_barrier(conf_t *conf); static void lower_barrier(conf_t *conf); +/* When there are this many requests queued to be written by + * the raid10 thread, we become 'congested' to provide back-pressure * for writeback. 
+ */ +static int max_queued_requests = 1024; static void * r10bio_pool_alloc(gfp_t gfp_flags, void *data) { @@ -631,6 +636,10 @@ static int raid10_congested(void *data, int bits) conf_t *conf = mddev->private; int i, ret = 0; + if ((bits & (1 << BDI_async_congested)) && + conf->pending_count >= max_queued_requests) + return 1; + if (mddev_congested(mddev, bits)) return 1; rcu_read_lock(); @@ -660,10 +669,12 @@ static int flush_pending_writes(conf_t *conf) struct bio *bio; bio = bio_list_get(&conf->pending_bio_list); blk_remove_plug(conf->mddev->queue); + conf->pending_count = 0; spin_unlock_irq(&conf->device_lock); /* flush any pending bitmap writes to disk * before proceeding w/ I/O */ bitmap_unplug(conf->mddev->bitmap); + wake_up(&conf->wait_barrier); while (bio) { /* submit pending writes */ struct bio *next = bio->bi_next; @@ -802,6 +813,7 @@ static int make_request(struct request_queue *q, struct bio * bio) struct bio_list bl; unsigned long flags; mdk_rdev_t *blocked_rdev; + int cnt = 0; if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER))) { bio_endio(bio, -EOPNOTSUPP); @@ -894,6 +906,11 @@ static int make_request(struct request_queue *q, struct bio * bio) /* * WRITE: */ + if (conf->pending_count >= max_queued_requests) { + md_wakeup_thread(mddev->thread); + wait_event(conf->wait_barrier, + conf->pending_count < max_queued_requests); + } /* first select target devices under rcu_lock and * inc refcount on their rdev. 
Record them by setting * bios[x] to bio @@ -957,6 +974,7 @@ static int make_request(struct request_queue *q, struct bio * bio) atomic_inc(&r10_bio->remaining); bio_list_add(&bl, mbio); + cnt++; } if (unlikely(!atomic_read(&r10_bio->remaining))) { @@ -970,6 +988,7 @@ static int make_request(struct request_queue *q, struct bio * bio) spin_lock_irqsave(&conf->device_lock, flags); bio_list_merge(&conf->pending_bio_list, &bl); blk_plug_device(mddev->queue); + conf->pending_count += cnt; spin_unlock_irqrestore(&conf->device_lock, flags); /* In case raid10d snuck in to freeze_array */ @@ -2318,3 +2337,5 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS("md-personality-9"); /* RAID10 */ MODULE_ALIAS("md-raid10"); MODULE_ALIAS("md-level-10"); + +module_param(max_queued_requests, int, S_IRUGO|S_IWUSR); diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h index 59cd1ef..e6e1613 100644 --- a/drivers/md/raid10.h +++ b/drivers/md/raid10.h @@ -39,7 +39,7 @@ struct r10_private_data_s { struct list_head retry_list; /* queue pending writes and submit them on unplug */ struct bio_list pending_bio_list; - + int pending_count; spinlock_t resync_lock; int nr_pending;
Attachment:
signature.asc
Description: PGP signature