On Wed, Apr 14, 2010 at 11:21 PM, NeilBrown <neilb@xxxxxxx> wrote: > Hi all, > I thought it was time I posted my patch queue for review to be sure > it would be ready for the next merge window. > > Apart from sundry bug fixes and minor improvements there are two big > themes here > 1/ enhancements to level conversion so e.g. we can now convert RAID0 > to RAID5 or RAID10 (near-2 only) and back. > 2/ general refactoring of bits of md code - some functions > (e.g. do_md_stop) had become really big and were just a mess of > stuff that all had to be done at much the same time. It is now > broken into somewhat meaningful parts. There is a deeper reason > for doing this refactoring .... you'll find out soon. :-) > > This is all available at > git://neil.brown.name/md for-next > and should be in linux-next in a day or two. > > > All review, testing, and comments most welcome. > A few fixes/enhancements I made while playing with the takeover code are available at: git://git.kernel.org/pub/scm/linux/kernel/git/djbw/md.git for-neil Dan Williams (3): md/raid4: permit raid0 takeover md: notify mdstat waiters of level change md: allow integers to be passed to md/level drivers/md/md.c | 25 +++++++++++++++---------- drivers/md/raid5.c | 32 +++++++++++++++++--------------- 2 files changed, 32 insertions(+), 25 deletions(-) I'd like to get "raid6: fix recovery performance regression" in for 2.6.34. I pushed it out to the URL below; let me know if you would prefer that I just send it directly. 
git://git.kernel.org/pub/scm/linux/kernel/git/djbw/md.git fixes Dan Williams (1): raid6: fix recovery performance regression crypto/async_tx/async_raid6_recov.c | 21 +++++++++++++-------- 1 files changed, 13 insertions(+), 8 deletions(-) Full diff of these 4 patches below (whitespace damaged): diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index 943f2ab..3df6746 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -324,6 +324,7 @@ struct dma_async_tx_descriptor * async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, struct page **blocks, struct async_submit_ctl *submit) { + void *scribble = submit->scribble; int non_zero_srcs, i; BUG_ON(faila == failb); @@ -332,11 +333,13 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); - /* we need to preserve the contents of 'blocks' for the async - * case, so punt to synchronous if a scribble buffer is not available + /* if a dma resource is not available or a scribble buffer is not + * available punt to the synchronous path. In the 'dma not + * available' case be sure to use the scribble buffer to + * preserve the content of 'blocks' as the caller intended. */ - if (!submit->scribble) { - void **ptrs = (void **) blocks; + if (async_dma_find_channel(DMA_PQ) == NULL || !scribble) { + void **ptrs = scribble ? scribble : (void **) blocks; async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) @@ -406,11 +409,13 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila, pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); - /* we need to preserve the contents of 'blocks' for the async - * case, so punt to synchronous if a scribble buffer is not available + /* if a dma resource is not available or a scribble buffer is not + * available punt to the synchronous path. 
In the 'dma not + * available' case be sure to use the scribble buffer to + * preserve the content of 'blocks' as the caller intended. */ - if (!scribble) { - void **ptrs = (void **) blocks; + if (async_dma_find_channel(DMA_PQ) == NULL || !scribble) { + void **ptrs = scribble ? scribble : (void **) blocks; async_tx_quiesce(&submit->depend_tx); for (i = 0; i < disks; i++) diff --git a/drivers/md/md.c b/drivers/md/md.c index f177de0..e3ec0fd 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2935,9 +2935,10 @@ level_show(mddev_t *mddev, char *page) static ssize_t level_store(mddev_t *mddev, const char *buf, size_t len) { - char level[16]; + char clevel[16]; ssize_t rv = len; struct mdk_personality *pers; + long level; void *priv; mdk_rdev_t *rdev; @@ -2970,19 +2971,22 @@ level_store(mddev_t *mddev, const char *buf, size_t len) } /* Now find the new personality */ - if (len == 0 || len >= sizeof(level)) + if (len == 0 || len >= sizeof(clevel)) return -EINVAL; - strncpy(level, buf, len); - if (level[len-1] == '\n') + strncpy(clevel, buf, len); + if (clevel[len-1] == '\n') len--; - level[len] = 0; + clevel[len] = 0; + if (strict_strtol(clevel, 10, &level)) + level = LEVEL_NONE; - request_module("md-%s", level); + if (request_module("md-%s", clevel) != 0) + request_module("md-level-%s", clevel); spin_lock(&pers_lock); - pers = find_pers(LEVEL_NONE, level); + pers = find_pers(level, clevel); if (!pers || !try_module_get(pers->owner)) { spin_unlock(&pers_lock); - printk(KERN_WARNING "md: personality %s not loaded\n", level); + printk(KERN_WARNING "md: personality %s not loaded\n", clevel); return -EINVAL; } spin_unlock(&pers_lock); @@ -2995,7 +2999,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) if (!pers->takeover) { module_put(pers->owner); printk(KERN_WARNING "md: %s: %s does not support personality takeover\n", - mdname(mddev), level); + mdname(mddev), clevel); return -EINVAL; } @@ -3011,7 +3015,7 @@ level_store(mddev_t *mddev, const char *buf, size_t 
len) mddev->delta_disks = 0; module_put(pers->owner); printk(KERN_WARNING "md: %s: %s would not accept array\n", - mdname(mddev), level); + mdname(mddev), clevel); return PTR_ERR(priv); } @@ -3075,6 +3079,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); md_wakeup_thread(mddev->thread); sysfs_notify(&mddev->kobj, NULL, "level"); + md_new_event(mddev); return rv; } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 042651b..d09c263 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -5607,10 +5607,17 @@ static void raid5_quiesce(mddev_t *mddev, int state) } -static void *raid5_takeover_raid0(mddev_t *mddev) +static void *raid45_takeover_raid0(mddev_t *mddev, int level) { + struct raid0_private_data *raid0_priv = mddev->private; - mddev->new_level = 5; + /* for raid0 takeover only one zone is supported */ + if (raid0_priv->nr_strip_zones > 1) { + printk(KERN_ERR "md: cannot takeover raid0 with more than one zone.\n"); + return ERR_PTR(-EINVAL); + } + + mddev->new_level = level; mddev->new_layout = ALGORITHM_PARITY_N; mddev->new_chunk_sectors = mddev->chunk_sectors; mddev->raid_disks += 1; @@ -5746,22 +5753,13 @@ static int raid6_check_reshape(mddev_t *mddev) static void *raid5_takeover(mddev_t *mddev) { /* raid5 can take over: - * raid0 - if all devices are the same - make it a raid4 layout + * raid0 - if there is only one strip zone - make it a raid4 layout * raid1 - if there are two drives. We need to know the chunk size * raid4 - trivial - just use a raid4 layout. 
* raid6 - Providing it is a *_6 layout */ - if (mddev->level == 0) { - /* for raid0 takeover only one zone is supported */ - struct raid0_private_data *raid0_priv - = mddev->private; - if (raid0_priv->nr_strip_zones > 1) { - printk(KERN_ERR "md: cannot takeover raid 0 with more than one zone.\n"); - return ERR_PTR(-EINVAL); - } - return raid5_takeover_raid0(mddev); - } - + if (mddev->level == 0) + return raid45_takeover_raid0(mddev, 5); if (mddev->level == 1) return raid5_takeover_raid1(mddev); if (mddev->level == 4) { @@ -5777,8 +5775,12 @@ static void *raid5_takeover(mddev_t *mddev) static void *raid4_takeover(mddev_t *mddev) { - /* raid4 can take over raid5 if layout is right. + /* raid4 can take over: + * raid0 - if there is only one strip zone + * raid5 - if layout is right */ + if (mddev->level == 0) + return raid45_takeover_raid0(mddev, 4); if (mddev->level == 5 && mddev->layout == ALGORITHM_PARITY_N) { mddev->new_layout = 0; -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html