On Sun, 2 May 2010 10:59:02 -0700 Dan Williams <dan.j.williams@xxxxxxxxx> wrote: > On Wed, Apr 14, 2010 at 11:21 PM, NeilBrown <neilb@xxxxxxx> wrote: > > Hi all, > > I thought it was time I posted my patch queue for review to be sure > > it would be ready for the next merge window. > > > > Apart from sundry bug fixes and minor improvements there are two big > > themes here > > 1/ enhancements to level conversion so e.g. we can now convert RAID0 > > to RAID5 or RAID10 (near-2 only) and back. > > 2/ general refactoring of bits of md code - some functions > > (e.g. do_md_stop) had become really big and were just a mess of > > stuff that all had to be done at much the same time. It is now > > broken into somewhat meaningful parts. There is a deeper reason > > for doing this refactoring .... you'll find out soon. :-) > > > > This is all available at > > git://neil.brown.name/md for-next > > and should be in linux-next in a day or two. > > > > > > All review, testing, and comments most welcome. > > > > A few fixes/enhancements while playing with the takeover code are available at: > > git://git.kernel.org/pub/scm/linux/kernel/git/djbw/md.git for-neil Thanks - they all make sense. I've merged them with my queue and pushed them to my for-next branch. NeilBrown > > Dan Williams (3): > md/raid4: permit raid0 takeover > md: notify mdstat waiters of level change > md: allow integers to be passed to md/level > > drivers/md/md.c | 25 +++++++++++++++---------- > drivers/md/raid5.c | 32 +++++++++++++++++--------------- > 2 files changed, 32 insertions(+), 25 deletions(-) > > I'd like to get "raid6: fix recovery performance regression" in for > 2.6.34. I pushed it out to the url below, let me know if you just > want me to send it directly. 
> > git://git.kernel.org/pub/scm/linux/kernel/git/djbw/md.git fixes > > Dan Williams (1): > raid6: fix recovery performance regression > > crypto/async_tx/async_raid6_recov.c | 21 +++++++++++++-------- > 1 files changed, 13 insertions(+), 8 deletions(-) > > Full diff of these 4 patches below (whitespace damaged): > > diff --git a/crypto/async_tx/async_raid6_recov.c > b/crypto/async_tx/async_raid6_recov.c > index 943f2ab..3df6746 100644 > --- a/crypto/async_tx/async_raid6_recov.c > +++ b/crypto/async_tx/async_raid6_recov.c > @@ -324,6 +324,7 @@ struct dma_async_tx_descriptor * > async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb, > struct page **blocks, struct async_submit_ctl *submit) > { > + void *scribble = submit->scribble; > int non_zero_srcs, i; > > BUG_ON(faila == failb); > @@ -332,11 +333,13 @@ async_raid6_2data_recov(int disks, size_t bytes, > int faila, int failb, > > pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); > > - /* we need to preserve the contents of 'blocks' for the async > - * case, so punt to synchronous if a scribble buffer is not available > + /* if a dma resource is not available or a scribble buffer is not > + * available punt to the synchronous path. In the 'dma not > + * available' case be sure to use the scribble buffer to > + * preserve the content of 'blocks' as the caller intended. > */ > - if (!submit->scribble) { > - void **ptrs = (void **) blocks; > + if (async_dma_find_channel(DMA_PQ) == NULL || !scribble) { > + void **ptrs = scribble ? 
scribble : (void **) blocks; > > async_tx_quiesce(&submit->depend_tx); > for (i = 0; i < disks; i++) > @@ -406,11 +409,13 @@ async_raid6_datap_recov(int disks, size_t bytes, > int faila, > > pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes); > > - /* we need to preserve the contents of 'blocks' for the async > - * case, so punt to synchronous if a scribble buffer is not available > + /* if a dma resource is not available or a scribble buffer is not > + * available punt to the synchronous path. In the 'dma not > + * available' case be sure to use the scribble buffer to > + * preserve the content of 'blocks' as the caller intended. > */ > - if (!scribble) { > - void **ptrs = (void **) blocks; > + if (async_dma_find_channel(DMA_PQ) == NULL || !scribble) { > + void **ptrs = scribble ? scribble : (void **) blocks; > > async_tx_quiesce(&submit->depend_tx); > for (i = 0; i < disks; i++) > diff --git a/drivers/md/md.c b/drivers/md/md.c > index f177de0..e3ec0fd 100644 > --- a/drivers/md/md.c > +++ b/drivers/md/md.c > @@ -2935,9 +2935,10 @@ level_show(mddev_t *mddev, char *page) > static ssize_t > level_store(mddev_t *mddev, const char *buf, size_t len) > { > - char level[16]; > + char clevel[16]; > ssize_t rv = len; > struct mdk_personality *pers; > + long level; > void *priv; > mdk_rdev_t *rdev; > > @@ -2970,19 +2971,22 @@ level_store(mddev_t *mddev, const char *buf, size_t len) > } > > /* Now find the new personality */ > - if (len == 0 || len >= sizeof(level)) > + if (len == 0 || len >= sizeof(clevel)) > return -EINVAL; > - strncpy(level, buf, len); > - if (level[len-1] == '\n') > + strncpy(clevel, buf, len); > + if (clevel[len-1] == '\n') > len--; > - level[len] = 0; > + clevel[len] = 0; > + if (strict_strtol(clevel, 10, &level)) > + level = LEVEL_NONE; > > - request_module("md-%s", level); > + if (request_module("md-%s", clevel) != 0) > + request_module("md-level-%s", clevel); > spin_lock(&pers_lock); > - pers = find_pers(LEVEL_NONE, level); > + pers = 
find_pers(level, clevel); > if (!pers || !try_module_get(pers->owner)) { > spin_unlock(&pers_lock); > - printk(KERN_WARNING "md: personality %s not loaded\n", level); > + printk(KERN_WARNING "md: personality %s not loaded\n", clevel); > return -EINVAL; > } > spin_unlock(&pers_lock); > @@ -2995,7 +2999,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) > if (!pers->takeover) { > module_put(pers->owner); > printk(KERN_WARNING "md: %s: %s does not support personality takeover\n", > - mdname(mddev), level); > + mdname(mddev), clevel); > return -EINVAL; > } > > @@ -3011,7 +3015,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) > mddev->delta_disks = 0; > module_put(pers->owner); > printk(KERN_WARNING "md: %s: %s would not accept array\n", > - mdname(mddev), level); > + mdname(mddev), clevel); > return PTR_ERR(priv); > } > > @@ -3075,6 +3079,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len) > set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); > md_wakeup_thread(mddev->thread); > sysfs_notify(&mddev->kobj, NULL, "level"); > + md_new_event(mddev); > return rv; > } > > diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c > index 042651b..d09c263 100644 > --- a/drivers/md/raid5.c > +++ b/drivers/md/raid5.c > @@ -5607,10 +5607,17 @@ static void raid5_quiesce(mddev_t *mddev, int state) > } > > > -static void *raid5_takeover_raid0(mddev_t *mddev) > +static void *raid45_takeover_raid0(mddev_t *mddev, int level) > { > + struct raid0_private_data *raid0_priv = mddev->private; > > - mddev->new_level = 5; > + /* for raid0 takeover only one zone is supported */ > + if (raid0_priv->nr_strip_zones > 1) { > + printk(KERN_ERR "md: cannot takeover raid0 with more than one zone.\n"); > + return ERR_PTR(-EINVAL); > + } > + > + mddev->new_level = level; > mddev->new_layout = ALGORITHM_PARITY_N; > mddev->new_chunk_sectors = mddev->chunk_sectors; > mddev->raid_disks += 1; > @@ -5746,22 +5753,13 @@ static int raid6_check_reshape(mddev_t *mddev) > static void 
*raid5_takeover(mddev_t *mddev) > { > /* raid5 can take over: > - * raid0 - if all devices are the same - make it a raid4 layout > + * raid0 - if there is only one strip zone - make it a raid4 layout > * raid1 - if there are two drives. We need to know the chunk size > * raid4 - trivial - just use a raid4 layout. > * raid6 - Providing it is a *_6 layout > */ > - if (mddev->level == 0) { > - /* for raid0 takeover only one zone is supported */ > - struct raid0_private_data *raid0_priv > - = mddev->private; > - if (raid0_priv->nr_strip_zones > 1) { > - printk(KERN_ERR "md: cannot takeover raid 0 with more than one zone.\n"); > - return ERR_PTR(-EINVAL); > - } > - return raid5_takeover_raid0(mddev); > - } > - > + if (mddev->level == 0) > + return raid45_takeover_raid0(mddev, 5); > if (mddev->level == 1) > return raid5_takeover_raid1(mddev); > if (mddev->level == 4) { > @@ -5777,8 +5775,12 @@ static void *raid5_takeover(mddev_t *mddev) > > static void *raid4_takeover(mddev_t *mddev) > { > - /* raid4 can take over raid5 if layout is right. > + /* raid4 can take over: > + * raid0 - if there is only one strip zone > + * raid5 - if layout is right > */ > + if (mddev->level == 0) > + return raid45_takeover_raid0(mddev, 4); > if (mddev->level == 5 && > mddev->layout == ALGORITHM_PARITY_N) { > mddev->new_layout = 0; -- To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html