Re: [md PATCH 00/28] md patches destined for -next and the next merge window

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Apr 14, 2010 at 11:21 PM, NeilBrown <neilb@xxxxxxx> wrote:
> Hi all,
>  I thought it was time I posted my patch queue for review to be sure
>  it would be ready for the next merge window.
>
>  Apart from sundry bug fixes and minor improvements there are two big
>  themes here
>  1/ enhancements to level conversion so e.g. we can now convert RAID0
>    to RAID5 or RAID10 (near-2 only) and back.
>  2/ general refactoring of bits of md code - some functions
>    (e.g. do_md_stop) had become really big and were just a mess of
>    stuff that all had to be done at much the same time.  It is now
>    broken into somewhat meaningful parts.  There is a deeper reason
>    for doing this refactoring .... you'll find out soon. :-)
>
>  This is all available at
>    git://neil.brown.name/md for-next
>  and should be in linux-next in a day or two.
>
>
>  All review, testing, and comments most welcome.
>

A few fixes/enhancements that I put together while playing with the takeover code are available at:

  git://git.kernel.org/pub/scm/linux/kernel/git/djbw/md.git for-neil

Dan Williams (3):
      md/raid4: permit raid0 takeover
      md: notify mdstat waiters of level change
      md: allow integers to be passed to md/level

 drivers/md/md.c    |   25 +++++++++++++++----------
 drivers/md/raid5.c |   32 +++++++++++++++++---------------
 2 files changed, 32 insertions(+), 25 deletions(-)

I'd like to get "raid6: fix recovery performance regression" in for
2.6.34.  I pushed it out to the URL below; let me know if you would
prefer that I just send it directly.

  git://git.kernel.org/pub/scm/linux/kernel/git/djbw/md.git fixes

Dan Williams (1):
      raid6: fix recovery performance regression

 crypto/async_tx/async_raid6_recov.c |   21 +++++++++++++--------
 1 files changed, 13 insertions(+), 8 deletions(-)

Full diff of these 4 patches below (whitespace damaged):

diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 943f2ab..3df6746 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -324,6 +324,7 @@ struct dma_async_tx_descriptor *
 async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,
 			struct page **blocks, struct async_submit_ctl *submit)
 {
+	void *scribble = submit->scribble;
 	int non_zero_srcs, i;

 	BUG_ON(faila == failb);
@@ -332,11 +333,13 @@ async_raid6_2data_recov(int disks, size_t bytes, int faila, int failb,

 	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

-	/* we need to preserve the contents of 'blocks' for the async
-	 * case, so punt to synchronous if a scribble buffer is not available
+	/* if a dma resource is not available or a scribble buffer is not
+	 * available punt to the synchronous path.  In the 'dma not
+	 * available' case be sure to use the scribble buffer to
+	 * preserve the content of 'blocks' as the caller intended.
 	 */
-	if (!submit->scribble) {
-		void **ptrs = (void **) blocks;
+	if (async_dma_find_channel(DMA_PQ) == NULL || !scribble) {
+		void **ptrs = scribble ? scribble : (void **) blocks;

 		async_tx_quiesce(&submit->depend_tx);
 		for (i = 0; i < disks; i++)
@@ -406,11 +409,13 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,

 	pr_debug("%s: disks: %d len: %zu\n", __func__, disks, bytes);

-	/* we need to preserve the contents of 'blocks' for the async
-	 * case, so punt to synchronous if a scribble buffer is not available
+	/* if a dma resource is not available or a scribble buffer is not
+	 * available punt to the synchronous path.  In the 'dma not
+	 * available' case be sure to use the scribble buffer to
+	 * preserve the content of 'blocks' as the caller intended.
 	 */
-	if (!scribble) {
-		void **ptrs = (void **) blocks;
+	if (async_dma_find_channel(DMA_PQ) == NULL || !scribble) {
+		void **ptrs = scribble ? scribble : (void **) blocks;

 		async_tx_quiesce(&submit->depend_tx);
 		for (i = 0; i < disks; i++)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index f177de0..e3ec0fd 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2935,9 +2935,10 @@ level_show(mddev_t *mddev, char *page)
 static ssize_t
 level_store(mddev_t *mddev, const char *buf, size_t len)
 {
-	char level[16];
+	char clevel[16];
 	ssize_t rv = len;
 	struct mdk_personality *pers;
+	long level;
 	void *priv;
 	mdk_rdev_t *rdev;

@@ -2970,19 +2971,22 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 	}

 	/* Now find the new personality */
-	if (len == 0 || len >= sizeof(level))
+	if (len == 0 || len >= sizeof(clevel))
 		return -EINVAL;
-	strncpy(level, buf, len);
-	if (level[len-1] == '\n')
+	strncpy(clevel, buf, len);
+	if (clevel[len-1] == '\n')
 		len--;
-	level[len] = 0;
+	clevel[len] = 0;
+	if (strict_strtol(clevel, 10, &level))
+		level = LEVEL_NONE;

-	request_module("md-%s", level);
+	if (request_module("md-%s", clevel) != 0)
+		request_module("md-level-%s", clevel);
 	spin_lock(&pers_lock);
-	pers = find_pers(LEVEL_NONE, level);
+	pers = find_pers(level, clevel);
 	if (!pers || !try_module_get(pers->owner)) {
 		spin_unlock(&pers_lock);
-		printk(KERN_WARNING "md: personality %s not loaded\n", level);
+		printk(KERN_WARNING "md: personality %s not loaded\n", clevel);
 		return -EINVAL;
 	}
 	spin_unlock(&pers_lock);
@@ -2995,7 +2999,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 	if (!pers->takeover) {
 		module_put(pers->owner);
 		printk(KERN_WARNING "md: %s: %s does not support personality takeover\n",
-		       mdname(mddev), level);
+		       mdname(mddev), clevel);
 		return -EINVAL;
 	}

@@ -3011,7 +3015,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 		mddev->delta_disks = 0;
 		module_put(pers->owner);
 		printk(KERN_WARNING "md: %s: %s would not accept array\n",
-		       mdname(mddev), level);
+		       mdname(mddev), clevel);
 		return PTR_ERR(priv);
 	}

@@ -3075,6 +3079,7 @@ level_store(mddev_t *mddev, const char *buf, size_t len)
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
 	sysfs_notify(&mddev->kobj, NULL, "level");
+	md_new_event(mddev);
 	return rv;
 }

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 042651b..d09c263 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -5607,10 +5607,17 @@ static void raid5_quiesce(mddev_t *mddev, int state)
 }


-static void *raid5_takeover_raid0(mddev_t *mddev)
+static void *raid45_takeover_raid0(mddev_t *mddev, int level)
 {
+	struct raid0_private_data *raid0_priv = mddev->private;

-	mddev->new_level = 5;
+	/* for raid0 takeover only one zone is supported */
+	if (raid0_priv->nr_strip_zones > 1) {
+		printk(KERN_ERR "md: cannot takeover raid0 with more than one zone.\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	mddev->new_level = level;
 	mddev->new_layout = ALGORITHM_PARITY_N;
 	mddev->new_chunk_sectors = mddev->chunk_sectors;
 	mddev->raid_disks += 1;
@@ -5746,22 +5753,13 @@ static int raid6_check_reshape(mddev_t *mddev)
 static void *raid5_takeover(mddev_t *mddev)
 {
 	/* raid5 can take over:
-	 *  raid0 - if all devices are the same - make it a raid4 layout
+	 *  raid0 - if there is only one strip zone - make it a raid4 layout
 	 *  raid1 - if there are two drives.  We need to know the chunk size
 	 *  raid4 - trivial - just use a raid4 layout.
 	 *  raid6 - Providing it is a *_6 layout
 	 */
-	if (mddev->level == 0) {
-		/* for raid0 takeover only one zone is supported */
-		struct raid0_private_data *raid0_priv
-			= mddev->private;
-		if (raid0_priv->nr_strip_zones > 1) {
-			printk(KERN_ERR "md: cannot takeover raid 0 with more than one zone.\n");
-			return ERR_PTR(-EINVAL);
-		}
-		return raid5_takeover_raid0(mddev);
-	}
-
+	if (mddev->level == 0)
+		return raid45_takeover_raid0(mddev, 5);
 	if (mddev->level == 1)
 		return raid5_takeover_raid1(mddev);
 	if (mddev->level == 4) {
@@ -5777,8 +5775,12 @@ static void *raid5_takeover(mddev_t *mddev)

 static void *raid4_takeover(mddev_t *mddev)
 {
-	/* raid4 can take over raid5 if layout is right.
+	/* raid4 can take over:
+	 *  raid0 - if there is only one strip zone
+	 *  raid5 - if layout is right
 	 */
+	if (mddev->level == 0)
+		return raid45_takeover_raid0(mddev, 4);
 	if (mddev->level == 5 &&
 	    mddev->layout == ALGORITHM_PARITY_N) {
 		mddev->new_layout = 0;
--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux