[PATCH v2 03/12] raid5-cache: add a new policy

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a source file, raid5-ppl.c, for the new Partial Parity Log (PPL)
policy implementation and allow selecting the policy through the new
policy_type parameter of r5l_init_log().

Introduce a new rdev state flag, JournalPpl, so that the new policy can
be enabled from userspace by writing "journal_ppl" to the rdev state
attribute.

Signed-off-by: Artur Paszkiewicz <artur.paszkiewicz@xxxxxxxxx>
---
 drivers/md/Makefile      |  2 +-
 drivers/md/md.c          |  5 +++++
 drivers/md/md.h          |  3 +++
 drivers/md/raid5-cache.c | 17 +++++++++++++++--
 drivers/md/raid5-cache.h |  9 ++++++++-
 drivers/md/raid5-ppl.c   | 20 ++++++++++++++++++++
 drivers/md/raid5.c       | 42 ++++++++++++++++++++++++++++++++++++------
 7 files changed, 88 insertions(+), 10 deletions(-)
 create mode 100644 drivers/md/raid5-ppl.c

diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 3cbda1a..4d48714 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -18,7 +18,7 @@ dm-cache-cleaner-y += dm-cache-policy-cleaner.o
 dm-era-y	+= dm-era-target.o
 dm-verity-y	+= dm-verity-target.o
 md-mod-y	+= md.o bitmap.o
-raid456-y	+= raid5.o raid5-cache.o
+raid456-y	+= raid5.o raid5-cache.o raid5-ppl.o
 
 # Note: link order is important.  All raid personalities
 # and must come before md.o, as they each initialise 
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c7894fb..4876687 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2575,6 +2575,8 @@ state_show(struct md_rdev *rdev, char *page)
 		len += sprintf(page+len, "journal%s", sep);
 	if (test_bit(WriteMostly, &flags))
 		len += sprintf(page+len, "write_mostly%s", sep);
+	if (test_bit(JournalPpl, &flags))
+		len += sprintf(page+len, "journal_ppl%s", sep);
 	if (test_bit(Blocked, &flags) ||
 	    (rdev->badblocks.unacked_exist
 	     && !test_bit(Faulty, &flags)))
@@ -2753,6 +2755,9 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len)
 	} else if (cmd_match(buf, "-external_bbl") && (rdev->mddev->external)) {
 		clear_bit(ExternalBbl, &rdev->flags);
 		err = 0;
+	} else if (cmd_match(buf, "journal_ppl")) {
+		set_bit(JournalPpl, &rdev->flags);
+		err = 0;
 	}
 	if (!err)
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 5c08f84..2fc75ac 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -172,6 +172,9 @@ enum flag_bits {
 				 * Usually, this device should be faster
 				 * than other devices in the array
 				 */
+	JournalPpl,		/* This device is used for raid5
+				 * Partial Parity Log.
+				 */
 	ClusterRemove,
 	RemoveSynchronized,	/* synchronize_rcu() was called after
 				 * this device was known to be faulty,
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 74a0eda..fa82b9a 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -2556,16 +2556,29 @@ struct r5l_policy r5l_journal = {
 	.handle_flush_request = __r5l_handle_flush_request,
 	.quiesce = __r5l_quiesce,
 };
+extern struct r5l_policy r5l_ppl;
 
-int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
+int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev, int policy_type)
 {
 	int ret;
 	struct r5l_log *log = kzalloc(sizeof(*log), GFP_KERNEL);
 	if (!log)
 		return -ENOMEM;
 
+	switch (policy_type) {
+	case RWH_POLICY_JOURNAL:
+		log->policy = &r5l_journal;
+		break;
+	case RWH_POLICY_PPL:
+		log->policy = &r5l_ppl;
+		break;
+	default:
+		kfree(log);
+		return -EINVAL;
+	}
+
 	log->rdev = rdev;
-	log->policy = &r5l_journal;
+	log->rwh_policy = policy_type;
 
 	ret = log->policy->init_log(log, conf);
 	if (ret)
diff --git a/drivers/md/raid5-cache.h b/drivers/md/raid5-cache.h
index 52cfef4..4ba11d3 100644
--- a/drivers/md/raid5-cache.h
+++ b/drivers/md/raid5-cache.h
@@ -80,6 +80,13 @@ struct r5l_log {
 	struct work_struct deferred_io_work;
 
 	struct r5l_policy *policy;
+	enum {
+		RWH_POLICY_OFF,
+		RWH_POLICY_JOURNAL,
+		RWH_POLICY_PPL,
+	} rwh_policy;
+
+	void *private;
 };
 
 /*
@@ -140,7 +147,7 @@ struct r5l_policy {
 	void (*quiesce)(struct r5l_log *log, int state);
 };
 
-extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev);
+extern int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev, int policy_type);
 extern void r5l_exit_log(struct r5l_log *log);
 extern int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh);
 extern void r5l_write_stripe_run(struct r5l_log *log);
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
new file mode 100644
index 0000000..263fad7
--- /dev/null
+++ b/drivers/md/raid5-ppl.c
@@ -0,0 +1,20 @@
+/*
+ * Partial Parity Log for closing the RAID5 write hole
+ * Copyright (c) 2016, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/kernel.h>
+#include <linux/blkdev.h>
+#include "raid5.h"
+#include "raid5-cache.h"
+
+struct r5l_policy r5l_ppl;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index fe8c1a7..9f07769 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6833,8 +6833,10 @@ static int raid5_run(struct mddev *mddev)
 	struct r5conf *conf;
 	int working_disks = 0;
 	int dirty_parity_disks = 0;
+	int ppl_disks = 0;
 	struct md_rdev *rdev;
 	struct md_rdev *journal_dev = NULL;
+	int rwh_policy = RWH_POLICY_OFF;
 	sector_t reshape_offset = 0;
 	int i;
 	long long min_offset_diff = 0;
@@ -6847,6 +6849,10 @@ static int raid5_run(struct mddev *mddev)
 	rdev_for_each(rdev, mddev) {
 		long long diff;
 
+		if (test_bit(JournalPpl, &rdev->flags) &&
+		    test_bit(In_sync, &rdev->flags))
+			ppl_disks++;
+
 		if (test_bit(Journal, &rdev->flags)) {
 			journal_dev = rdev;
 			continue;
@@ -7037,6 +7043,22 @@ static int raid5_run(struct mddev *mddev)
 		goto abort;
 	}
 
+	if (ppl_disks) {
+		if (ppl_disks != working_disks) {
+			pr_err("md/raid:%s: distributed PPL must be enabled on all member devices - aborting\n",
+			       mdname(mddev));
+			goto abort;
+		}
+		rwh_policy = RWH_POLICY_PPL;
+	}
+
+	if (journal_dev) {
+		if (ppl_disks)
+			pr_warn("md/raid:%s: using journal device and PPL not allowed - ignoring PPL\n",
+				mdname(mddev));
+		rwh_policy = RWH_POLICY_JOURNAL;
+	}
+
 	/* device size must be a multiple of chunk size */
 	mddev->dev_sectors &= ~(mddev->chunk_sectors - 1);
 	mddev->resync_max_sectors = mddev->dev_sectors;
@@ -7171,12 +7193,17 @@ static int raid5_run(struct mddev *mddev)
 		blk_queue_max_hw_sectors(mddev->queue, UINT_MAX);
 	}
 
-	if (journal_dev) {
-		char b[BDEVNAME_SIZE];
+	if (rwh_policy) {
+		if (journal_dev) {
+			char b[BDEVNAME_SIZE];
 
-		pr_debug("md/raid:%s: using device %s as journal\n",
-			 mdname(mddev), bdevname(journal_dev->bdev, b));
-		if (r5l_init_log(conf, journal_dev))
+			pr_debug("md/raid:%s: using device %s as journal\n",
+				 mdname(mddev), bdevname(journal_dev->bdev, b));
+		} else if (rwh_policy == RWH_POLICY_PPL) {
+			pr_debug("md/raid:%s: enabling distributed PPL journal\n",
+				 mdname(mddev));
+		}
+		if (r5l_init_log(conf, journal_dev, rwh_policy))
 			goto abort;
 	}
 
@@ -7372,6 +7399,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 
 	if (test_bit(Journal, &rdev->flags)) {
 		char b[BDEVNAME_SIZE];
+		int ret;
 		if (conf->log)
 			return -EBUSY;
 
@@ -7380,7 +7408,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
 		 * The array is in readonly mode if journal is missing, so no
 		 * write requests running. We should be safe
 		 */
-		r5l_init_log(conf, rdev);
+		ret = r5l_init_log(conf, rdev, RWH_POLICY_JOURNAL);
+		if (ret)
+			return ret;
 		pr_debug("md/raid:%s: using device %s as journal\n",
 			 mdname(mddev), bdevname(rdev->bdev, b));
 		return 0;
-- 
2.10.1

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux