[PATCH 2/2] dm era: avoid deadlock when swapping table

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Under certain conditions, swapping a table, that includes a dm-era
target, with a new table, causes a deadlock.

This happens when a status (STATUSTYPE_INFO) or message IOCTL is blocked
in the suspended dm-era target.

dm-era executes all metadata operations in a worker thread, which stops
processing requests when the target is suspended, and resumes again when
the target is resumed.

So, running 'dmsetup status' or 'dmsetup message' for a suspended dm-era
device blocks, until the device is resumed.

If we then load a new table to the device, while the aforementioned
dmsetup command is blocked in dm-era, and resume the device, we
deadlock.

The problem is that the 'dmsetup status' and 'dmsetup message' commands
hold a reference to the live table, i.e., they hold an SRCU read lock on
md->io_barrier, while they are blocked.

When the device is resumed, the old table is replaced with the new one
by dm_swap_table(), which ends up calling synchronize_srcu() on
md->io_barrier.

Since the blocked dmsetup command is holding the SRCU read lock, and the
old table is never resumed, 'dmsetup resume' blocks too, and we have a
deadlock.

The only way to recover is by rebooting.

Fix this by allowing metadata operations triggered by user space to run
in the corresponding user space thread, instead of queueing them for
execution by the dm-era worker thread.

This way, even if the device is suspended, the metadata operations will
run and release the SRCU read lock, preventing a subsequent table reload
(dm_swap_table()) from deadlocking.

To make this possible use a mutex to serialize access to the metadata
between the worker thread and the user space threads.

Fixes: eec40579d8487 ("dm: add era target")
Cc: stable@xxxxxxxxxxxxxxx # v3.15+
Signed-off-by: Nikos Tsironis <ntsironis@xxxxxxxxxxx>
---
 drivers/md/dm-era-target.c | 58 ++++++++++++++++++++++++++++----------
 1 file changed, 43 insertions(+), 15 deletions(-)

diff --git a/drivers/md/dm-era-target.c b/drivers/md/dm-era-target.c
index 3332bed2f412..c57a19320dbf 100644
--- a/drivers/md/dm-era-target.c
+++ b/drivers/md/dm-era-target.c
@@ -11,6 +11,7 @@
 #include <linux/module.h>
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
+#include <linux/mutex.h>
 
 #define DM_MSG_PREFIX "era"
 
@@ -1182,6 +1183,12 @@ struct era {
 	dm_block_t nr_blocks;
 	uint32_t sectors_per_block;
 	int sectors_per_block_shift;
+
+	/*
+	 * Serialize access to metadata between worker thread and user space
+	 * threads.
+	 */
+	struct mutex metadata_lock;
 	struct era_metadata *md;
 
 	struct workqueue_struct *wq;
@@ -1358,10 +1365,12 @@ static void do_work(struct work_struct *ws)
 {
 	struct era *era = container_of(ws, struct era, worker);
 
+	mutex_lock(&era->metadata_lock);
 	kick_off_digest(era);
 	process_old_eras(era);
 	process_deferred_bios(era);
 	process_rpc_calls(era);
+	mutex_unlock(&era->metadata_lock);
 }
 
 static void defer_bio(struct era *era, struct bio *bio)
@@ -1400,17 +1409,6 @@ static int in_worker0(struct era *era, int (*fn)(struct era_metadata *))
 	return perform_rpc(era, &rpc);
 }
 
-static int in_worker1(struct era *era,
-		      int (*fn)(struct era_metadata *, void *), void *arg)
-{
-	struct rpc rpc;
-	rpc.fn0 = NULL;
-	rpc.fn1 = fn;
-	rpc.arg = arg;
-
-	return perform_rpc(era, &rpc);
-}
-
 static void start_worker(struct era *era)
 {
 	atomic_set(&era->suspended, 0);
@@ -1439,6 +1437,7 @@ static void era_destroy(struct era *era)
 	if (era->metadata_dev)
 		dm_put_device(era->ti, era->metadata_dev);
 
+	mutex_destroy(&era->metadata_lock);
 	kfree(era);
 }
 
@@ -1539,6 +1538,8 @@ static int era_ctr(struct dm_target *ti, unsigned argc, char **argv)
 	spin_lock_init(&era->rpc_lock);
 	INIT_LIST_HEAD(&era->rpc_calls);
 
+	mutex_init(&era->metadata_lock);
+
 	ti->private = era;
 	ti->num_flush_bios = 1;
 	ti->flush_supported = true;
@@ -1591,7 +1592,9 @@ static void era_postsuspend(struct dm_target *ti)
 
 	stop_worker(era);
 
+	mutex_lock(&era->metadata_lock);
 	r = metadata_commit(era->md);
+	mutex_unlock(&era->metadata_lock);
 	if (r) {
 		DMERR("%s: metadata_commit failed", __func__);
 		/* FIXME: fail mode */
@@ -1605,19 +1608,23 @@ static int era_preresume(struct dm_target *ti)
 	dm_block_t new_size = calc_nr_blocks(era);
 
 	if (era->nr_blocks != new_size) {
+		mutex_lock(&era->metadata_lock);
 		r = metadata_resize(era->md, &new_size);
 		if (r) {
 			DMERR("%s: metadata_resize failed", __func__);
+			mutex_unlock(&era->metadata_lock);
 			return r;
 		}
 
 		r = metadata_commit(era->md);
 		if (r) {
 			DMERR("%s: metadata_commit failed", __func__);
+			mutex_unlock(&era->metadata_lock);
 			return r;
 		}
 
 		era->nr_blocks = new_size;
+		mutex_unlock(&era->metadata_lock);
 	}
 
 	start_worker(era);
@@ -1648,7 +1655,9 @@ static void era_status(struct dm_target *ti, status_type_t type,
 
 	switch (type) {
 	case STATUSTYPE_INFO:
-		r = in_worker1(era, metadata_get_stats, &stats);
+		mutex_lock(&era->metadata_lock);
+		r = metadata_get_stats(era->md, &stats);
+		mutex_unlock(&era->metadata_lock);
 		if (r)
 			goto err;
 
@@ -1682,6 +1691,25 @@ static void era_status(struct dm_target *ti, status_type_t type,
 	DMEMIT("Error");
 }
 
+static int _process_message(struct era *era, int (*fn)(struct era_metadata *))
+{
+	int ret, r;
+
+	mutex_lock(&era->metadata_lock);
+	ret = fn(era->md);
+	r = metadata_commit(era->md);
+	mutex_unlock(&era->metadata_lock);
+
+	/* Handle return value the same way as process_rpc_calls() does */
+	if (r)
+		ret = r;
+
+	/* Wake worker to handle archived writesets */
+	wake_worker(era);
+
+	return ret;
+}
+
 static int era_message(struct dm_target *ti, unsigned argc, char **argv,
 		       char *result, unsigned maxlen)
 {
@@ -1693,13 +1721,13 @@ static int era_message(struct dm_target *ti, unsigned argc, char **argv,
 	}
 
 	if (!strcasecmp(argv[0], "checkpoint"))
-		return in_worker0(era, metadata_checkpoint);
+		return _process_message(era, metadata_checkpoint);
 
 	if (!strcasecmp(argv[0], "take_metadata_snap"))
-		return in_worker0(era, metadata_take_snap);
+		return _process_message(era, metadata_take_snap);
 
 	if (!strcasecmp(argv[0], "drop_metadata_snap"))
-		return in_worker0(era, metadata_drop_snap);
+		return _process_message(era, metadata_drop_snap);
 
 	DMERR("unsupported message '%s'", argv[0]);
 	return -EINVAL;
-- 
2.30.2

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/dm-devel




[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux