[PATCH 7/7] block: don't grab q->debugfs_mutex

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



All block internal state for dealing adding/removing debugfs entries
have been removed, and debugfs can sync everything for us in fs level,
so don't grab q->debugfs_mutex for adding/removing block internal debugfs
entries.

Now q->debugfs_mutex is only used for blktrace, meantime move creating
queue debugfs dir code out of q->sysfs_lock. Both the two locks are
connected with queue freeze IO lock.  Then queue freeze IO lock chain
with debugfs lock is cut.

The following lockdep report can be fixed:

https://lore.kernel.org/linux-block/ougniadskhks7uyxguxihgeuh2pv4yaqv4q3emo4gwuolgzdt6@brotly74p6bs/

Follows contexts which adds/removes debugfs entries:

- update nr_hw_queues

- add/remove disks

- elevator switch

- blktrace

blktrace only adds entries under disk top directory, so we can ignore it,
because it can only work iff disk is added. Also nothing overlapped with
the other two contex, blktrace context is fine.

Elevator switch is only allowed after disk is added, so there isn't race
with add/remove disk. blk_mq_update_nr_hw_queues() always restores to
previous elevator, so no race between these two. Elevator switch context
is fine.

So far blk_mq_update_nr_hw_queues() doesn't hold debugfs lock for
adding/removing hctx entries, there might be race with add/remove disk,
which is just fine in reality:

- blk_mq_update_nr_hw_queues() is usually for error recovery, and disk
won't be added/removed at the same time

- even though there is race between the two contexts, it is just fine,
since hctx won't be freed until queue is dead

- we never see reports in this area without holding debugfs in
blk_mq_update_nr_hw_queues()

Reported-by: Shinichiro Kawasaki <shinichiro.kawasaki@xxxxxxx>
Signed-off-by: Ming Lei <ming.lei@xxxxxxxxxx>
---
 block/blk-mq-debugfs.c  | 12 ------------
 block/blk-mq-sched.c    |  8 --------
 block/blk-rq-qos.c      |  7 +------
 block/blk-sysfs.c       |  6 +-----
 kernel/trace/blktrace.c |  2 ++
 5 files changed, 4 insertions(+), 31 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 6d98c2a6e7c6..9601823730e2 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -770,8 +770,6 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
 	struct elevator_type *e = q->elevator->type;
 	struct dentry *sched_dir;
 
-	lockdep_assert_held(&q->debugfs_mutex);
-
 	/*
 	 * If the parent directory has not been created yet, return, we will be
 	 * called again later on and the directory/files will be created then.
@@ -793,8 +791,6 @@ void blk_mq_debugfs_unregister_sched(struct request_queue *q)
 {
 	struct dentry *queue_dir = blk_mq_get_queue_entry(q);
 
-	lockdep_assert_held(&q->debugfs_mutex);
-
 	if (IS_ERR_OR_NULL(queue_dir))
 		return;
 	debugfs_lookup_and_remove("sched", queue_dir);
@@ -832,8 +828,6 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
 	struct dentry *queue_dir = blk_mq_get_queue_entry(q);
 	struct dentry *rqos_top;
 
-	lockdep_assert_held(&q->debugfs_mutex);
-
 	if (IS_ERR_OR_NULL(queue_dir))
 		return;
 
@@ -853,8 +847,6 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
 	struct dentry *rqos_top;
 	struct dentry *rqos_dir;
 
-	lockdep_assert_held(&q->debugfs_mutex);
-
 	if (!rqos->ops->debugfs_attrs)
 		return;
 
@@ -874,8 +866,6 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
 	struct elevator_type *e = q->elevator->type;
 	struct dentry *sched_dir;
 
-	lockdep_assert_held(&q->debugfs_mutex);
-
 	/*
 	 * If the parent debugfs directory has not been created yet, return;
 	 * We will be called again later on with appropriate parent debugfs
@@ -897,8 +887,6 @@ void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx)
 {
 	struct dentry *sched_dir;
 
-	lockdep_assert_held(&hctx->queue->debugfs_mutex);
-
 	sched_dir = blk_mq_get_hctx_sched_entry(hctx);
 	if (sched_dir) {
 		debugfs_remove_recursive(sched_dir);
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 7442ca27c2bf..1ca127297b2c 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -469,9 +469,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 	if (ret)
 		goto err_free_map_and_rqs;
 
-	mutex_lock(&q->debugfs_mutex);
 	blk_mq_debugfs_register_sched(q);
-	mutex_unlock(&q->debugfs_mutex);
 
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (e->ops.init_hctx) {
@@ -484,9 +482,7 @@ int blk_mq_init_sched(struct request_queue *q, struct elevator_type *e)
 				return ret;
 			}
 		}
-		mutex_lock(&q->debugfs_mutex);
 		blk_mq_debugfs_register_sched_hctx(q, hctx);
-		mutex_unlock(&q->debugfs_mutex);
 	}
 
 	return 0;
@@ -527,9 +523,7 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 	unsigned int flags = 0;
 
 	queue_for_each_hw_ctx(q, hctx, i) {
-		mutex_lock(&q->debugfs_mutex);
 		blk_mq_debugfs_unregister_sched_hctx(hctx);
-		mutex_unlock(&q->debugfs_mutex);
 
 		if (e->type->ops.exit_hctx && hctx->sched_data) {
 			e->type->ops.exit_hctx(hctx, i);
@@ -538,9 +532,7 @@ void blk_mq_exit_sched(struct request_queue *q, struct elevator_queue *e)
 		flags = hctx->flags;
 	}
 
-	mutex_lock(&q->debugfs_mutex);
 	blk_mq_debugfs_unregister_sched(q);
-	mutex_unlock(&q->debugfs_mutex);
 
 	if (e->type->ops.exit_sched)
 		e->type->ops.exit_sched(e);
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index d4d4f4dc0e23..529640ed2ff5 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -320,11 +320,8 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
 
 	blk_mq_unfreeze_queue(q, memflags);
 
-	if (rqos->ops->debugfs_attrs) {
-		mutex_lock(&q->debugfs_mutex);
+	if (rqos->ops->debugfs_attrs)
 		blk_mq_debugfs_register_rqos(rqos);
-		mutex_unlock(&q->debugfs_mutex);
-	}
 
 	return 0;
 ebusy:
@@ -349,7 +346,5 @@ void rq_qos_del(struct rq_qos *rqos)
 	}
 	blk_mq_unfreeze_queue(q, memflags);
 
-	mutex_lock(&q->debugfs_mutex);
 	blk_mq_debugfs_unregister_rqos(rqos);
-	mutex_unlock(&q->debugfs_mutex);
 }
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 68bd84e06aac..b0bfb4c82e0e 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -744,10 +744,8 @@ static void blk_debugfs_remove(struct gendisk *disk)
 {
 	struct request_queue *q = disk->queue;
 
-	mutex_lock(&q->debugfs_mutex);
 	blk_trace_shutdown(q);
 	debugfs_lookup_and_remove(disk->disk_name, blk_debugfs_root);
-	mutex_unlock(&q->debugfs_mutex);
 }
 
 /**
@@ -769,14 +767,12 @@ int blk_register_queue(struct gendisk *disk)
 		if (ret)
 			goto out_put_queue_kobj;
 	}
-	mutex_lock(&q->sysfs_lock);
 
-	mutex_lock(&q->debugfs_mutex);
 	debugfs_create_dir(disk->disk_name, blk_debugfs_root);
 	if (queue_is_mq(q))
 		blk_mq_debugfs_register(q);
-	mutex_unlock(&q->debugfs_mutex);
 
+	mutex_lock(&q->sysfs_lock);
 	ret = disk_register_independent_access_ranges(disk);
 	if (ret)
 		goto out_debugfs_remove;
diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c
index 32cda8f5d008..13efd48adcc3 100644
--- a/kernel/trace/blktrace.c
+++ b/kernel/trace/blktrace.c
@@ -775,9 +775,11 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)
  **/
 void blk_trace_shutdown(struct request_queue *q)
 {
+	mutex_lock(&q->debugfs_mutex);
 	if (rcu_dereference_protected(q->blk_trace,
 				      lockdep_is_held(&q->debugfs_mutex)))
 		__blk_trace_remove(q);
+	mutex_unlock(&q->debugfs_mutex);
 }
 
 #ifdef CONFIG_BLK_CGROUP
-- 
2.47.0





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux