Hi,
在 2024/10/25 15:05, Yang Erkun 写道:
From: Yang Erkun <yangerkun@xxxxxxxxxx>
My colleague Wupeng found the following problems during fault injection:
BUG: unable to handle page fault for address: fffffbfff809d073
PGD 6e648067 P4D 123ec8067 PUD 123ec4067 PMD 100e38067 PTE 0
Oops: Oops: 0000 [#1] PREEMPT SMP KASAN NOPTI
CPU: 5 UID: 0 PID: 755 Comm: modprobe Not tainted 6.12.0-rc3+ #17
Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS
1.16.1-2.fc37 04/01/2014
RIP: 0010:__asan_load8+0x4c/0xa0
...
Call Trace:
<TASK>
blkdev_put_whole+0x41/0x70
bdev_release+0x1a3/0x250
blkdev_release+0x11/0x20
__fput+0x1d7/0x4a0
task_work_run+0xfc/0x180
syscall_exit_to_user_mode+0x1de/0x1f0
do_syscall_64+0x6b/0x170
entry_SYSCALL_64_after_hwframe+0x76/0x7e
The error path of modprobe will ignores the refcnt for the module, and
directly releases everything about this all associated resources. For
the brd module, it can be brd_fops. The brd_init function attempts to
create 16 ramdisks; each time add_disk is called, a file for the block
device is created to help do partition scanning and remains alive util
we return to userspace(Also we can open it with another user thread).
Let's say brd_init has successfully create the first ramdisk, but fail
to create the second, the error handling path will release code segment.
Consequently, bdev_release for file of first ramdisk will trigger null
pointer since brd_fops has been removed.
To resolve issue, implement a solution similar to loop_init, and
reintroduce brd_devices_mutex to help serialize modifications to
brd_list.
This patch looks good to me, just some nits below:
Fixes: 7f9b348cb5e9 ("brd: convert to blk_alloc_disk/blk_cleanup_disk")
Reported-by: Wupeng Ma <mawupeng1@xxxxxxxxxx>
Signed-off-by: Yang Erkun <yangerkun@xxxxxxxxxx>
---
drivers/block/brd.c | 70 ++++++++++++++++++++-------------------------
1 file changed, 31 insertions(+), 39 deletions(-)
diff --git a/drivers/block/brd.c b/drivers/block/brd.c
index 2fd1ed101748..f86dd0795d3c 100644
--- a/drivers/block/brd.c
+++ b/drivers/block/brd.c
@@ -316,6 +316,7 @@ __setup("ramdisk_size=", ramdisk_size);
* (should share code eventually).
*/
static LIST_HEAD(brd_devices);
+static DEFINE_MUTEX(brd_devices_mutex);
static struct dentry *brd_debugfs_dir;
static int brd_alloc(int i)
@@ -340,14 +341,21 @@ static int brd_alloc(int i)
BLK_FEAT_NOWAIT,
};
- list_for_each_entry(brd, &brd_devices, brd_list)
- if (brd->brd_number == i)
+ mutex_lock(&brd_devices_mutex);
+ list_for_each_entry(brd, &brd_devices, brd_list) {
+ if (brd->brd_number == i) {
+ mutex_unlock(&brd_devices_mutex);
return -EEXIST;
+ }
+ }
brd = kzalloc(sizeof(*brd), GFP_KERNEL);
- if (!brd)
+ if (!brd) {
+ mutex_unlock(&brd_devices_mutex);
return -ENOMEM;
+ }
brd->brd_number = i;
list_add_tail(&brd->brd_list, &brd_devices);
+ mutex_unlock(&brd_devices_mutex);
xa_init(&brd->brd_pages);
The global mutex is used to protect brd_alloc() from module init and
brd_alloc() from creating on open? If so, I think just use the lock in
the caller is cleaner.
@@ -378,7 +386,9 @@ static int brd_alloc(int i)
out_cleanup_disk:
put_disk(disk);
out_free_dev:
+ mutex_lock(&brd_devices_mutex);
list_del(&brd->brd_list);
+ mutex_unlock(&brd_devices_mutex);
kfree(brd);
return err;
}
@@ -388,21 +398,6 @@ static void brd_probe(dev_t dev)
brd_alloc(MINOR(dev) / max_part);
}
-static void brd_cleanup(void)
-{
- struct brd_device *brd, *next;
-
- debugfs_remove_recursive(brd_debugfs_dir);
-
- list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
- del_gendisk(brd->brd_disk);
- put_disk(brd->brd_disk);
- brd_free_pages(brd);
- list_del(&brd->brd_list);
- kfree(brd);
- }
-}
-
static inline void brd_check_and_reset_par(void)
{
if (unlikely(!max_part))
@@ -424,17 +419,7 @@ static inline void brd_check_and_reset_par(void)
static int __init brd_init(void)
{
- int err, i;
-
- brd_check_and_reset_par();
-
- brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
-
- for (i = 0; i < rd_nr; i++) {
- err = brd_alloc(i);
- if (err)
- goto out_free;
- }
+ int i;
/*
* brd module now has a feature to instantiate underlying device
@@ -450,28 +435,35 @@ static int __init brd_init(void)
* If (X / max_part) was not already created it will be created
* dynamically.
*/
+ brd_check_and_reset_par();
+ brd_debugfs_dir = debugfs_create_dir("ramdisk_pages", NULL);
if (__register_blkdev(RAMDISK_MAJOR, "ramdisk", brd_probe)) {
- err = -EIO;
- goto out_free;
+ pr_info("brd: module NOT loaded !!!\n");
+ debugfs_remove_recursive(brd_debugfs_dir);
+ return -EIO;
}
+ for (i = 0; i < rd_nr; i++)
+ brd_alloc(i);
Now that brd_alloc() can fail while loding the module will succed,
please also add err log if brd_alloc() failed.
+
pr_info("brd: module loaded\n");
return 0;
-
-out_free:
- brd_cleanup();
-
- pr_info("brd: module NOT loaded !!!\n");
- return err;
}
static void __exit brd_exit(void)
{
+ struct brd_device *brd, *next;
unregister_blkdev(RAMDISK_MAJOR, "ramdisk");
- brd_cleanup();
-
+ debugfs_remove_recursive(brd_debugfs_dir);
+ list_for_each_entry_safe(brd, next, &brd_devices, brd_list) {
+ del_gendisk(brd->brd_disk);
+ put_disk(brd->brd_disk);
+ brd_free_pages(brd);
+ list_del(&brd->brd_list);
+ kfree(brd);
+ }
Why the global mutex is not used here? It took a while for me to figure
out that this is safe, because unregister_blkdev() make sure no new
brd_alloc() can be called from open path, and in progress open will be
synchronized with del_gendisk().
However, please add some comments or just hold the global mutex.
Thanks,
Kuai
pr_info("brd: module unloaded\n");
}