Re: Nullblk configfs oddities

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 4/19/22 07:24, Jens Axboe wrote:
> On 4/18/22 4:21 PM, Chaitanya Kulkarni wrote:
>> On 4/18/22 15:14, Jens Axboe wrote:
>>> On 4/18/22 3:54 PM, Chaitanya Kulkarni wrote:
>>>> On 4/18/22 14:38, Josef Bacik wrote:
>>>>> Hello,
>>>>>
>>>>> I'm trying to add a test to fsperf and it requires the use of nullblk.  I'm
>>>>> trying to use the configfs thing, and it's doing some odd things.  My basic
>>>>> reproducer is
>>>>>
>>>>> modprobe null_blk
>>>>> mkdir /sys/kernel/config/nullb/nullb0
>>>>> echo some shit into the config
>>>>> echo 1 > /sys/kernel/config/nullb/nullb0/power
>>>>>
>>>>> Now null_blk apparently defaults to nr_devices == 1, so it creates nullb0 on
>>>>> modprobe.  But this doesn't show up in the configfs directory.  There's no way
>>>>> to find this out until when I try to mkfs my nullb0 and it doesn't work.  The
>>>>> above steps gets my device created at /dev/nullb1, but there's no actual way to
>>>>> figure out that's what happened.  If I do something like
>>>>> /sys/kernel/config/nullb/nullbfsperf I still just get nullb<number>, I don't get
>>>>> my fancy name.
>>>>>
>>>>
>>>> when you load module with default module parameter it will create a
>>>> default device with no memory backed mode, that will not be visible in
>>>> the configfs.
>>>
>>> Right, the problem is really that pre-configured devices (via nr_devices
>>> being bigger than 0, which is the default) don't show up in configfs.
>>> That, to me, is the real issue here, because it means you need to know
>>> which ones are already setup before doing mkdir for a new one.
>>>
>>> On top of that, it's also odd that they don't show up there to begin
>>> with.
>>>
>>
>> it is indeed confusing, maybe we need to find a way to populate the
>> configfs when loading the module? but I'm not sure if that is
>> the right approach since configs ideally should be populated by
>> user.
>>
>> OTOH we can make the memory_backed module param [1] so user can
>> tentatively not use configfs and only rely on default configuration ?
> 
> Arguably configfs should just be disabled if loading with nr_devices
> larger than 0, as it's a mess of an API as it stands. But probably too
> late for that. The fact that we also have an option that's specific to
> configfs just makes it even worse.
> 
> I don't know much about configfs, but pre-populating with the configured
> devices and options would be ideal and completely solve this. I think
> that would be the best solution given the current situation. Not that
> it's THAT important, null_blk is a developer tool and as such can have
> some sharper and rouger edges. Still would be nice to make it saner,
> though.
> 

I came up with this. It does prepopulate configfs nullb directory with the
devices created on modprobe. But... doing an rmmod now always gives a
"rmmod: ERROR: Module null_blk is in use" because configfs takes a ref on
nullblk module for each entry. So the user now must do an rmdir of all
prepopulated devices before doing rmmod. Not ideal. And messing up with
the module references is probably not a good idea...


diff --git a/drivers/block/null_blk/main.c b/drivers/block/null_blk/main.c
index 1aa4897685f6..98933555b59f 100644
--- a/drivers/block/null_blk/main.c
+++ b/drivers/block/null_blk/main.c
@@ -9,6 +9,7 @@
 #include <linux/sched.h>
 #include <linux/fs.h>
 #include <linux/init.h>
+#include <linux/namei.h>
 #include "null_blk.h"

 #define FREE_BATCH		16
@@ -667,6 +668,7 @@ static void null_free_dev(struct nullb_device *dev)

 	null_free_zoned_dev(dev);
 	badblocks_exit(&dev->badblocks);
+	kfree(dev->config_path);
 	kfree(dev);
 }

@@ -2088,12 +2090,121 @@ static int null_add_dev(struct nullb_device *dev)
 	return rv;
 }

+#ifdef CONFIG_CONFIGFS_FS
+
+static int nullb_create_dev(int idx)
+{
+	char disk_name[DISK_NAME_LEN];
+	struct nullb_device *dev;
+	struct config_item *item;
+	struct dentry *dentry;
+	struct path parent;
+	const char *path;
+	int ret;
+
+	/* Use configfs to allocate the device */
+	sprintf(disk_name, "nullb%d", idx);
+	path = kasprintf(GFP_KERNEL, "/sys/kernel/config/nullb/%s", disk_name);
+	if (!path)
+		return -ENOMEM;
+
+	dentry = kern_path_create(AT_FDCWD, path, &parent, LOOKUP_DIRECTORY);
+	if (IS_ERR(dentry)) {
+		ret = PTR_ERR(dentry);
+		goto free;
+	}
+
+	ret = vfs_mkdir(mnt_user_ns(parent.mnt), d_inode(parent.dentry),
+			dentry, 0755);
+	if (ret)
+		goto done;
+
+	/* Start the device */
+	item = config_group_find_item(&nullb_subsys.su_group, disk_name);
+	if (!item) {
+		pr_err("Device %s not powered up\n", disk_name);
+		goto done;
+	}
+
+	dev = to_nullb_device(item);
+	set_bit(NULLB_DEV_FL_UP, &dev->flags);
+	ret = null_add_dev(dev);
+	if (ret) {
+		clear_bit(NULLB_DEV_FL_UP, &dev->flags);
+		goto put;
+	}
+
+	set_bit(NULLB_DEV_FL_CONFIGURED, &dev->flags);
+	dev->power = true;
+	dev->config_path = path;
+	path = NULL;
+
+put:
+	config_item_put(item);
+done:
+	done_path_create(&parent, dentry);
+free:
+	kfree(path);
+
+	return ret;
+}
+
+static void nullb_destroy_dev(struct nullb *nullb)
+{
+	struct nullb_device *dev = nullb->dev;
+	struct dentry *dentry;
+	struct path parent;
+
+	dentry = kern_path_create(AT_FDCWD, dev->config_path, &parent,
LOOKUP_DIRECTORY);
+	if (IS_ERR(dentry)) {
+		pr_err("Lookup %s failed %ld\n",
+		       dev->config_path, PTR_ERR(dentry));
+		return;
+	}
+
+	if (d_really_is_positive(dentry)) {
+		vfs_rmdir(mnt_user_ns(parent.mnt), d_inode(parent.dentry),
+			  dentry);
+		dput(dentry);
+	}
+
+	done_path_create(&parent, dentry);
+}
+
+#else
+
+static int nullb_create_dev(int idx)
+{
+	struct nullb_device *dev;
+
+	dev = null_alloc_dev();
+	if (!dev)
+		return -ENOMEM;
+
+	ret = null_add_dev(dev);
+	if (ret) {
+		null_free_dev(dev);
+		return ret;
+	}
+
+	return 0;
+}
+
+static int nullb_destroy_dev(struct nullb *nullb)
+{
+	struct nullb_device *dev = nullb->dev;
+
+	null_del_dev(nullb);
+	null_free_dev(dev);
+}
+
+#endif
+
 static int __init null_init(void)
 {
 	int ret = 0;
 	unsigned int i;
 	struct nullb *nullb;
-	struct nullb_device *dev;

 	if (g_bs > PAGE_SIZE) {
 		pr_warn("invalid block size\n");
@@ -2151,16 +2262,9 @@ static int __init null_init(void)
 	}

 	for (i = 0; i < nr_devices; i++) {
-		dev = null_alloc_dev();
-		if (!dev) {
-			ret = -ENOMEM;
-			goto err_dev;
-		}
-		ret = null_add_dev(dev);
-		if (ret) {
-			null_free_dev(dev);
+		ret = nullb_create_dev(i);
+		if (ret)
 			goto err_dev;
-		}
 	}

 	pr_info("module loaded\n");
@@ -2169,9 +2273,7 @@ static int __init null_init(void)
 err_dev:
 	while (!list_empty(&nullb_list)) {
 		nullb = list_entry(nullb_list.next, struct nullb, list);
-		dev = nullb->dev;
-		null_del_dev(nullb);
-		null_free_dev(dev);
+		nullb_destroy_dev(nullb);
 	}
 	unregister_blkdev(null_major, "nullb");
 err_conf:
diff --git a/drivers/block/null_blk/null_blk.h
b/drivers/block/null_blk/null_blk.h
index 78eb56b0ca55..ecdc22e74d35 100644
--- a/drivers/block/null_blk/null_blk.h
+++ b/drivers/block/null_blk/null_blk.h
@@ -61,6 +61,7 @@ struct nullb_zone {
 struct nullb_device {
 	struct nullb *nullb;
 	struct config_item item;
+	const char *config_path;
 	struct radix_tree_root data; /* data stored in the disk */
 	struct radix_tree_root cache; /* disk cache data */
 	unsigned long flags; /* device flags */



-- 
Damien Le Moal
Western Digital Research



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux