[PATCH 01/20] block, blk_filter: enable block device filters

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Allow block device filters to be attached to block devices.
Kernel modules can use this functionality to extend the
capabilities of the block layer.

Signed-off-by: Sergei Shtepa <sergei.shtepa@xxxxxxxxx>
---
 block/Kconfig             |   8 +++
 block/bdev.c              | 129 ++++++++++++++++++++++++++++++++++++++
 block/blk-core.c          |  88 ++++++++++++++++++++++++++
 include/linux/blk_types.h |  22 +++++++
 include/linux/blkdev.h    |  81 ++++++++++++++++++++++++
 5 files changed, 328 insertions(+)

diff --git a/block/Kconfig b/block/Kconfig
index 50b17e260fa2..256483e00224 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -225,6 +225,14 @@ config BLK_MQ_RDMA
 config BLK_PM
 	def_bool PM
 
+config BLK_FILTER
+	bool "Enable block device filters"
+	default n
+	help
+	  Enabling this lets block device filters intercept bio requests
+	  before they are submitted. Kernel modules can use this feature to
+	  extend the functionality of the block layer.
+
 # do not use in new code
 config BLOCK_HOLDER_DEPRECATED
 	bool
diff --git a/block/bdev.c b/block/bdev.c
index 5fe06c1f2def..4bcd9f4378e3 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -426,8 +426,15 @@ static void init_once(void *data)
 	inode_init_once(&ei->vfs_inode);
 }
 
+#ifdef CONFIG_BLK_FILTER
+static void bdev_filter_cleanup(struct block_device *bdev);
+#endif
+
 static void bdev_evict_inode(struct inode *inode)
 {
+#ifdef CONFIG_BLK_FILTER
+	bdev_filter_cleanup(I_BDEV(inode));
+#endif
 	truncate_inode_pages_final(&inode->i_data);
 	invalidate_inode_buffers(inode); /* is it needed here? */
 	clear_inode(inode);
@@ -503,6 +510,11 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 		return NULL;
 	}
 	bdev->bd_disk = disk;
+
+#ifdef CONFIG_BLK_FILTER
+	memset(bdev->bd_filters, 0, sizeof(bdev->bd_filters));
+	spin_lock_init(&bdev->bd_filters_lock);
+#endif
 	return bdev;
 }
 
@@ -1071,3 +1083,120 @@ void sync_bdevs(bool wait)
 	spin_unlock(&blockdev_superblock->s_inode_list_lock);
 	iput(old_inode);
 }
+
+#ifdef CONFIG_BLK_FILTER
+static void bdev_filter_cleanup(struct block_device *bdev)
+{
+	struct bdev_filter *victim;
+	int slot = 0;
+
+	/* Empty each slot under the lock, drop its reference outside of it. */
+	while (slot < bdev_filter_alt_end) {
+		spin_lock(&bdev->bd_filters_lock);
+		victim = bdev->bd_filters[slot];
+		bdev->bd_filters[slot++] = NULL;
+		spin_unlock(&bdev->bd_filters_lock);
+		bdev_filter_put(victim);
+	}
+}
+
+/**
+ * bdev_filter_attach - Attach a filter to the original block device.
+ * @bdev:
+ *	Block device.
+ * @name:
+ *	Name of the block device filter, used only for the log message.
+ * @altitude:
+ *	Altitude number of the block device filter.
+ * @flt:
+ *	Pointer to the filter structure.
+ *
+ * Before adding a filter, it is necessary to initialize &struct bdev_filter.
+ *
+ * The reference held by the caller is handed over to the block device on
+ * success; bdev_filter_detach() detaches the filter and drops it again.
+ *
+ * Return:
+ * 0 - OK
+ * -EALREADY - a filter is already attached at this altitude
+ */
+int bdev_filter_attach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude,
+		       struct bdev_filter *flt)
+{
+	int ret = 0;
+
+	spin_lock(&bdev->bd_filters_lock);
+	if (bdev->bd_filters[altitude])
+		ret = -EALREADY;
+	else
+		bdev->bd_filters[altitude] = flt;
+	spin_unlock(&bdev->bd_filters_lock);
+
+	if (!ret)
+		pr_info("block device filter '%s' has been attached to %u:%u\n",
+			name, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bdev_filter_attach);
+
+/**
+ * bdev_filter_detach - Detach a filter from the block device.
+ * @bdev:
+ *	Block device.
+ * @name:
+ *	Name of the block device filter, used only for the log message.
+ * @altitude:
+ *	Altitude number of the block device filter.
+ *
+ * The filter should be added using the bdev_filter_attach() function.
+ *
+ * Return:
+ * 0 - OK
+ * -ENOENT - no filter is attached at this altitude
+ */
+int bdev_filter_detach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude)
+{
+	struct bdev_filter *flt;
+
+	spin_lock(&bdev->bd_filters_lock);
+	flt = bdev->bd_filters[altitude];
+	bdev->bd_filters[altitude] = NULL;
+	spin_unlock(&bdev->bd_filters_lock);
+
+	if (!flt)
+		return -ENOENT;
+	/* Drop the reference that the now empty slot was holding. */
+	bdev_filter_put(flt);
+	pr_info("block device filter '%s' has been detached from %u:%u\n",
+		name, MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev));
+	return 0;
+}
+EXPORT_SYMBOL_GPL(bdev_filter_detach);
+
+/**
+ * bdev_filter_get_by_altitude - Get the filter attached at an altitude.
+ * @bdev: Pointer to the block device structure.
+ * @altitude: Altitude (slot of the block device) to look up.
+ *
+ * Return:
+ * pointer - the attached &struct bdev_filter, with an extra reference taken
+ * NULL - no filter has been set
+ */
+struct bdev_filter *bdev_filter_get_by_altitude(struct block_device *bdev,
+				const enum bdev_filter_altitudes altitude)
+{
+	struct bdev_filter *flt;
+
+	spin_lock(&bdev->bd_filters_lock);
+	flt = bdev->bd_filters[altitude];
+	if (flt)
+		bdev_filter_get(flt);
+	spin_unlock(&bdev->bd_filters_lock);
+
+	return flt;
+}
+EXPORT_SYMBOL_GPL(bdev_filter_get_by_altitude);
+#endif
diff --git a/block/blk-core.c b/block/blk-core.c
index 06ff5bbfe8f6..a44906fb08aa 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -757,6 +757,86 @@ void submit_bio_noacct_nocheck(struct bio *bio)
 		__submit_bio_noacct(bio);
 }
 
+#ifdef CONFIG_BLK_FILTER
+
+/**
+ * __filter_bio() - Process bio by the block device filter.
+ * @flt: Block device filter.
+ * @bio: Original I/O unit.
+ *
+ * The callback is invoked again for as long as it returns bdev_filter_repeat.
+ * Bios queued on current->bio_list meanwhile are resubmitted as BIO_FILTERED.
+ *
+ * Return:
+ * bdev_filter_pass - original bio should be submitted
+ * bdev_filter_skip - do not submit original bio
+ * bdev_filter_redirect - repeat bio processing for another block device
+ */
+static inline enum bdev_filter_result __filter_bio(struct bdev_filter *flt,
+						   struct bio *bio)
+{
+	enum bdev_filter_result result;
+	struct bio *new_bio;
+	struct bio_list bio_list[2] = { };
+
+	do {
+		bio_list_init(&bio_list[0]);
+		current->bio_list = bio_list;
+
+		result = flt->fops->submit_bio_cb(bio, flt);
+		current->bio_list = NULL;
+
+		while ((new_bio = bio_list_pop(&bio_list[0]))) {
+			bio_set_flag(new_bio, BIO_FILTERED);
+			submit_bio_noacct(new_bio);
+		}
+	} while (result == bdev_filter_repeat);
+
+	return result;
+}
+
+/**
+ * filter_bio() - Pass bio to the block device filters.
+ * @bio: Original I/O unit.
+ *
+ * Return:
+ * true - original bio should be submitted
+ * false - do not submit original bio
+ */
+static bool filter_bio(struct bio *bio)
+{
+	enum bdev_filter_result result;
+
+	if (bio_flagged(bio, BIO_FILTERED))
+		return true;
+	do {
+		struct block_device *bdev = bio->bi_bdev;
+		unsigned int altitude;
+
+		/* A redirect target without filters must not loop forever. */
+		result = bdev_filter_pass;
+		for (altitude = 0; altitude < bdev_filter_alt_end; altitude++) {
+			struct bdev_filter *flt;
+
+			spin_lock(&bdev->bd_filters_lock);
+			flt = bdev->bd_filters[altitude];
+			if (flt)
+				bdev_filter_get(flt);
+			spin_unlock(&bdev->bd_filters_lock);
+			if (!flt)
+				continue;
+
+			result = __filter_bio(flt, bio);
+			bdev_filter_put(flt);
+			if (result != bdev_filter_pass)
+				break;
+		}
+	} while (result == bdev_filter_redirect);
+
+	return result == bdev_filter_pass;
+}
+#endif
+
 /**
  * submit_bio_noacct - re-submit a bio to the block device layer for I/O
  * @bio:  The bio describing the location in memory and on the device.
@@ -790,6 +870,14 @@ void submit_bio_noacct(struct bio *bio)
 		goto end_io;
 	if (unlikely(bio_check_ro(bio)))
 		goto end_io;
+#ifdef CONFIG_BLK_FILTER
+	/*
+	 * should_fail_bio() and bio_check_ro() could move into a debug filter.
+	 * NOTE(review): a skipped bio also lands on end_io; confirm intended.
+	 */
+	if (!filter_bio(bio))
+		goto end_io;
+#endif
 	if (!bio_flagged(bio, BIO_REMAPPED)) {
 		if (unlikely(bio_check_eod(bio)))
 			goto end_io;
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a24d4078fb21..b88f506ea59e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -37,6 +37,23 @@ struct bio_crypt_ctx;
 #define PAGE_SECTORS		(1 << PAGE_SECTORS_SHIFT)
 #define SECTOR_MASK		(PAGE_SECTORS - 1)
 
+#ifdef CONFIG_BLK_FILTER
+/**
+ * enum bdev_filter_altitudes - Set of reserved altitudes for block device
+ *	filters. An altitude indexes the bd_filters array of a block device.
+ *
+ * @bdev_filter_alt_blksnap:
+ *	An altitude for the blksnap module.
+ * @bdev_filter_alt_end:
+ *	Indicates the end of the altitude set; it sizes the bd_filters array.
+ */
+enum bdev_filter_altitudes {
+	bdev_filter_alt_blksnap = 0,
+	bdev_filter_alt_end
+};
+struct bdev_filter;
+#endif
+
 struct block_device {
 	sector_t		bd_start_sect;
 	sector_t		bd_nr_sectors;
@@ -68,6 +85,10 @@ struct block_device {
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 	bool			bd_make_it_fail;
 #endif
+#ifdef CONFIG_BLK_FILTER
+	struct bdev_filter	*bd_filters[bdev_filter_alt_end];
+	spinlock_t		bd_filters_lock;
+#endif
 } __randomize_layout;
 
 #define bdev_whole(_bdev) \
@@ -332,6 +353,7 @@ enum {
 	BIO_QOS_MERGED,		/* but went through rq_qos merge path */
 	BIO_REMAPPED,
 	BIO_ZONE_WRITE_LOCKED,	/* Owns a zoned device zone write lock */
+	BIO_FILTERED,		/* bio has already been filtered */
 	BIO_FLAG_LAST
 };
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 608d577734c2..24cb5293897f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1573,4 +1573,85 @@ struct io_comp_batch {
 
 #define DEFINE_IO_COMP_BATCH(name)	struct io_comp_batch name = { }
 
+#ifdef CONFIG_BLK_FILTER
+/**
+ * enum bdev_filter_result - The result of bio processing by
+ *	the submit_bio_cb callback of a block device filter.
+ *
+ * @bdev_filter_skip:
+ *	Original bio does not need to be submitted.
+ * @bdev_filter_pass:
+ *	It is necessary to submit the original request.
+ * @bdev_filter_repeat:
+ *	Bio processing has not been completed, the callback is called again.
+ * @bdev_filter_redirect:
+ *	Original bio was redirected to another block device. The set
+ *	of filters on it is different, so processing must be repeated.
+ */
+enum bdev_filter_result {
+	bdev_filter_skip = 0,
+	bdev_filter_pass,
+	bdev_filter_repeat,
+	bdev_filter_redirect
+};
+struct bdev_filter;
+/**
+ * struct bdev_filter_operations - List of callback functions for the filter.
+ *
+ * @submit_bio_cb:
+ *	A callback function for bio processing.
+ * @detach_cb:
+ *	Release callback that bdev_filter_put() passes to kref_put(), e.g.
+ *	after the filter is detached or its block device is removed.
+ */
+struct bdev_filter_operations {
+	enum bdev_filter_result (*submit_bio_cb)(struct bio *bio,
+						 struct bdev_filter *flt);
+	void (*detach_cb)(struct kref *kref);
+};
+/**
+ * struct bdev_filter - Block device filter.
+ *
+ * @kref:
+ *	Reference counter, see bdev_filter_get() and bdev_filter_put().
+ * @fops:
+ *	The pointer to &struct bdev_filter_operations with callback
+ *	functions for the filter.
+ */
+struct bdev_filter {
+	struct kref kref;
+	const struct bdev_filter_operations *fops;
+};
+/**
+ * bdev_filter_init - Initialization of the filter structure.
+ * @flt: Pointer to the &struct bdev_filter to be initialized.
+ * @fops: The callback functions for the filter.
+ *
+ * The filter starts out with a single reference, see kref_init().
+ */
+static inline void bdev_filter_init(struct bdev_filter *flt,
+		const struct bdev_filter_operations *fops)
+{
+	kref_init(&flt->kref);
+	flt->fops = fops;
+}
+int bdev_filter_attach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude,
+		       struct bdev_filter *flt);
+int bdev_filter_detach(struct block_device *bdev, const char *name,
+		       const enum bdev_filter_altitudes altitude);
+struct bdev_filter *bdev_filter_get_by_altitude(struct block_device *bdev,
+		       const enum bdev_filter_altitudes altitude);
+static inline void bdev_filter_get(struct bdev_filter *flt)
+{
+	kref_get(&flt->kref);
+}
+static inline void bdev_filter_put(struct bdev_filter *flt)
+{
+	if (flt) /* NULL is a no-op, so empty slots can be put blindly */
+		kref_put(&flt->kref, flt->fops->detach_cb);
+}
+
+#endif
+
 #endif /* _LINUX_BLKDEV_H */
-- 
2.20.1




[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux