Re: [PATCH RFC] btrfs: sysfs: add <uuid>/debug/io_accounting/ directory

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 21/01/2022 13:24, Qu Wenruo wrote:
[BACKGROUND]
There is a series of btrfs autodefrag bugs in v5.16, that would cause
way more IO than previous kernels.

Unfortunately there isn't any test case covering autodefrag, nor there
is anyway to show the io accounting of a btrfs.

[WORKAROUND]
I originally want to implement a dm target to do the io accounting for
all filesystems, but can not find a good enough interface (the status
interface has only 3 pre-defined workload).

Thus I turned to btrfs specific io accounting first.
One thing specific to btrfs is its integrated volume management/RAID.

Without proper profiles specification, default profile will cause
metadata IO to be accounted twice (DUP profile) and only data IO is
accounted correctly.

So for btrfs this patch will introduce a new sysfs directory in
/sys/fs/btrfs/<uuid>/debug/io_accounting/

And have the following files:

- meta_read:	Metadata bytes read
- meta_write:	Metadata bytes written
- data_read:	Data bytes read
- data_write:	Data bytes written
		(including both zoned append and regular write)

And all these accounting is in logical address space, meaning profile
will not affect the values.

All those values can be reset by simply "echo 0".

Signed-off-by: Qu Wenruo <wqu@xxxxxxxx>
---
Reason for RFC:

- (To DM guys) Is there any good way to implement a dm target to do the IO
   accounting?
   A more generic one can help more filesystems.

- (To Btrfs guys) Is the sysfs interface fine?

 I am in for it. It can be a non debug feature IMO.
 More below.

---
  fs/btrfs/ctree.h   | 11 +++++++
  fs/btrfs/disk-io.c |  1 +
  fs/btrfs/sysfs.c   | 77 ++++++++++++++++++++++++++++++++++++++++++++++
  fs/btrfs/volumes.c | 24 +++++++++++++++
  4 files changed, 113 insertions(+)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index b4a9b1c58d22..3983bceaef7f 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1035,10 +1035,21 @@ struct btrfs_fs_info {
  #ifdef CONFIG_BTRFS_DEBUG
  	struct kobject *debug_kobj;
  	struct kobject *discard_debug_kobj;
+	struct kobject *io_accounting_debug_kobj;
  	struct list_head allocated_roots;
spinlock_t eb_leak_lock;
  	struct list_head allocated_ebs;
+
+	spinlock_t io_accounting_lock;
+	/*
+	 * The IO accounting unit are all in byte, and are in logical address
+	 * space, which is before the RAID/DUP mapping.
+	 */
+	u64 meta_read;
+	u64 meta_write;
+	u64 data_read;
+	u64 data_write;
  #endif
  };
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 87a5addbedf6..41b56fde6e97 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3065,6 +3065,7 @@ void btrfs_init_fs_info(struct btrfs_fs_info *fs_info)
  	INIT_LIST_HEAD(&fs_info->allocated_roots);
  	INIT_LIST_HEAD(&fs_info->allocated_ebs);
  	spin_lock_init(&fs_info->eb_leak_lock);
+	spin_lock_init(&fs_info->io_accounting_lock);
  #endif
  	extent_map_tree_init(&fs_info->mapping_tree);
  	btrfs_init_block_rsv(&fs_info->global_block_rsv,
diff --git a/fs/btrfs/sysfs.c b/fs/btrfs/sysfs.c
index beb7f72d50b8..dfdef93bdeab 100644
--- a/fs/btrfs/sysfs.c
+++ b/fs/btrfs/sysfs.c
@@ -40,6 +40,7 @@
   * btrfs_debug_feature_attrs		/sys/fs/btrfs/debug
   * btrfs_debug_mount_attrs		/sys/fs/btrfs/<uuid>/debug
   * discard_debug_attrs			/sys/fs/btrfs/<uuid>/debug/discard
+ * io_accounting_debug_attrs		/sys/fs/btrfs/<uuid>/debug/io_accounting
   */
struct btrfs_feature_attr {
@@ -616,6 +617,62 @@ static const struct attribute_group btrfs_debug_feature_attr_group = {
  	.attrs = btrfs_debug_feature_attrs,
  };
+/* IO accounting */
+#define io_accounting_to_fs_info(_kobj)	to_fs_info((_kobj)->parent->parent)
+
+#define DECLARE_IO_ACCOUNTING_OPS(name)					\
+static ssize_t io_accounting_##name##_show(struct kobject *kobj,	\
+					   struct kobj_attribute *a,	\
+					   char *buf)			\
+{									\
+	struct btrfs_fs_info *fs_info = io_accounting_to_fs_info(kobj);	\
+	u64 result;							\
+									\
+	spin_lock(&fs_info->io_accounting_lock);			\
+	result = fs_info->name;						\
+	spin_unlock(&fs_info->io_accounting_lock);			\
+	return sysfs_emit(buf, "%llu\n", result);			\
+}									\
+static ssize_t io_accounting_##name##_store(struct kobject *kobj,	\
+					    struct kobj_attribute *a,	\
+					    const char *buf,		\
+					    size_t count) 		\
+{									\
+	struct btrfs_fs_info *fs_info = io_accounting_to_fs_info(kobj);	\
+	u64 value;							\
+	int ret;							\
+									\
+	ret = kstrtoull(skip_spaces(buf), 0, &value);			\
+	if (ret)							\
+		return ret;						\
+	spin_lock(&fs_info->io_accounting_lock);			\
+	fs_info->name = value;						\
+	spin_unlock(&fs_info->io_accounting_lock);			\
+	return count;							\
+}
+
+DECLARE_IO_ACCOUNTING_OPS(meta_read);
+DECLARE_IO_ACCOUNTING_OPS(meta_write);
+DECLARE_IO_ACCOUNTING_OPS(data_read);
+DECLARE_IO_ACCOUNTING_OPS(data_write);
+
+BTRFS_ATTR_RW(io_accounting, meta_read, io_accounting_meta_read_show,
+	      io_accounting_meta_read_store);
+BTRFS_ATTR_RW(io_accounting, meta_write, io_accounting_meta_write_show,
+	      io_accounting_meta_write_store);
+BTRFS_ATTR_RW(io_accounting, data_read, io_accounting_data_read_show,
+	      io_accounting_data_read_store);
+BTRFS_ATTR_RW(io_accounting, data_write, io_accounting_data_write_show,
+	      io_accounting_data_write_store);
+
+static const struct attribute *io_accounting_debug_attrs[] = {
+	BTRFS_ATTR_PTR(io_accounting, meta_read),
+	BTRFS_ATTR_PTR(io_accounting, meta_write),
+	BTRFS_ATTR_PTR(io_accounting, data_read),
+	BTRFS_ATTR_PTR(io_accounting, data_write),
+	NULL,
+};
+
  #endif
static ssize_t btrfs_show_u64(u64 *value_ptr, spinlock_t *lock, char *buf)
@@ -1219,6 +1276,12 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info)
  		kobject_del(fs_info->discard_debug_kobj);
  		kobject_put(fs_info->discard_debug_kobj);
  	}
+	if (fs_info->io_accounting_debug_kobj) {
+		sysfs_remove_files(fs_info->io_accounting_debug_kobj,
+				   io_accounting_debug_attrs);
+		kobject_del(fs_info->io_accounting_debug_kobj);
+		kobject_put(fs_info->io_accounting_debug_kobj);
+	}
  	if (fs_info->debug_kobj) {
  		sysfs_remove_files(fs_info->debug_kobj, btrfs_debug_mount_attrs);
  		kobject_del(fs_info->debug_kobj);
@@ -1804,6 +1867,20 @@ int btrfs_sysfs_add_mounted(struct btrfs_fs_info *fs_info)
  				   discard_debug_attrs);
  	if (error)
  		goto failure;
+
+	/* io_accounting directory */
+	fs_info->io_accounting_debug_kobj =
+		kobject_create_and_add("io_accounting",fs_info->debug_kobj);
+	if (!fs_info->io_accounting_debug_kobj) {
+		error = -ENOMEM;
+		goto failure;
+	}
+
+	error = sysfs_create_files(fs_info->io_accounting_debug_kobj,
+				   io_accounting_debug_attrs);
+	if (error)
+		goto failure;
+
  #endif
error = addrm_unknown_feature_attrs(fs_info, true);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2332e3026efa..58f2ec0a611a 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -6763,6 +6763,29 @@ static void bioc_error(struct btrfs_io_context *bioc, struct bio *bio, u64 logic
  	}
  }
+static void update_io_accounting(struct btrfs_fs_info *fs_info, struct bio *bio)
+{
+	u32 length = bio->bi_iter.bi_size;
+	bool metadata = bio->bi_opf & REQ_META;
+
+#ifdef	CONFIG_BTRFS_DEBUG
+	spin_lock(&fs_info->io_accounting_lock);
+	if (bio_op(bio) == REQ_OP_READ) {
+		if (metadata)
+			fs_info->meta_read += length;
+		else
+			fs_info->data_read += length;
+	} else {
+		if (metadata)
+			fs_info->meta_write += length;
+		else
+			fs_info->data_write += length;
+	}
+	spin_unlock(&fs_info->io_accounting_lock);
+#endif
+	return;
+}
+
  blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
  			   int mirror_num)
  {
@@ -6776,6 +6799,7 @@ blk_status_t btrfs_map_bio(struct btrfs_fs_info *fs_info, struct bio *bio,
>
  	int total_devs;
  	struct btrfs_io_context *bioc = NULL;
+ update_io_accounting(fs_info, bio);

 Generally, speaking accounting should go at the end of IO.
 Other than that looks good.

 Also, need a way to reset the values.

Thanks, Anand

  	length = bio->bi_iter.bi_size;
  	map_length = length;

--
dm-devel mailing list
dm-devel@xxxxxxxxxx
https://listman.redhat.com/mailman/listinfo/dm-devel




[Index of Archives]     [DM Crypt]     [Fedora Desktop]     [ATA RAID]     [Fedora Marketing]     [Fedora Packaging]     [Fedora SELinux]     [Yosemite Discussion]     [KDE Users]     [Fedora Docs]

  Powered by Linux