Re: [PATCH v2] dma-buf: Move sysfs work out of DMA-BUF export path

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 16.05.22 um 19:13 schrieb T.J. Mercier:
Recently, we noticed an issue where a process went into direct reclaim
while holding the kernfs rw semaphore for sysfs in write (exclusive)
mode. This caused processes that were doing DMA-BUF exports and releases
to go into uninterruptible sleep since they needed to acquire the same
semaphore for the DMA-BUF sysfs entry creation/deletion. In order to avoid
blocking DMA-BUF export for an indeterminate amount of time while
another process is holding the sysfs rw semaphore in exclusive mode,
this patch moves the per-buffer sysfs file creation to the default work
queue. Note that this can lead to a short-term inaccuracy in the dmabuf
sysfs statistics, but this is a tradeoff to prevent the hot path from
being blocked. A work_struct is added to dma_buf to achieve this, but as
it is unioned with the kobject in the sysfs_entry, dma_buf does not
increase in size.

I'm still not very keen on this approach, as it strongly feels like we are working around a shortcoming somewhere else.

Fixes: bdb8d06dfefd ("dmabuf: Add the capability to expose DMA-BUF stats in sysfs")
Originally-by: Hridya Valsaraju <hridya@xxxxxxxxxx>
Signed-off-by: T.J. Mercier <tjmercier@xxxxxxxxxx>

---
See the originally submitted patch by Hridya Valsaraju here:
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flkml.org%2Flkml%2F2022%2F1%2F4%2F1066&data=05%7C01%7Cchristian.koenig%40amd.com%7C5575fa6126d74ca4315408da375f618d%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637883180063393649%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C3000%7C%7C%7C&sdata=1PcZaUfsLhQZOW29yGUDxazzcyNoBrN2NjeN1Yb40hk%3D&reserved=0

v2 changes:
- Defer only sysfs creation instead of creation and teardown per
Christian König

- Use a work queue instead of a kthread for deferred work per
Christian König
---
  drivers/dma-buf/dma-buf-sysfs-stats.c | 56 ++++++++++++++++++++-------
  include/linux/dma-buf.h               | 14 ++++++-
  2 files changed, 54 insertions(+), 16 deletions(-)

diff --git a/drivers/dma-buf/dma-buf-sysfs-stats.c b/drivers/dma-buf/dma-buf-sysfs-stats.c
index 2bba0babcb62..67b0a298291c 100644
--- a/drivers/dma-buf/dma-buf-sysfs-stats.c
+++ b/drivers/dma-buf/dma-buf-sysfs-stats.c
@@ -11,6 +11,7 @@
  #include <linux/printk.h>
  #include <linux/slab.h>
  #include <linux/sysfs.h>
+#include <linux/workqueue.h>
#include "dma-buf-sysfs-stats.h" @@ -168,10 +169,46 @@ void dma_buf_uninit_sysfs_statistics(void)
  	kset_unregister(dma_buf_stats_kset);
  }
+static void sysfs_add_workfn(struct work_struct *work)
+{
+	struct dma_buf_sysfs_entry *sysfs_entry =
+		container_of(work, struct dma_buf_sysfs_entry, sysfs_add_work);
+	struct dma_buf *dmabuf = sysfs_entry->dmabuf;
+
+	/*
+	 * A dmabuf is ref-counted via its file member. If this handler holds the only
+	 * reference to the dmabuf, there is no need for sysfs kobject creation. This is an
+	 * optimization and a race; when the reference count drops to 1 immediately after
+	 * this check it is not harmful as the sysfs entry will still get cleaned up in
+	 * dma_buf_stats_teardown, which won't get called until the final dmabuf reference
+	 * is released, and that can't happen until the end of this function.
+	 */
+	if (file_count(dmabuf->file) > 1) {

Please completely drop that. I see absolutely no justification for this additional complexity.

+		/*
+		 * kobject_init_and_add expects kobject to be zero-filled, but we have populated it
+		 * (the sysfs_add_work union member) to trigger this work function.
+		 */
+		memset(&dmabuf->sysfs_entry->kobj, 0, sizeof(dmabuf->sysfs_entry->kobj));
+		dmabuf->sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset;
+		if (kobject_init_and_add(&dmabuf->sysfs_entry->kobj, &dma_buf_ktype, NULL,
+						"%lu", file_inode(dmabuf->file)->i_ino)) {
+			kobject_put(&dmabuf->sysfs_entry->kobj);
+			dmabuf->sysfs_entry = NULL;
+		}
+	} else {
+		/*
+		 * Free the sysfs_entry and reset the pointer so dma_buf_stats_teardown doesn't
+		 * attempt to operate on it.
+		 */
+		kfree(dmabuf->sysfs_entry);
+		dmabuf->sysfs_entry = NULL;
+	}
+	dma_buf_put(dmabuf);
+}
+
  int dma_buf_stats_setup(struct dma_buf *dmabuf)
  {
  	struct dma_buf_sysfs_entry *sysfs_entry;
-	int ret;
if (!dmabuf || !dmabuf->file)
  		return -EINVAL;
@@ -181,25 +218,16 @@ int dma_buf_stats_setup(struct dma_buf *dmabuf)
  		return -EINVAL;
  	}
- sysfs_entry = kzalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL);
+	sysfs_entry = kmalloc(sizeof(struct dma_buf_sysfs_entry), GFP_KERNEL);
  	if (!sysfs_entry)
  		return -ENOMEM;
- sysfs_entry->kobj.kset = dma_buf_per_buffer_stats_kset;
  	sysfs_entry->dmabuf = dmabuf;
-
  	dmabuf->sysfs_entry = sysfs_entry;
- /* create the directory for buffer stats */
-	ret = kobject_init_and_add(&sysfs_entry->kobj, &dma_buf_ktype, NULL,
-				   "%lu", file_inode(dmabuf->file)->i_ino);
-	if (ret)
-		goto err_sysfs_dmabuf;
+	INIT_WORK(&dmabuf->sysfs_entry->sysfs_add_work, sysfs_add_workfn);
+	get_dma_buf(dmabuf); /* This reference will be dropped in sysfs_add_workfn. */
+	schedule_work(&dmabuf->sysfs_entry->sysfs_add_work);
return 0;
-
-err_sysfs_dmabuf:
-	kobject_put(&sysfs_entry->kobj);
-	dmabuf->sysfs_entry = NULL;
-	return ret;
  }
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 2097760e8e95..0200caa3c515 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -22,6 +22,7 @@
  #include <linux/fs.h>
  #include <linux/dma-fence.h>
  #include <linux/wait.h>
+#include <linux/workqueue.h>
struct device;
  struct dma_buf;
@@ -365,7 +366,7 @@ struct dma_buf {
  	 */
  	const char *name;

-	/** @name_lock: Spinlock to protect name acces for read access. */
+	/** @name_lock: Spinlock to protect name access for read access. */
  	spinlock_t name_lock;
/**
@@ -441,6 +442,7 @@ struct dma_buf {
__poll_t active;
  	} cb_in, cb_out;
+

Those changes are unrelated.

Regards,
Christian.

  #ifdef CONFIG_DMABUF_SYSFS_STATS
  	/**
  	 * @sysfs_entry:
@@ -449,7 +451,15 @@ struct dma_buf {
  	 * `DMA-BUF statistics`_ for the uapi this enables.
  	 */
  	struct dma_buf_sysfs_entry {
-		struct kobject kobj;
+		union {
+			struct kobject kobj;
+
+			/** @sysfs_add_work:
+			 *
+			 * For deferred sysfs kobject creation using a workqueue.
+			 */
+			struct work_struct sysfs_add_work;
+		};
  		struct dma_buf *dmabuf;
  	} *sysfs_entry;
  #endif




[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux