On 8/22/2022 4:48 AM, Alexander Fomichev wrote:
From: Alexander Fomichev <a.fomichev@xxxxxxxxx>
Introduce "on_cpu" module parameter for dmatest.
By default, its value is -1 which means no binding implies.
Positive values or zero cause the next "channel" assignment(s) to bind
channel's thread to certain CPU. Thus, it is possible to bind different
DMA channels' threads to different CPUs.
This is useful for the cases when cold cache (because of task migrating
between CPUs) significantly impacts DMA Engine performance. Such
situation was analyzed in [1].
[1] Scheduler: DMA Engine regression because of sched/fair changes
https://lore.kernel.org/all/20220128165058.zxyrnd7nzr4hlks2@xxxxxxxxx/
I don't think this would work for multi-socket systems, given that when
it's enabled you bind all threads to a single CPU even if you have
channels on a different NUMA node. One possible way to do this is to
treat on_cpu as the Nth core of the NUMA node the channel is on; a
rough sketch follows. Just throwing out ideas.
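Something along these lines, assuming it lives in dmatest.c next to the
existing on_cpu parameter; dmatest_pick_cpu() is a made-up helper name
and this is untested:

/*
 * Illustrative only: interpret on_cpu as the Nth CPU of the NUMA node
 * the channel's device sits on, instead of a global CPU id.
 * cpumask_local_spread() returns the i'th CPU counting node-local
 * CPUs first, falling back to remote ones for large indexes.
 */
static int dmatest_pick_cpu(struct dma_chan *chan)
{
	if (on_cpu < 0)
		return -1;	/* no binding requested */

	return cpumask_local_spread(on_cpu,
				    dev_to_node(chan->device->dev));
}

dmatest_add_channel() would then store that result in dtc->cpu instead
of the raw on_cpu value, so every channel gets pinned near its own
node.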
Signed-off-by: Alexander Fomichev <a.fomichev@xxxxxxxxx>
---
drivers/dma/dmatest.c | 23 +++++++++++++++++++++--
1 file changed, 21 insertions(+), 2 deletions(-)
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
index f696246f57fd..c91cbc9e5d1a 100644
--- a/drivers/dma/dmatest.c
+++ b/drivers/dma/dmatest.c
@@ -89,6 +89,10 @@ static bool polled;
module_param(polled, bool, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(polled, "Use polling for completion instead of interrupts");
+static int on_cpu = -1;
+module_param(on_cpu, int, 0644);
+MODULE_PARM_DESC(on_cpu, "Bind CPU to run threads on (default: auto scheduled (-1))");
+
/**
* struct dmatest_params - test parameters.
* @buf_size: size of the memcpy test buffer
@@ -237,6 +241,7 @@ struct dmatest_thread {
struct dmatest_chan {
struct list_head node;
struct dma_chan *chan;
+ int cpu;
struct list_head threads;
};
@@ -602,6 +607,7 @@ static int dmatest_func(void *data)
ret = -ENOMEM;
smp_rmb();
+
Stray blank line
thread->pending = false;
info = thread->info;
params = &info->params;
@@ -1010,6 +1016,7 @@ static int dmatest_add_channel(struct dmatest_info *info,
struct dmatest_chan *dtc;
struct dma_device *dma_dev = chan->device;
unsigned int thread_count = 0;
+ char cpu_str[20];
int cnt;
dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
@@ -1018,6 +1025,13 @@ static int dmatest_add_channel(struct dmatest_info *info,
return -ENOMEM;
}
+ memset(cpu_str, 0, sizeof(cpu_str));
+ if (on_cpu >= nr_cpu_ids || on_cpu < -1)
+ on_cpu = -1;
+ dtc->cpu = on_cpu;
+ if (dtc->cpu != -1)
+ snprintf(cpu_str, sizeof(cpu_str) - 1, " on CPU #%d", dtc->cpu);
+
dtc->chan = chan;
INIT_LIST_HEAD(&dtc->threads);
@@ -1050,8 +1064,8 @@ static int dmatest_add_channel(struct dmatest_info *info,
thread_count += cnt > 0 ? cnt : 0;
}
- pr_info("Added %u threads using %s\n",
- thread_count, dma_chan_name(chan));
+ pr_info("Added %u threads using %s%s\n",
+ thread_count, dma_chan_name(chan), cpu_str);
list_add_tail(&dtc->node, &info->channels);
info->nr_channels++;
@@ -1125,6 +1139,11 @@ static void run_pending_tests(struct dmatest_info *info)
thread_count = 0;
list_for_each_entry(thread, &dtc->threads, node) {
+ if (dtc->cpu != -1) {
+ if (!thread->pending)
+ continue;
+ kthread_bind(thread->task, dtc->cpu);
+ }
wake_up_process(thread->task);
thread_count++;
}
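One more note on ordering: binding here should only be legal because
these threads come from kthread_create() and have not run yet;
kthread_bind() on a thread that has already executed triggers a
warning and the bind is skipped, which is presumably why the
!thread->pending case is passed over. Schematically, the pattern being
relied on (thread naming simplified from dmatest_add_threads(), and
"cpu"/"i" are placeholders):

struct task_struct *task;

/* Thread is created stopped; it won't run until woken. */
task = kthread_create(dmatest_func, thread, "%s-copy%u",
		      dma_chan_name(chan), i);
if (IS_ERR(task))
	return PTR_ERR(task);

kthread_bind(task, cpu);	/* must precede the first wakeup */
wake_up_process(task);		/* now runs pinned to @cpu */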