Re: [PATCH] mm: Free per cpu pages async to shorten program exit time

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 08.10.21 08:39, ultrachin@xxxxxxx wrote:
From: chen xiaoguang <xiaoggchen@xxxxxxxxxxx>

The exit time is long when program allocated big memory and
the most time consuming part is free memory which takes 99.9%
of the total exit time. By using async free we can save 25% of
exit time.

Signed-off-by: chen xiaoguang <xiaoggchen@xxxxxxxxxxx>
Signed-off-by: zeng jingxiang <linuszeng@xxxxxxxxxxx>
Signed-off-by: lu yihui <yihuilu@xxxxxxxxxxx>

I recently discussed with Claudio if it would be possible to tear down the process MM deferred, because for some use cases (secure/encrypted virtualization, very large mmaps) tearing down the page tables is already the much more expensive operation.

There is mmdrop_async(), and I wondered if one could reuse that concept when tearing down a process -- I didn't look into feasibility, however, so it's just some very rough idea.

---
  include/linux/mm.h |  1 +
  kernel/exit.c      |  2 ++
  mm/page_alloc.c    | 89 +++++++++++++++++++++++++++++++++++++++++++---
  3 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 73a52aba448f..2add3b635eee 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -908,6 +908,7 @@ void put_pages_list(struct list_head *pages);
void split_page(struct page *page, unsigned int order);
  void copy_huge_page(struct page *dst, struct page *src);
+void kfreepcp_set_run(unsigned int cpu);
/*
   * Compound pages have a destructor function.  Provide a
diff --git a/kernel/exit.c b/kernel/exit.c
index 91a43e57a32e..269eb81acbe9 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -167,10 +167,12 @@ static void __exit_signal(struct task_struct *tsk)
  static void delayed_put_task_struct(struct rcu_head *rhp)
  {
  	struct task_struct *tsk = container_of(rhp, struct task_struct, rcu);
+	unsigned int cpu = tsk->cpu;
perf_event_delayed_put(tsk);
  	trace_sched_process_free(tsk);
  	put_task_struct(tsk);
+	kfreepcp_set_run(cpu);
  }
void put_task_struct_rcu_user(struct task_struct *task)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index b37435c274cf..8a748ea9156b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -72,6 +72,7 @@
  #include <linux/padata.h>
  #include <linux/khugepaged.h>
  #include <linux/buffer_head.h>
+#include <linux/smpboot.h>
  #include <asm/sections.h>
  #include <asm/tlbflush.h>
  #include <asm/div64.h>
@@ -147,6 +148,12 @@ DEFINE_PER_CPU(int, _numa_mem_);		/* Kernel "local memory" node */
  EXPORT_PER_CPU_SYMBOL(_numa_mem_);
  #endif
+struct freepcp_stat {
+	struct task_struct *thread;
+	bool should_run;
+};
+DEFINE_PER_CPU(struct freepcp_stat, kfreepcp);
+
  /* work_structs for global per-cpu drains */
  struct pcpu_drain {
  	struct zone *zone;
@@ -3361,6 +3368,81 @@ static int nr_pcp_high(struct per_cpu_pages *pcp, struct zone *zone)
  	return min(READ_ONCE(pcp->batch) << 2, high);
  }
+void kfreepcp_set_run(unsigned int cpu)
+{
+	struct task_struct *tsk;
+	struct freepcp_stat *stat = this_cpu_ptr(&kfreepcp);
+
+	tsk = stat->thread;
+	per_cpu(kfreepcp.should_run, cpu) = true;
+
+	if (tsk && !task_is_running(tsk))
+		wake_up_process(tsk);
+}
+EXPORT_SYMBOL_GPL(kfreepcp_set_run);
+
+static int kfreepcp_should_run(unsigned int cpu)
+{
+	struct freepcp_stat *stat = this_cpu_ptr(&kfreepcp);
+
+	return stat->should_run;
+}
+
+static void run_kfreepcp(unsigned int cpu)
+{
+	struct zone *zone;
+	struct per_cpu_pages *pcp;
+	unsigned long flags;
+	struct freepcp_stat *stat = this_cpu_ptr(&kfreepcp);
+	bool need_free_more = false;
+
+
+
+again:
+	need_free_more = false;
+	for_each_populated_zone(zone) {
+		pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+		if (pcp->count && pcp->high && pcp->count > pcp->high) {
+			unsigned long batch = READ_ONCE(pcp->batch);
+			int high;
+
+			high = nr_pcp_high(pcp, zone);
+			local_irq_save(flags);
+			free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch),
+					pcp);
+			local_irq_restore(flags);
+			if (pcp->count > pcp->high)
+				need_free_more = true;
+		}
+
+		cond_resched();
+	}
+	if (need_free_more)
+		goto again;
+
+	stat->should_run = false;
+}
+
+static struct smp_hotplug_thread freepcp_threads = {
+	.store                  = &kfreepcp.thread,
+	.thread_should_run      = kfreepcp_should_run,
+	.thread_fn              = run_kfreepcp,
+	.thread_comm            = "kfreepcp/%u",
+};
+
+static int __init freepcp_init(void)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		per_cpu(kfreepcp.should_run, cpu) = false;
+
+	BUG_ON(smpboot_register_percpu_thread(&freepcp_threads));
+
+	return 0;
+}
+late_initcall(freepcp_init);
+
  static void free_unref_page_commit(struct page *page, unsigned long pfn,
  				   int migratetype, unsigned int order)
  {
@@ -3375,11 +3457,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn,
  	list_add(&page->lru, &pcp->lists[pindex]);
  	pcp->count += 1 << order;
  	high = nr_pcp_high(pcp, zone);
-	if (pcp->count >= high) {
-		int batch = READ_ONCE(pcp->batch);
-
-		free_pcppages_bulk(zone, nr_pcp_free(pcp, high, batch), pcp);
-	}
+	if (pcp->count >= high)
+		this_cpu_ptr(&kfreepcp)->should_run = false;
  }
/*



--
Thanks,

David / dhildenb





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux