Re: [PATCH] mm: memcontrol: remove page_memcg()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 2024/5/22 3:29, Shakeel Butt wrote:
On Tue, May 21, 2024 at 03:44:21PM +0100, Matthew Wilcox wrote:
On Tue, May 21, 2024 at 09:15:56PM +0800, Kefeng Wang wrote:
The page_memcg() only called by mod_memcg_page_state(), so squash it to
cleanup page_memcg().

This isn't wrong, except that the entire usage of memcg is wrong in the
only two callers of mod_memcg_page_state():

$ git grep mod_memcg_page_state
include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
include/linux/memcontrol.h:static inline void mod_memcg_page_state(struct page *page,
mm/vmalloc.c:           mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
mm/vmalloc.c:                   mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);

The memcg should not be attached to the individual pages that make up a
vmalloc allocation.  Rather, it should be managed by the vmalloc
allocation itself.  I don't have the knowledge to poke around inside
vmalloc right now, but maybe somebody else could take that on.

Are you concerned about accessing just memcg or any field of the
sub-page? There are drivers accessing fields of pages allocated through
vmalloc. Some details at 3b8000ae185c ("mm/vmalloc: huge vmalloc backing
pages should be split rather than compound").

Maybe Matthew want something shown below, move the memcg MEMCG_VMALLOC stat update from per-page to per-vmalloc-allocation? It should be speed up the statistic after conversion.

diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index e4a631ec430b..89f115623124 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -55,6 +55,9 @@ struct vm_struct {
 	unsigned long		size;
 	unsigned long		flags;
 	struct page		**pages;
+#ifdef CONFIG_MEMCG_KMEM
+	struct obj_cgroup	*objcg;
+#endif
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMALLOC
 	unsigned int		page_order;
 #endif
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 5d3aa2dc88a8..3e28c382f604 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -3001,6 +3001,49 @@ static inline void set_vm_area_page_order(struct vm_struct *vm, unsigned int ord
 #endif
 }

+#ifdef CONFIG_MEMCG_KMEM
+static void vmalloc_memcg_alloc_hook(struct vm_struct *area, gfp_t gfp,
+				     int nr_pages)
+{
+	struct obj_cgroup *objcg;
+
+	if (!memcg_kmem_online() || !(gfp & __GFP_ACCOUNT))
+		return;
+
+	objcg = get_obj_cgroup_from_current();
+	if (objcg)
+		return;
+
+	area->objcg = objcg;
+
+	rcu_read_lock();
+	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_VMALLOC, nr_pages);
+	rcu_read_unlock();
+}
+
+static void vmalloc_memcg_free_hook(struct vm_struct *area)
+{
+	struct obj_cgroup *objcg = area->objcg;
+
+	if (!objcg)
+		return;
+
+	rcu_read_lock();
+	mod_memcg_state(obj_cgroup_memcg(objcg), MEMCG_VMALLOC, -area->nr_pages);
+	rcu_read_unlock();
+
+	obj_cgroup_put(objcg);
+}
+#else
+static void vmalloc_memcg_alloc_hook(struct vm_struct *area, gfp_t gfp,
+				     int nr_pages)
+{
+}
+static void vmalloc_memcg_free_hook(struct vm_struct *area)
+{
+}
+#endif
+
 /**
  * vm_area_add_early - add vmap area early during boot
  * @vm: vm_struct to add
@@ -3338,7 +3381,6 @@ void vfree(const void *addr)
 		struct page *page = vm->pages[i];

 		BUG_ON(!page);
-		mod_memcg_page_state(page, MEMCG_VMALLOC, -1);
 		/*
 		 * High-order allocs for huge vmallocs are split, so
 		 * can be freed as an array of order-0 allocations
@@ -3347,6 +3389,7 @@ void vfree(const void *addr)
 		cond_resched();
 	}
 	atomic_long_sub(vm->nr_pages, &nr_vmalloc_pages);
+	vmalloc_memcg_free_hook(vm);
 	kvfree(vm->pages);
 	kfree(vm);
 }
@@ -3643,12 +3686,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
 		node, page_order, nr_small_pages, area->pages);

 	atomic_long_add(area->nr_pages, &nr_vmalloc_pages);
-	if (gfp_mask & __GFP_ACCOUNT) {
-		int i;
-
-		for (i = 0; i < area->nr_pages; i++)
-			mod_memcg_page_state(area->pages[i], MEMCG_VMALLOC, 1);
-	}
+	vmalloc_memcg_alloc_hook(area, gfp_mask, area->nr_pages);

 	/*
 	 * If not enough pages were obtained to accomplish an




[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Security]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]     [Monitors]

  Powered by Linux