On Fri, Jun 05, 2020 at 07:49:53PM +0000, Dennis Zhou wrote: > On Thu, May 28, 2020 at 04:25:05PM -0700, Roman Gushchin wrote: > > Percpu memory is becoming more and more widely used by various > > subsystems, and the total amount of memory controlled by the percpu > > allocator can make a good part of the total memory. > > > > As an example, bpf maps can consume a lot of percpu memory, > > and they are created by a user. Also, some cgroup internals > > (e.g. memory controller statistics) can be quite large. > > On a machine with many CPUs and a big number of cgroups they > > can consume hundreds of megabytes. > > > > So the lack of memcg accounting is creating a breach in the memory > > isolation. Similar to the slab memory, percpu memory should be > > accounted by default. > > > > To implement the percpu accounting it's possible to take the slab > > memory accounting as a model to follow. Let's introduce two types of > > percpu chunks: root and memcg. What makes memcg chunks different is > > an additional space allocated to store memcg membership information. > > If __GFP_ACCOUNT is passed on allocation, a memcg chunk should be > > used. If it's possible to charge the corresponding size to the target > > memory cgroup, allocation is performed, and the memcg ownership data > > is recorded. System-wide allocations are performed using root chunks, > > so there is no additional memory overhead. > > > > To implement a fast reparenting of percpu memory on memcg removal, > > we don't store mem_cgroup pointers directly: instead we use obj_cgroup > > API, introduced for slab accounting. 
> > > > Signed-off-by: Roman Gushchin <guro@xxxxxx> > > --- > > mm/percpu-internal.h | 57 ++++++++++++- > > mm/percpu-km.c | 5 +- > > mm/percpu-stats.c | 36 +++++---- > > mm/percpu-vm.c | 5 +- > > mm/percpu.c | 186 ++++++++++++++++++++++++++++++++++++++----- > > 5 files changed, 248 insertions(+), 41 deletions(-) > > > > diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h > > index 0468ba500bd4..0cf36337eb47 100644 > > --- a/mm/percpu-internal.h > > +++ b/mm/percpu-internal.h > > @@ -5,6 +5,27 @@ > > #include <linux/types.h> > > #include <linux/percpu.h> > > > > +/* > > + * There are two chunk types: root and memcg-aware. > > + * Chunks of each type have separate slots list. > > + * > > + * Memcg-aware chunks have an attached vector of obj_cgroup > > + * pointers, which is used to store memcg membership data > > + * of a percpu object. Obj_cgroups are ref-counted pointers > > + * to a memory cgroup with an ability to switch dynamically > > + * to the parent memory cgroup. This allows to reclaim a deleted > > + * memory cgroup without reclaiming of all outstanding objects, > > + * which do hold a reference at it. > > + */ > > nit: do you mind reflowing this to 80 characters and doing 2 spaces > after each period to keep the formatting uniform. > > > +enum pcpu_chunk_type { > > + PCPU_CHUNK_ROOT, > > +#ifdef CONFIG_MEMCG_KMEM > > + PCPU_CHUNK_MEMCG, > > +#endif > > + PCPU_NR_CHUNK_TYPES, > > + PCPU_FAIL_ALLOC = PCPU_NR_CHUNK_TYPES > > +}; > > + > > /* > > * pcpu_block_md is the metadata block struct. > > * Each chunk's bitmap is split into a number of full blocks. 
> > @@ -54,6 +75,9 @@ struct pcpu_chunk { > > int end_offset; /* additional area required to > > have the region end page > > aligned */ > > +#ifdef CONFIG_MEMCG_KMEM > > + struct obj_cgroup **obj_cgroups; /* vector of object cgroups */ > > +#endif > > > > int nr_pages; /* # of pages served by this chunk */ > > int nr_populated; /* # of populated pages */ > > @@ -63,7 +87,7 @@ struct pcpu_chunk { > > > > extern spinlock_t pcpu_lock; > > > > -extern struct list_head *pcpu_slot; > > +extern struct list_head *pcpu_chunk_lists; > > extern int pcpu_nr_slots; > > extern int pcpu_nr_empty_pop_pages; > > > > @@ -106,6 +130,37 @@ static inline int pcpu_chunk_map_bits(struct pcpu_chunk *chunk) > > return pcpu_nr_pages_to_map_bits(chunk->nr_pages); > > } > > > > +#ifdef CONFIG_MEMCG_KMEM > > +static enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk) > > +{ > > + if (chunk->obj_cgroups) > > + return PCPU_CHUNK_MEMCG; > > + return PCPU_CHUNK_ROOT; > > +} > > + > > +static bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type) > > +{ > > + return chunk_type == PCPU_CHUNK_MEMCG; > > +} > > + > > +#else > > +static enum pcpu_chunk_type pcpu_chunk_type(struct pcpu_chunk *chunk) > > +{ > > + return PCPU_CHUNK_ROOT; > > +} > > + > > +static bool pcpu_is_memcg_chunk(enum pcpu_chunk_type chunk_type) > > +{ > > + return false; > > +} > > +#endif > > + > > +static struct list_head *pcpu_chunk_list(enum pcpu_chunk_type chunk_type) > > +{ > > + return &pcpu_chunk_lists[pcpu_nr_slots * > > + pcpu_is_memcg_chunk(chunk_type)]; > > +} > > + > > #ifdef CONFIG_PERCPU_STATS > > > > #include <linux/spinlock.h> > > diff --git a/mm/percpu-km.c b/mm/percpu-km.c > > index 20d2b69a13b0..35c9941077ee 100644 > > --- a/mm/percpu-km.c > > +++ b/mm/percpu-km.c > > @@ -44,7 +44,8 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, > > /* nada */ > > } > > > > -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) > > +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type 
type, > > + gfp_t gfp) > > { > > const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; > > struct pcpu_chunk *chunk; > > @@ -52,7 +53,7 @@ static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) > > unsigned long flags; > > int i; > > > > - chunk = pcpu_alloc_chunk(gfp); > > + chunk = pcpu_alloc_chunk(type, gfp); > > if (!chunk) > > return NULL; > > > > diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c > > index 32558063c3f9..c8400a2adbc2 100644 > > --- a/mm/percpu-stats.c > > +++ b/mm/percpu-stats.c > > @@ -34,11 +34,15 @@ static int find_max_nr_alloc(void) > > { > > struct pcpu_chunk *chunk; > > int slot, max_nr_alloc; > > + enum pcpu_chunk_type type; > > > > max_nr_alloc = 0; > > - for (slot = 0; slot < pcpu_nr_slots; slot++) > > - list_for_each_entry(chunk, &pcpu_slot[slot], list) > > - max_nr_alloc = max(max_nr_alloc, chunk->nr_alloc); > > + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) > > + for (slot = 0; slot < pcpu_nr_slots; slot++) > > + list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot], > > + list) > > + max_nr_alloc = max(max_nr_alloc, > > + chunk->nr_alloc); > > > > return max_nr_alloc; > > } > > @@ -129,6 +133,9 @@ static void chunk_map_stats(struct seq_file *m, struct pcpu_chunk *chunk, > > P("cur_min_alloc", cur_min_alloc); > > P("cur_med_alloc", cur_med_alloc); > > P("cur_max_alloc", cur_max_alloc); > > +#ifdef CONFIG_MEMCG_KMEM > > + P("memcg_aware", pcpu_is_memcg_chunk(pcpu_chunk_type(chunk))); > > +#endif > > seq_putc(m, '\n'); > > } > > > > @@ -137,6 +144,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) > > struct pcpu_chunk *chunk; > > int slot, max_nr_alloc; > > int *buffer; > > + enum pcpu_chunk_type type; > > > > alloc_buffer: > > spin_lock_irq(&pcpu_lock); > > @@ -202,18 +210,18 @@ static int percpu_stats_show(struct seq_file *m, void *v) > > chunk_map_stats(m, pcpu_reserved_chunk, buffer); > > } > > > > - for (slot = 0; slot < pcpu_nr_slots; slot++) { > > - list_for_each_entry(chunk, &pcpu_slot[slot], list) { > 
> - if (chunk == pcpu_first_chunk) { > > - seq_puts(m, "Chunk: <- First Chunk\n"); > > - chunk_map_stats(m, chunk, buffer); > > - > > - > > - } else { > > - seq_puts(m, "Chunk:\n"); > > - chunk_map_stats(m, chunk, buffer); > > + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) { > > + for (slot = 0; slot < pcpu_nr_slots; slot++) { > > + list_for_each_entry(chunk, &pcpu_chunk_list(type)[slot], > > + list) { > > + if (chunk == pcpu_first_chunk) { > > + seq_puts(m, "Chunk: <- First Chunk\n"); > > + chunk_map_stats(m, chunk, buffer); > > + } else { > > + seq_puts(m, "Chunk:\n"); > > + chunk_map_stats(m, chunk, buffer); > > + } > > } > > - > > } > > } > > > > diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c > > index a2b395acef89..e46f7a6917f9 100644 > > --- a/mm/percpu-vm.c > > +++ b/mm/percpu-vm.c > > @@ -328,12 +328,13 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, > > pcpu_free_pages(chunk, pages, page_start, page_end); > > } > > > > -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) > > +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type, > > + gfp_t gfp) > > { > > struct pcpu_chunk *chunk; > > struct vm_struct **vms; > > > > - chunk = pcpu_alloc_chunk(gfp); > > + chunk = pcpu_alloc_chunk(type, gfp); > > if (!chunk) > > return NULL; > > > > diff --git a/mm/percpu.c b/mm/percpu.c > > index aa36b78d45a6..85f5755c9114 100644 > > --- a/mm/percpu.c > > +++ b/mm/percpu.c > > @@ -37,9 +37,14 @@ > > * takes care of normal allocations. > > * > > * The allocator organizes chunks into lists according to free size and > > - * tries to allocate from the fullest chunk first. Each chunk is managed > > - * by a bitmap with metadata blocks. The allocation map is updated on > > - * every allocation and free to reflect the current state while the boundary > > + * memcg-awareness. To make a percpu allocation memcg-aware the __GFP_ACCOUNT > > + * flag should be passed. 
All memcg-aware allocations are sharing one set > > + * of chunks and all unaccounted allocations and allocations performed > > + * by processes belonging to the root memory cgroup are using the second set. > > + * > > + * The allocator tries to allocate from the fullest chunk first. Each chunk > > + * is managed by a bitmap with metadata blocks. The allocation map is updated > > + * on every allocation and free to reflect the current state while the boundary > > * map is only updated on allocation. Each metadata block contains > > * information to help mitigate the need to iterate over large portions > > * of the bitmap. The reverse mapping from page to chunk is stored in > > @@ -81,6 +86,7 @@ > > #include <linux/kmemleak.h> > > #include <linux/sched.h> > > #include <linux/sched/mm.h> > > +#include <linux/memcontrol.h> > > > > #include <asm/cacheflush.h> > > #include <asm/sections.h> > > @@ -160,7 +166,7 @@ struct pcpu_chunk *pcpu_reserved_chunk __ro_after_init; > > DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ > > static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop, map ext */ > > > > -struct list_head *pcpu_slot __ro_after_init; /* chunk list slots */ > > +struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */ > > > > /* chunks which need their map areas extended, protected by pcpu_lock */ > > static LIST_HEAD(pcpu_map_extend_chunks); > > @@ -500,6 +506,9 @@ static void __pcpu_chunk_move(struct pcpu_chunk *chunk, int slot, > > bool move_front) > > { > > if (chunk != pcpu_reserved_chunk) { > > + struct list_head *pcpu_slot; > > + > > + pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk)); > > if (move_front) > > list_move(&chunk->list, &pcpu_slot[slot]); > > else > > @@ -1341,6 +1350,10 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, > > panic("%s: Failed to allocate %zu bytes\n", __func__, > > alloc_size); > > > > +#ifdef CONFIG_MEMCG_KMEM > > + /* first chunk isn't 
memcg-aware */ > > + chunk->obj_cgroups = NULL; > > +#endif > > pcpu_init_md_blocks(chunk); > > > > /* manage populated page bitmap */ > > @@ -1380,7 +1393,7 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, > > return chunk; > > } > > > > -static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) > > +static struct pcpu_chunk *pcpu_alloc_chunk(enum pcpu_chunk_type type, gfp_t gfp) > > { > > struct pcpu_chunk *chunk; > > int region_bits; > > @@ -1408,6 +1421,16 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) > > if (!chunk->md_blocks) > > goto md_blocks_fail; > > > > +#ifdef CONFIG_MEMCG_KMEM > > + if (pcpu_is_memcg_chunk(type)) { > > + chunk->obj_cgroups = > > + pcpu_mem_zalloc(pcpu_chunk_map_bits(chunk) * > > + sizeof(struct obj_cgroup *), gfp); > > + if (!chunk->obj_cgroups) > > + goto objcg_fail; > > + } > > +#endif > > + > > pcpu_init_md_blocks(chunk); > > > > /* init metadata */ > > @@ -1415,6 +1438,8 @@ static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) > > > > return chunk; > > > > +objcg_fail: > > + pcpu_mem_free(chunk->md_blocks); > > md_blocks_fail: > > pcpu_mem_free(chunk->bound_map); > > bound_map_fail: > > @@ -1429,6 +1454,9 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) > > { > > if (!chunk) > > return; > > +#ifdef CONFIG_MEMCG_KMEM > > + pcpu_mem_free(chunk->obj_cgroups); > > +#endif > > pcpu_mem_free(chunk->md_blocks); > > pcpu_mem_free(chunk->bound_map); > > pcpu_mem_free(chunk->alloc_map); > > @@ -1505,7 +1533,8 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, > > int page_start, int page_end, gfp_t gfp); > > static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, > > int page_start, int page_end); > > -static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp); > > +static struct pcpu_chunk *pcpu_create_chunk(enum pcpu_chunk_type type, > > + gfp_t gfp); > > static void pcpu_destroy_chunk(struct pcpu_chunk *chunk); > > static struct page *pcpu_addr_to_page(void *addr); > > static int 
__init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai); > > @@ -1547,6 +1576,77 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) > > return pcpu_get_page_chunk(pcpu_addr_to_page(addr)); > > } > > > > +#ifdef CONFIG_MEMCG_KMEM > > +static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, > > + struct obj_cgroup **objcgp) > > +{ > > + struct obj_cgroup *objcg; > > + > > + if (!memcg_kmem_enabled() || !(gfp & __GFP_ACCOUNT) || > > + memcg_kmem_bypass()) > > + return PCPU_CHUNK_ROOT; > > + > > + objcg = get_obj_cgroup_from_current(); > > + if (!objcg) > > + return PCPU_CHUNK_ROOT; > > + > > + if (obj_cgroup_charge(objcg, gfp, size * num_possible_cpus())) { > > + obj_cgroup_put(objcg); > > + return PCPU_FAIL_ALLOC; > > + } > > + > > + *objcgp = objcg; > > + return PCPU_CHUNK_MEMCG; > > +} > > + > > +static void pcpu_memcg_post_alloc_hook(struct obj_cgroup *objcg, > > + struct pcpu_chunk *chunk, int off, > > + size_t size) > > +{ > > + if (!objcg) > > + return; > > + > > + if (chunk) { > > + chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = objcg; > > + } else { > > + obj_cgroup_uncharge(objcg, size * num_possible_cpus()); > > + obj_cgroup_put(objcg); > > + } > > +} > > + > > +static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) > > +{ > > + struct obj_cgroup *objcg; > > + > > + if (!pcpu_is_memcg_chunk(pcpu_chunk_type(chunk))) > > + return; > > + > > + objcg = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT]; > > + chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL; > > + > > + obj_cgroup_uncharge(objcg, size * num_possible_cpus()); > > + > > + obj_cgroup_put(objcg); > > +} > > + > > +#else /* CONFIG_MEMCG_KMEM */ > > +static enum pcpu_chunk_type pcpu_memcg_pre_alloc_hook(size_t size, gfp_t gfp, > > + struct mem_cgroup **memcgp) > > +{ > > + return PCPU_CHUNK_ROOT; > > +} > > + > > +static void pcpu_memcg_post_alloc_hook(struct mem_cgroup *memcg, > > + struct pcpu_chunk *chunk, int off, > > + 
size_t size) > > +{ > > +} > > + > > +static void pcpu_memcg_free_hook(struct pcpu_chunk *chunk, int off, size_t size) > > +{ > > +} > > +#endif /* CONFIG_MEMCG_KMEM */ > > + > > /** > > * pcpu_alloc - the percpu allocator > > * @size: size of area to allocate in bytes > > @@ -1568,6 +1668,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, > > gfp_t pcpu_gfp; > > bool is_atomic; > > bool do_warn; > > + enum pcpu_chunk_type type; > > + struct list_head *pcpu_slot; > > + struct obj_cgroup *objcg = NULL; > > static int warn_limit = 10; > > struct pcpu_chunk *chunk, *next; > > const char *err; > > @@ -1602,16 +1705,23 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, > > return NULL; > > } > > > > + type = pcpu_memcg_pre_alloc_hook(size, gfp, &objcg); > > + if (unlikely(type == PCPU_FAIL_ALLOC)) > > + return NULL; > > + pcpu_slot = pcpu_chunk_list(type); > > + > > if (!is_atomic) { > > /* > > * pcpu_balance_workfn() allocates memory under this mutex, > > * and it may wait for memory reclaim. Allow current task > > * to become OOM victim, in case of memory pressure. > > */ > > - if (gfp & __GFP_NOFAIL) > > + if (gfp & __GFP_NOFAIL) { > > mutex_lock(&pcpu_alloc_mutex); > > - else if (mutex_lock_killable(&pcpu_alloc_mutex)) > > + } else if (mutex_lock_killable(&pcpu_alloc_mutex)) { > > + pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size); > > return NULL; > > + } > > } > > > > spin_lock_irqsave(&pcpu_lock, flags); > > @@ -1637,7 +1747,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, > > restart: > > /* search through normal chunks */ > > for (slot = pcpu_size_to_slot(size); slot < pcpu_nr_slots; slot++) { > > - list_for_each_entry_safe(chunk, next, &pcpu_slot[slot], list) { > > + list_for_each_entry_safe(chunk, next, &pcpu_slot[slot], > > + list) { > > nit: this line change doesn't do anything. Can you please remove it. 
> > > off = pcpu_find_block_fit(chunk, bits, bit_align, > > is_atomic); > > if (off < 0) { > > @@ -1666,7 +1777,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, > > } > > > > if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { > > - chunk = pcpu_create_chunk(pcpu_gfp); > > + chunk = pcpu_create_chunk(type, pcpu_gfp); > > if (!chunk) { > > err = "failed to allocate new chunk"; > > goto fail; > > @@ -1723,6 +1834,8 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, > > trace_percpu_alloc_percpu(reserved, is_atomic, size, align, > > chunk->base_addr, off, ptr); > > > > + pcpu_memcg_post_alloc_hook(objcg, chunk, off, size); > > + > > return ptr; > > > > fail_unlock: > > @@ -1744,6 +1857,9 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, > > } else { > > mutex_unlock(&pcpu_alloc_mutex); > > } > > + > > + pcpu_memcg_post_alloc_hook(objcg, NULL, 0, size); > > + > > return NULL; > > } > > > > @@ -1803,8 +1919,8 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) > > } > > > > /** > > - * pcpu_balance_workfn - manage the amount of free chunks and populated pages > > - * @work: unused > > + * __pcpu_balance_workfn - manage the amount of free chunks and populated pages > > + * @type: chunk type > > * > > * Reclaim all fully free chunks except for the first one. This is also > > * responsible for maintaining the pool of empty populated pages. However, > > @@ -1813,11 +1929,12 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) > > * allocation causes the failure as it is possible that requests can be > > * serviced from already backed regions. 
> > */ > > -static void pcpu_balance_workfn(struct work_struct *work) > > +static void __pcpu_balance_workfn(enum pcpu_chunk_type type) > > { > > /* gfp flags passed to underlying allocators */ > > const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; > > LIST_HEAD(to_free); > > + struct list_head *pcpu_slot = pcpu_chunk_list(type); > > struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; > > struct pcpu_chunk *chunk, *next; > > int slot, nr_to_pop, ret; > > @@ -1915,7 +2032,7 @@ static void pcpu_balance_workfn(struct work_struct *work) > > > > if (nr_to_pop) { > > /* ran out of chunks to populate, create a new one and retry */ > > - chunk = pcpu_create_chunk(gfp); > > + chunk = pcpu_create_chunk(type, gfp); > > if (chunk) { > > spin_lock_irq(&pcpu_lock); > > pcpu_chunk_relocate(chunk, -1); > > @@ -1927,6 +2044,20 @@ static void pcpu_balance_workfn(struct work_struct *work) > > mutex_unlock(&pcpu_alloc_mutex); > > } > > > > +/** > > + * pcpu_balance_workfn - manage the amount of free chunks and populated pages > > + * @work: unused > > + * > > + * Call __pcpu_balance_workfn() for each chunk type. 
> > + */ > > +static void pcpu_balance_workfn(struct work_struct *work) > > +{ > > + enum pcpu_chunk_type type; > > + > > + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) > > + __pcpu_balance_workfn(type); > > +} > > + > > /** > > * free_percpu - free percpu area > > * @ptr: pointer to area to free > > @@ -1941,8 +2072,9 @@ void free_percpu(void __percpu *ptr) > > void *addr; > > struct pcpu_chunk *chunk; > > unsigned long flags; > > - int off; > > + int size, off; > > bool need_balance = false; > > + struct list_head *pcpu_slot; > > > > if (!ptr) > > return; > > @@ -1956,7 +2088,11 @@ void free_percpu(void __percpu *ptr) > > chunk = pcpu_chunk_addr_search(addr); > > off = addr - chunk->base_addr; > > > > - pcpu_free_area(chunk, off); > > + size = pcpu_free_area(chunk, off); > > + > > + pcpu_slot = pcpu_chunk_list(pcpu_chunk_type(chunk)); > > + > > + pcpu_memcg_free_hook(chunk, off, size); > > > > /* if there are more than one fully free chunks, wake up grim reaper */ > > if (chunk->free_bytes == pcpu_unit_size) { > > @@ -2267,6 +2403,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, > > int map_size; > > unsigned long tmp_addr; > > size_t alloc_size; > > + enum pcpu_chunk_type type; > > > > #define PCPU_SETUP_BUG_ON(cond) do { \ > > if (unlikely(cond)) { \ > > @@ -2384,13 +2521,18 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, > > * empty chunks. 
> > */ > > pcpu_nr_slots = __pcpu_size_to_slot(pcpu_unit_size) + 2; > > - pcpu_slot = memblock_alloc(pcpu_nr_slots * sizeof(pcpu_slot[0]), > > - SMP_CACHE_BYTES); > > - if (!pcpu_slot) > > + pcpu_chunk_lists = memblock_alloc(pcpu_nr_slots * > > + sizeof(pcpu_chunk_lists[0]) * > > + PCPU_NR_CHUNK_TYPES, > > + SMP_CACHE_BYTES); > > + if (!pcpu_chunk_lists) > > panic("%s: Failed to allocate %zu bytes\n", __func__, > > - pcpu_nr_slots * sizeof(pcpu_slot[0])); > > - for (i = 0; i < pcpu_nr_slots; i++) > > - INIT_LIST_HEAD(&pcpu_slot[i]); > > + pcpu_nr_slots * sizeof(pcpu_chunk_lists[0]) * > > + PCPU_NR_CHUNK_TYPES); > > + > > + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) > > + for (i = 0; i < pcpu_nr_slots; i++) > > + INIT_LIST_HEAD(&pcpu_chunk_list(type)[i]); > > > > /* > > * The end of the static region needs to be aligned with the > > -- > > 2.25.4 > > > > There were just 2 minor nits. Do you mind resending with them fixed as > I'm not sure I'll be carrying these patches or not. Sure, will send v2 based on the slab controller v6 early next week. > > Acked-by: Dennis Zhou <dennis@xxxxxxxxxx> Thank you!