Hello, On Sun, Jun 19, 2022 at 03:50:29PM +0000, Yafang Shao wrote: > This patch introduces a helper to recharge the corresponding pages of > a given percpu address. It is similar with how to recharge a kmalloc'ed > address. > > Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> > --- > include/linux/percpu.h | 1 + > mm/percpu.c | 98 ++++++++++++++++++++++++++++++++++++++++++ > 2 files changed, 99 insertions(+) > > diff --git a/include/linux/percpu.h b/include/linux/percpu.h > index f1ec5ad1351c..e88429410179 100644 > --- a/include/linux/percpu.h > +++ b/include/linux/percpu.h > @@ -128,6 +128,7 @@ extern void __init setup_per_cpu_areas(void); > extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) __alloc_size(1); > extern void __percpu *__alloc_percpu(size_t size, size_t align) __alloc_size(1); > extern void free_percpu(void __percpu *__pdata); > +bool recharge_percpu(void __percpu *__pdata, int step); Nit: can you add extern to keep the file consistent. > extern phys_addr_t per_cpu_ptr_to_phys(void *addr); > > #define alloc_percpu_gfp(type, gfp) \ > diff --git a/mm/percpu.c b/mm/percpu.c > index 3633eeefaa0d..fd81f4d79f2f 100644 > --- a/mm/percpu.c > +++ b/mm/percpu.c > @@ -2310,6 +2310,104 @@ void free_percpu(void __percpu *ptr) > } > EXPORT_SYMBOL_GPL(free_percpu); > > +#ifdef CONFIG_MEMCG_KMEM > +bool recharge_percpu(void __percpu *ptr, int step) > +{ > + int bit_off, off, bits, size, end; > + struct obj_cgroup *objcg_old; > + struct obj_cgroup *objcg_new; > + struct pcpu_chunk *chunk; > + unsigned long flags; > + void *addr; > + > + WARN_ON(!in_task()); > + > + if (!ptr) > + return true; > + > + addr = __pcpu_ptr_to_addr(ptr); > + spin_lock_irqsave(&pcpu_lock, flags); > + chunk = pcpu_chunk_addr_search(addr); > + off = addr - chunk->base_addr; > + objcg_old = chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT]; > + if (!objcg_old && step != MEMCG_KMEM_POST_CHARGE) { > + spin_unlock_irqrestore(&pcpu_lock, flags); > + return true; > + } > + > + bit_off = off / PCPU_MIN_ALLOC_SIZE; > + /* find end index */ > + end = find_next_bit(chunk->bound_map, pcpu_chunk_map_bits(chunk), > + bit_off + 1); > + bits = end - bit_off; > + size = bits * PCPU_MIN_ALLOC_SIZE; > + > + switch (step) { > + case MEMCG_KMEM_PRE_CHARGE: > + objcg_new = get_obj_cgroup_from_current(); > + WARN_ON(!objcg_new); > + if (obj_cgroup_charge(objcg_new, GFP_KERNEL, > + size * num_possible_cpus())) { > + obj_cgroup_put(objcg_new); > + spin_unlock_irqrestore(&pcpu_lock, flags); > + return false; > + } > + break; > + case MEMCG_KMEM_UNCHARGE: > + obj_cgroup_uncharge(objcg_old, size * num_possible_cpus()); > + rcu_read_lock(); > + mod_memcg_state(obj_cgroup_memcg(objcg_old), MEMCG_PERCPU_B, > + -(size * num_possible_cpus())); > + rcu_read_unlock(); > + chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = NULL; > + obj_cgroup_put(objcg_old); > + break; > + case MEMCG_KMEM_POST_CHARGE: > + rcu_read_lock(); > + chunk->obj_cgroups[off >> PCPU_MIN_ALLOC_SHIFT] = obj_cgroup_from_current(); > + mod_memcg_state(mem_cgroup_from_task(current), MEMCG_PERCPU_B, > + (size * num_possible_cpus())); > + rcu_read_unlock(); > + break; > + case MEMCG_KMEM_CHARGE_ERR: > + /* > + * In case fail to charge to the new one in the pre charge state, > + * for example, we have pre-charged one memcg successfully but fail > + * to pre-charge the second memcg, then we should uncharge the first > + * memcg. > + */ > + objcg_new = obj_cgroup_from_current(); > + obj_cgroup_uncharge(objcg_new, size * num_possible_cpus()); > + obj_cgroup_put(objcg_new); > + rcu_read_lock(); > + mod_memcg_state(obj_cgroup_memcg(objcg_new), MEMCG_PERCPU_B, > + -(size * num_possible_cpus())); > + rcu_read_unlock(); > + > + break; > + } I'm not really the biggest fan of this step stuff. I see why you're doing it because you want to do all or nothing recharging the percpu bpf maps. Is there a way to have percpu own this logic and attempt to do all or nothing instead? I realize bpf is likely the largest percpu user, but the recharge_percpu() api seems to be more generic than forcing potential other users in the future to open code it. > + > + spin_unlock_irqrestore(&pcpu_lock, flags); > + > + return true; > +} > +EXPORT_SYMBOL(recharge_percpu); > + > +#else /* CONFIG_MEMCG_KMEM */ > + > +bool charge_percpu(void __percpu *ptr, bool charge) > +{ > + return true; > +} > +EXPORT_SYMBOL(charge_percpu); > + > +void uncharge_percpu(void __percpu *ptr) > +{ > +} I'm guessing this is supposed to be recharge_percpu() not (un)charge_percpu(). > +EXPORT_SYMBOL(uncharge_percpu); > + > +#endif /* CONFIG_MEMCG_KMEM */ > + > bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr) > { > #ifdef CONFIG_SMP > -- > 2.17.1 > > Thanks, Dennis