On Wed, Apr 22, 2020 at 01:46:55PM -0700, Roman Gushchin wrote: > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -257,6 +257,78 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr) > } > > #ifdef CONFIG_MEMCG_KMEM > +extern spinlock_t css_set_lock; > + > +static void obj_cgroup_release(struct percpu_ref *ref) > +{ > + struct obj_cgroup *objcg = container_of(ref, struct obj_cgroup, refcnt); > + struct mem_cgroup *memcg; > + unsigned int nr_bytes; > + unsigned int nr_pages; > + unsigned long flags; > + > + nr_bytes = atomic_read(&objcg->nr_charged_bytes); > + WARN_ON_ONCE(nr_bytes & (PAGE_SIZE - 1)); > + nr_pages = nr_bytes >> PAGE_SHIFT; What guarantees that we don't have a partial page in there at this point? I guess any outstanding allocations would pin the objcg, so when it's released all objects have been freed. But if that's true, how can we have full pages remaining in there now? > @@ -2723,6 +2820,30 @@ struct mem_cgroup *mem_cgroup_from_obj(void *p) > return page->mem_cgroup; > } > > +__always_inline struct obj_cgroup *get_obj_cgroup_from_current(void) > +{ > + struct obj_cgroup *objcg = NULL; > + struct mem_cgroup *memcg; > + > + if (unlikely(!current->mm)) > + return NULL; > + > + rcu_read_lock(); > + if (unlikely(current->active_memcg)) > + memcg = rcu_dereference(current->active_memcg); > + else > + memcg = mem_cgroup_from_task(current); > + > + for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) { > + objcg = rcu_dereference(memcg->objcg); > + if (objcg && obj_cgroup_tryget(objcg)) > + break; > + } > + rcu_read_unlock(); > + > + return objcg; > +} Thanks for moving this here from one of the later patches, it helps understanding the life cycle of obj_cgroup better. > +int obj_cgroup_charge(struct obj_cgroup *objcg, gfp_t gfp, size_t size) > +{ > + struct mem_cgroup *memcg; > + unsigned int nr_pages, nr_bytes; > + int ret; > + > + if (consume_obj_stock(objcg, size)) > + return 0; > + > + rcu_read_lock(); > + memcg = obj_cgroup_memcg(objcg); > + css_get(&memcg->css); > + rcu_read_unlock(); > + > + nr_pages = size >> PAGE_SHIFT; > + nr_bytes = size & (PAGE_SIZE - 1); > + > + if (nr_bytes) > + nr_pages += 1; > + > + ret = __memcg_kmem_charge(memcg, gfp, nr_pages); If consume_obj_stock() fails because some other memcg is cached, should this try to consume the partial page in objcg->nr_charged_bytes before getting more pages? > + if (!ret && nr_bytes) > + refill_obj_stock(objcg, PAGE_SIZE - nr_bytes); This will put the cgroup into the cache if the allocation resulted in a partially free page. But if this was a page allocation, we may have objcg->nr_cache_bytes from a previous subpage allocation that we should probably put back into the stock. It's not a common case, I'm just trying to understand what objcg->nr_cache_bytes holds and when it does so. The rest of this patch looks good to me!