On Fri, Sep 29, 2023 at 11:00:52AM -0700, Roman Gushchin wrote: > @@ -553,6 +553,16 @@ static inline bool folio_memcg_kmem(struct folio *folio) > return folio->memcg_data & MEMCG_DATA_KMEM; > } > > +static inline bool current_objcg_needs_update(struct obj_cgroup *objcg) > +{ > + return (struct obj_cgroup *)((unsigned long)objcg & 0x1); > +} > + > +static inline struct obj_cgroup * > +current_objcg_without_update_flag(struct obj_cgroup *objcg) > +{ > + return (struct obj_cgroup *)((unsigned long)objcg & ~0x1); > +} I would slightly prefer naming the bit with a define, and open-coding the bitops in the current callsites. This makes it clearer that the actual pointer bits are overloaded in the places where the pointer is accessed. > @@ -3001,6 +3001,47 @@ static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg) > return objcg; > } > > +static struct obj_cgroup *current_objcg_update(struct obj_cgroup *old) > +{ > + struct mem_cgroup *memcg; > + struct obj_cgroup *objcg = NULL, *tmp = old; > + > + old = current_objcg_without_update_flag(old); > + if (old) > + obj_cgroup_put(old); > + > + rcu_read_lock(); > + do { > + /* Atomically drop the update bit, */ > + WARN_ON_ONCE(cmpxchg(¤t->objcg, tmp, 0) != tmp); > + > + /* ...obtain the new objcg pointer */ > + memcg = mem_cgroup_from_task(current); > + for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg)) { > + objcg = rcu_dereference(memcg->objcg); > + if (objcg && obj_cgroup_tryget(objcg)) > + break; > + objcg = NULL; > + } As per the other thread, it would be great to have a comment here explaining the scenario(s) when the tryget could fail and we'd have to defer to an ancestor. > + > + /* > + * ...and try atomically set up a new objcg pointer. If it > + * fails, it means the update flag was set concurrently, so > + * the whole procedure should be repeated. > + */ > + tmp = 0; > + } while (!try_cmpxchg(¤t->objcg, &tmp, objcg)); > + rcu_read_unlock(); > + > + return objcg; Overall this looks great to me. AFAICS the rcu_read_lock() is needed for the mem_cgroup_from_task() and tryget(). Is it possible to localize it around these operations? Or am I missing some other effect it has? > @@ -6358,8 +6407,27 @@ static void mem_cgroup_move_task(void) > } > #endif > > +#ifdef CONFIG_MEMCG_KMEM > +static void mem_cgroup_fork(struct task_struct *task) > +{ > + /* > + * Set the update flag to cause task->objcg to be initialized lazily > + * on the first allocation. > + */ > + task->objcg = (struct obj_cgroup *)0x1; > +} I like this open-coding! Should this mention why it doesn't need to be atomic? Task is in fork(), no concurrent modifications from allocations or migrations possible... None of the feedback is a blocker, though. Acked-by: Johannes Weiner <hannes@xxxxxxxxxxx>