On Wed, Feb 14, 2024 at 9:59 AM Vlastimil Babka <vbabka@xxxxxxx> wrote: > > On 2/12/24 22:38, Suren Baghdasaryan wrote: > > Currently slab pages can store only vectors of obj_cgroup pointers in > > page->memcg_data. Introduce slabobj_ext structure to allow more data > > to be stored for each slab object. Wrap obj_cgroup into slabobj_ext > > to support current functionality while allowing to extend slabobj_ext > > in the future. > > > > Signed-off-by: Suren Baghdasaryan <surenb@xxxxxxxxxx> > > ... > > > +static inline bool need_slab_obj_ext(void) > > +{ > > + /* > > + * CONFIG_MEMCG_KMEM creates vector of obj_cgroup objects conditionally > > + * inside memcg_slab_post_alloc_hook. No other users for now. > > + */ > > + return false; > > +} > > + > > +static inline struct slabobj_ext * > > +prepare_slab_obj_exts_hook(struct kmem_cache *s, gfp_t flags, void *p) > > +{ > > + struct slab *slab; > > + > > + if (!p) > > + return NULL; > > + > > + if (!need_slab_obj_ext()) > > + return NULL; > > + > > + slab = virt_to_slab(p); > > + if (!slab_obj_exts(slab) && > > + WARN(alloc_slab_obj_exts(slab, s, flags, false), > > + "%s, %s: Failed to create slab extension vector!\n", > > + __func__, s->name)) > > + return NULL; > > + > > + return slab_obj_exts(slab) + obj_to_index(s, slab, p); > > This is called in slab_post_alloc_hook() and the result stored to obj_exts > but unused. Maybe introduce this only in a later patch where it becomes > relevant? Ack. I'll move it into the patch where we start using obj_exts. > > > --- a/mm/slab_common.c > > +++ b/mm/slab_common.c > > @@ -201,6 +201,54 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align, > > return NULL; > > } > > > > +#ifdef CONFIG_SLAB_OBJ_EXT > > +/* > > + * The allocated objcg pointers array is not accounted directly. > > + * Moreover, it should not come from DMA buffer and is not readily > > + * reclaimable. So those GFP bits should be masked off. 
> > + */ > > +#define OBJCGS_CLEAR_MASK (__GFP_DMA | __GFP_RECLAIMABLE | \ > > + __GFP_ACCOUNT | __GFP_NOFAIL) > > + > > +int alloc_slab_obj_exts(struct slab *slab, struct kmem_cache *s, > > + gfp_t gfp, bool new_slab) > > Since you're moving this function between files anyway, could you please > instead move it to mm/slub.c. I expect we'll eventually (maybe even soon) > move the rest of performance sensitive kmemcg hooks there as well to make > inlining possible. Will do. > > > +{ > > + unsigned int objects = objs_per_slab(s, slab); > > + unsigned long obj_exts; > > + void *vec; > > + > > + gfp &= ~OBJCGS_CLEAR_MASK; > > + vec = kcalloc_node(objects, sizeof(struct slabobj_ext), gfp, > > + slab_nid(slab)); > > + if (!vec) > > + return -ENOMEM; > > + > > + obj_exts = (unsigned long)vec; > > +#ifdef CONFIG_MEMCG > > + obj_exts |= MEMCG_DATA_OBJEXTS; > > +#endif > > + if (new_slab) { > > + /* > > + * If the slab is brand new and nobody can yet access its > > + * obj_exts, no synchronization is required and obj_exts can > > + * be simply assigned. > > + */ > > + slab->obj_exts = obj_exts; > > + } else if (cmpxchg(&slab->obj_exts, 0, obj_exts)) { > > + /* > > + * If the slab is already in use, somebody can allocate and > > + * assign slabobj_exts in parallel. In this case the existing > > + * objcg vector should be reused. 
> > + */ > > + kfree(vec); > > + return 0; > > + } > > + > > + kmemleak_not_leak(vec); > > + return 0; > > +} > > +#endif /* CONFIG_SLAB_OBJ_EXT */ > > + > > static struct kmem_cache *create_cache(const char *name, > > unsigned int object_size, unsigned int align, > > slab_flags_t flags, unsigned int useroffset, > > diff --git a/mm/slub.c b/mm/slub.c > > index 2ef88bbf56a3..1eb1050814aa 100644 > > --- a/mm/slub.c > > +++ b/mm/slub.c > > @@ -683,10 +683,10 @@ static inline bool __slab_update_freelist(struct kmem_cache *s, struct slab *sla > > > > if (s->flags & __CMPXCHG_DOUBLE) { > > ret = __update_freelist_fast(slab, freelist_old, counters_old, > > - freelist_new, counters_new); > > + freelist_new, counters_new); > > } else { > > ret = __update_freelist_slow(slab, freelist_old, counters_old, > > - freelist_new, counters_new); > > + freelist_new, counters_new); > > } > > if (likely(ret)) > > return true; > > @@ -710,13 +710,13 @@ static inline bool slab_update_freelist(struct kmem_cache *s, struct slab *slab, > > > > if (s->flags & __CMPXCHG_DOUBLE) { > > ret = __update_freelist_fast(slab, freelist_old, counters_old, > > - freelist_new, counters_new); > > + freelist_new, counters_new); > > } else { > > unsigned long flags; > > > > local_irq_save(flags); > > ret = __update_freelist_slow(slab, freelist_old, counters_old, > > - freelist_new, counters_new); > > + freelist_new, counters_new); > > I can see the mixing of tabs and spaces is wrong but perhaps not fix it as > part of the series? I'll fix them in the next version. 
> > > local_irq_restore(flags); > > } > > if (likely(ret)) > > @@ -1881,13 +1881,25 @@ static inline enum node_stat_item cache_vmstat_idx(struct kmem_cache *s) > > NR_SLAB_RECLAIMABLE_B : NR_SLAB_UNRECLAIMABLE_B; > > } > > > > -#ifdef CONFIG_MEMCG_KMEM > > -static inline void memcg_free_slab_cgroups(struct slab *slab) > > +#ifdef CONFIG_SLAB_OBJ_EXT > > +static inline void free_slab_obj_exts(struct slab *slab) > > Right, freeing is already here, so it makes sense to put the allocation here as well. > > > @@ -3817,6 +3820,7 @@ void slab_post_alloc_hook(struct kmem_cache *s, struct obj_cgroup *objcg, > > kmemleak_alloc_recursive(p[i], s->object_size, 1, > > s->flags, init_flags); > > kmsan_slab_alloc(s, p[i], init_flags); > > + obj_exts = prepare_slab_obj_exts_hook(s, flags, p[i]); > > Yeah, here's where the hook is used. Doesn't it generate a compiler warning? Maybe at > least postpone the call until the result is further used. Yes, I'll move that into the patch where we start using it. Thanks for the review, Vlastimil! > > > } > > > > memcg_slab_post_alloc_hook(s, objcg, flags, size, p); > > -- > To unsubscribe from this group and stop receiving emails from it, send an email to kernel-team+unsubscribe@xxxxxxxxxxx. >