On Tue, Jan 04, 2022 at 01:10:18AM +0100, Vlastimil Babka wrote:
> From: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>
>
> Make struct slab independent of struct page. It still uses the
> underlying memory in struct page for storing slab-specific data, but
> slab and slub can now be weaned off using struct page directly. Some of
> the wrapper functions (slab_address() and slab_order()) still need to
> cast to struct folio, but this is a significant disentanglement.
>
> [ vbabka@xxxxxxx: Rebase on folios, use folio instead of page where
>   possible.
>
>   Do not duplicate flags field in struct slab, instead make the related
>   accessors go through slab_folio(). For testing pfmemalloc use the
>   folio_*_active flag accessors directly so the PageSlabPfmemalloc
>   wrappers can be removed later.
>
>   Make folio_slab() expect only folio_test_slab() == true folios and
>   virt_to_slab() return NULL when folio_test_slab() == false.
>
>   Move struct slab to mm/slab.h.
>
>   Don't represent with struct slab pages that are not true slab pages,
>   but just a compound page obtained directly rom page allocator (with

a typo here: (f)rom

>   large kmalloc() for SLUB and SLOB). ]
>
> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx>
> Acked-by: Johannes Weiner <hannes@xxxxxxxxxxx>
> Reviewed-by: Roman Gushchin <guro@xxxxxx>
> ---
>  include/linux/mm_types.h |  10 +--
>  mm/slab.h                | 167 +++++++++++++++++++++++++++++++++++++++
>  mm/slub.c                |   8 +-
>  3 files changed, 176 insertions(+), 9 deletions(-)
>
> diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
> index c3a6e6209600..1ae3537c7920 100644
> --- a/include/linux/mm_types.h
> +++ b/include/linux/mm_types.h
> @@ -56,11 +56,11 @@ struct mem_cgroup;
>   * in each subpage, but you may need to restore some of their values
>   * afterwards.
>   *
> - * SLUB uses cmpxchg_double() to atomically update its freelist and
> - * counters. That requires that freelist & counters be adjacent and
> - * double-word aligned. We align all struct pages to double-word
> - * boundaries, and ensure that 'freelist' is aligned within the
> - * struct.
> + * SLUB uses cmpxchg_double() to atomically update its freelist and counters.
> + * That requires that freelist & counters in struct slab be adjacent and
> + * double-word aligned. Because struct slab currently just reinterprets the
> + * bits of struct page, we align all struct pages to double-word boundaries,
> + * and ensure that 'freelist' is aligned within struct slab.
>   */
>  #ifdef CONFIG_HAVE_ALIGNED_STRUCT_PAGE
>  #define _struct_page_alignment __aligned(2 * sizeof(unsigned long))
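
Side note, not a request for this patch: the requirement described in the
new comment could eventually also be spelled out as compile-time checks
against struct slab itself. A rough sketch of what I mean (my own
illustration, not code from this series):

	/*
	 * Sketch only: cmpxchg_double() needs freelist/counters adjacent
	 * and double-word aligned within struct slab.
	 */
	static_assert(IS_ALIGNED(offsetof(struct slab, freelist),
				 2 * sizeof(void *)));
	static_assert(offsetof(struct slab, counters) ==
		      offsetof(struct slab, freelist) + sizeof(void *));
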
> diff --git a/mm/slab.h b/mm/slab.h
> index 56ad7eea3ddf..0e67a8cb7f80 100644
> --- a/mm/slab.h
> +++ b/mm/slab.h
> @@ -5,6 +5,173 @@
>   * Internal slab definitions
>   */
>
> +/* Reuses the bits in struct page */
> +struct slab {
> +	unsigned long __page_flags;
> +	union {
> +		struct list_head slab_list;
> +		struct {	/* Partial pages */
> +			struct slab *next;
> +#ifdef CONFIG_64BIT
> +			int slabs;	/* Nr of slabs left */
> +#else
> +			short int slabs;
> +#endif
> +		};
> +		struct rcu_head rcu_head;
> +	};
> +	struct kmem_cache *slab_cache; /* not slob */
> +	/* Double-word boundary */
> +	void *freelist;		/* first free object */
> +	union {
> +		void *s_mem;	/* slab: first object */
> +		unsigned long counters;		/* SLUB */
> +		struct {			/* SLUB */
> +			unsigned inuse:16;
> +			unsigned objects:15;
> +			unsigned frozen:1;
> +		};
> +	};
> +
> +	union {
> +		unsigned int active;		/* SLAB */
> +		int units;			/* SLOB */
> +	};
> +	atomic_t __page_refcount;
> +#ifdef CONFIG_MEMCG
> +	unsigned long memcg_data;
> +#endif
> +};
> +
> +#define SLAB_MATCH(pg, sl)						\
> +	static_assert(offsetof(struct page, pg) == offsetof(struct slab, sl))
> +SLAB_MATCH(flags, __page_flags);
> +SLAB_MATCH(compound_head, slab_list);	/* Ensure bit 0 is clear */
> +SLAB_MATCH(slab_list, slab_list);
> +SLAB_MATCH(rcu_head, rcu_head);
> +SLAB_MATCH(slab_cache, slab_cache);
> +SLAB_MATCH(s_mem, s_mem);
> +SLAB_MATCH(active, active);
> +SLAB_MATCH(_refcount, __page_refcount);
> +#ifdef CONFIG_MEMCG
> +SLAB_MATCH(memcg_data, memcg_data);
> +#endif
> +#undef SLAB_MATCH
> +static_assert(sizeof(struct slab) <= sizeof(struct page));
> +
> +/**
> + * folio_slab - Converts from folio to slab.
> + * @folio: The folio.
> + *
> + * Currently struct slab is a different representation of a folio where
> + * folio_test_slab() is true.
> + *
> + * Return: The slab which contains this folio.
> + */
> +#define folio_slab(folio)	(_Generic((folio),			\
> +	const struct folio *:	(const struct slab *)(folio),		\
> +	struct folio *:		(struct slab *)(folio)))
> +
> +/**
> + * slab_folio - The folio allocated for a slab
> + * @slab: The slab.
> + *
> + * Slabs are allocated as folios that contain the individual objects and are
> + * using some fields in the first struct page of the folio - those fields are
> + * now accessed by struct slab. It is occasionally necessary to convert back to
> + * a folio in order to communicate with the rest of the mm. Please use this
> + * helper function instead of casting yourself, as the implementation may change
> + * in the future.
> + */
> +#define slab_folio(s)		(_Generic((s),				\
> +	const struct slab *:	(const struct folio *)s,		\
> +	struct slab *:		(struct folio *)s))
> +
> +/**
> + * page_slab - Converts from first struct page to slab.
> + * @p: The first (either head of compound or single) page of slab.
> + *
> + * A temporary wrapper to convert struct page to struct slab in situations where
> + * we know the page is the compound head, or single order-0 page.
> + *
> + * Long-term ideally everything would work with struct slab directly or go
> + * through folio to struct slab.
> + *
> + * Return: The slab which contains this page
> + */
> +#define page_slab(p)		(_Generic((p),				\
> +	const struct page *:	(const struct slab *)(p),		\
> +	struct page *:		(struct slab *)(p)))
> +
> +/**
> + * slab_page - The first struct page allocated for a slab
> + * @slab: The slab.
> + *
> + * A convenience wrapper for converting slab to the first struct page of the
> + * underlying folio, to communicate with code not yet converted to folio or
> + * struct slab.
> + */
> +#define slab_page(s) folio_page(slab_folio(s), 0)
> +
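
Nice that the _Generic() versions preserve constness. For other readers,
a usage sketch of how callers are expected to convert (my own illustration;
folio_cache() is a made-up helper, not something in this series):

	/* Sketch only: typed conversions instead of open-coded casts. */
	static struct kmem_cache *folio_cache(struct folio *folio)
	{
		struct slab *slab;

		if (!folio_test_slab(folio))
			return NULL;	/* e.g. compound page from a large kmalloc() */

		slab = folio_slab(folio);	/* folio -> slab */
		return slab->slab_cache;	/* slab-specific data, not for SLOB */
	}
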
> +/*
> + * If network-based swap is enabled, sl*b must keep track of whether pages
> + * were allocated from pfmemalloc reserves.
> + */
> +static inline bool slab_test_pfmemalloc(const struct slab *slab)
> +{
> +	return folio_test_active((struct folio *)slab_folio(slab));
> +}
> +
> +static inline void slab_set_pfmemalloc(struct slab *slab)
> +{
> +	folio_set_active(slab_folio(slab));
> +}
> +
> +static inline void slab_clear_pfmemalloc(struct slab *slab)
> +{
> +	folio_clear_active(slab_folio(slab));
> +}
> +
> +static inline void __slab_clear_pfmemalloc(struct slab *slab)
> +{
> +	__folio_clear_active(slab_folio(slab));
> +}
> +
> +static inline void *slab_address(const struct slab *slab)
> +{
> +	return folio_address(slab_folio(slab));
> +}
> +
> +static inline int slab_nid(const struct slab *slab)
> +{
> +	return folio_nid(slab_folio(slab));
> +}
> +
> +static inline pg_data_t *slab_pgdat(const struct slab *slab)
> +{
> +	return folio_pgdat(slab_folio(slab));
> +}
> +
> +static inline struct slab *virt_to_slab(const void *addr)
> +{
> +	struct folio *folio = virt_to_folio(addr);
> +
> +	if (!folio_test_slab(folio))
> +		return NULL;
> +
> +	return folio_slab(folio);
> +}
> +
> +static inline int slab_order(const struct slab *slab)
> +{
> +	return folio_order((struct folio *)slab_folio(slab));
> +}
> +
> +static inline size_t slab_size(const struct slab *slab)
> +{
> +	return PAGE_SIZE << slab_order(slab);
> +}
> +
>  #ifdef CONFIG_SLOB
>  /*
>   * Common fields provided in kmem_cache by all slab allocators
> diff --git a/mm/slub.c b/mm/slub.c
> index 2ccb1c71fc36..a211d96011ba 100644
> --- a/mm/slub.c
> +++ b/mm/slub.c
> @@ -3787,7 +3787,7 @@ static unsigned int slub_min_objects;
>   * requested a higher minimum order then we start with that one instead of
>   * the smallest order which will fit the object.
>   */
> -static inline unsigned int slab_order(unsigned int size,
> +static inline unsigned int calc_slab_order(unsigned int size,
>  		unsigned int min_objects, unsigned int max_order,
>  		unsigned int fract_leftover)
>  {
> @@ -3851,7 +3851,7 @@ static inline int calculate_order(unsigned int size)
>
>  	fraction = 16;
>  	while (fraction >= 4) {
> -		order = slab_order(size, min_objects,
> +		order = calc_slab_order(size, min_objects,
>  				slub_max_order, fraction);
>  		if (order <= slub_max_order)
>  			return order;
> @@ -3864,14 +3864,14 @@ static inline int calculate_order(unsigned int size)
>  	 * We were unable to place multiple objects in a slab. Now
>  	 * lets see if we can place a single object there.
>  	 */
> -	order = slab_order(size, 1, slub_max_order, 1);
> +	order = calc_slab_order(size, 1, slub_max_order, 1);
>  	if (order <= slub_max_order)
>  		return order;
>
>  	/*
>  	 * Doh this slab cannot be placed using slub_max_order.
>  	 */
> -	order = slab_order(size, 1, MAX_ORDER, 1);
> +	order = calc_slab_order(size, 1, MAX_ORDER, 1);
>  	if (order < MAX_ORDER)
>  		return order;
>  	return -ENOSYS;

This patch looks good.
Reviewed-by: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>

SL[AUO]B works fine on top of this patch.
Tested-by: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>

> --
> 2.34.1
>
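
For anyone following the series, a rough sketch of the kind of caller these
helpers enable once SLUB stops touching struct page directly (again my own
illustration; object_from_pfmemalloc_slab() is a made-up name, not code from
this patch):

	/* Sketch only: object pointer -> struct slab -> per-slab state. */
	static bool object_from_pfmemalloc_slab(const void *object)
	{
		struct slab *slab = virt_to_slab(object);

		/* large kmalloc()/non-slab compound pages have no struct slab */
		if (!slab)
			return false;

		return slab_test_pfmemalloc(slab);
	}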