[RFC v2 1/3] mm: move PG_slab flag to page_type

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



For now, only SLAB uses _mapcount field as a number of active objects in
a slab, and other slab allocators do not use it. As 16 bits are enough
for that, use remaining 16 bits of _mapcount as page_type even when
SLAB is used. And then move PG_slab flag to page_type.

As suggested by Matthew, store number of active objects in negative
form and use helper when accessing or modifying it.

Note that page_type is always placed in upper 16 bits of _mapcount to
avoid confusing normal _mapcount as page_type. As underflow (actually
I mean, yeah, overflow) is not a concern anymore, use more lower bits.

Add more folio helpers for PAGE_TYPE_OPS() not to break existing
slab implementations.

Remove PG_slab check from PAGE_FLAGS_CHECK_AT_FREE. buddy will still
check if _mapcount is properly set at free.

Exclude PG_slab from hwpoison and show_page_flags() for now.

Note that with this patch, page_mapped() and folio_mapped() always return
false for slab page.

Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Christoph Lameter <cl@xxxxxxxxx>
Cc: Pekka Enberg <penberg@xxxxxxxxxx>
Cc: David Rientjes <rientjes@xxxxxxxxxx>
Cc: Vlastimil Babka <vbabka@xxxxxxx>
Cc: Naoya Horiguchi <naoya.horiguchi@xxxxxxx>
Cc: Miaohe Lin <linmiaohe@xxxxxxxxxx>
Cc: "Matthew Wilcox (Oracle)" <willy@xxxxxxxxxxxxx>
Cc: Minchan Kim <minchan@xxxxxxxxxx>
Cc: Mel Gorman <mgorman@xxxxxxxxxxxxxxxxxxx>
Cc: Andrea Arcangeli <aarcange@xxxxxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Hugh Dickins <hughd@xxxxxxxxxx>
Cc: Muchun Song <songmuchun@xxxxxxxxxxxxx>
Cc: David Hildenbrand <david@xxxxxxxxxx>
Signed-off-by: Hyeonggon Yoo <42.hyeyoo@xxxxxxxxx>
---
 fs/proc/page.c                 | 13 ++----
 include/linux/mm_types.h       | 11 +++--
 include/linux/page-flags.h     | 77 ++++++++++++++++++++++++----------
 include/trace/events/mmflags.h |  1 -
 kernel/crash_core.c            |  3 +-
 mm/memory-failure.c            |  8 ----
 mm/slab.c                      | 44 ++++++++++++-------
 mm/slab.h                      |  3 +-
 8 files changed, 98 insertions(+), 62 deletions(-)

diff --git a/fs/proc/page.c b/fs/proc/page.c
index f2273b164535..101be8d5a74e 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -67,7 +67,7 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
 		 */
 		ppage = pfn_to_online_page(pfn);
 
-		if (!ppage || PageSlab(ppage) || page_has_type(ppage))
+		if (!ppage || page_has_type(ppage))
 			pcount = 0;
 		else
 			pcount = page_mapcount(ppage);
@@ -124,11 +124,8 @@ u64 stable_page_flags(struct page *page)
 
 	/*
 	 * pseudo flags for the well known (anonymous) memory mapped pages
-	 *
-	 * Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
-	 * simple test in page_mapped() is not enough.
 	 */
-	if (!PageSlab(page) && page_mapped(page))
+	if (page_mapped(page))
 		u |= 1 << KPF_MMAP;
 	if (PageAnon(page))
 		u |= 1 << KPF_ANON;
@@ -178,16 +175,14 @@ u64 stable_page_flags(struct page *page)
 		u |= 1 << KPF_OFFLINE;
 	if (PageTable(page))
 		u |= 1 << KPF_PGTABLE;
+	if (PageSlab(page))
+		u |= 1 << KPF_SLAB;
 
 	if (page_is_idle(page))
 		u |= 1 << KPF_IDLE;
 
 	u |= kpf_copy_bit(k, KPF_LOCKED,	PG_locked);
 
-	u |= kpf_copy_bit(k, KPF_SLAB,		PG_slab);
-	if (PageTail(page) && PageSlab(compound_head(page)))
-		u |= 1 << KPF_SLAB;
-
 	u |= kpf_copy_bit(k, KPF_ERROR,		PG_error);
 	u |= kpf_copy_bit(k, KPF_DIRTY,		PG_dirty);
 	u |= kpf_copy_bit(k, KPF_UPTODATE,	PG_uptodate);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 834022721bc6..2f298d1b8cf5 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -196,10 +196,13 @@ struct page {
 		atomic_t _mapcount;
 
 		/*
-		 * If the page is neither PageSlab nor mappable to userspace,
-		 * the value stored here may help determine what this page
-		 * is used for.  See page-flags.h for a list of page types
-		 * which are currently stored here.
+		 * If the page is not mappable to userspace, the value
+		 * stored here may help determine what this page is used for.
+		 * See page-flags.h for a list of page types which are currently
+		 * stored here.
+		 *
+		 * Note that only upper half is used for page types and lower
+		 * half is reserved for SLAB.
 		 */
 		unsigned int page_type;
 	};
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 0b0ae5084e60..31dda492cda5 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -107,7 +107,6 @@ enum pageflags {
 	PG_workingset,
 	PG_waiters,		/* Page has waiters, check its waitqueue. Must be bit #7 and in the same byte as "PG_locked" */
 	PG_error,
-	PG_slab,
 	PG_owner_priv_1,	/* Owner use. If pagecache, fs may use*/
 	PG_arch_1,
 	PG_reserved,
@@ -484,7 +483,6 @@ PAGEFLAG(Active, active, PF_HEAD) __CLEARPAGEFLAG(Active, active, PF_HEAD)
 	TESTCLEARFLAG(Active, active, PF_HEAD)
 PAGEFLAG(Workingset, workingset, PF_HEAD)
 	TESTCLEARFLAG(Workingset, workingset, PF_HEAD)
-__PAGEFLAG(Slab, slab, PF_NO_TAIL)
 __PAGEFLAG(SlobFree, slob_free, PF_NO_TAIL)
 PAGEFLAG(Checked, checked, PF_NO_COMPOUND)	   /* Used by some filesystems */
 
@@ -926,42 +924,72 @@ static inline bool is_page_hwpoison(struct page *page)
 }
 
 /*
- * For pages that are never mapped to userspace (and aren't PageSlab),
- * page_type may be used.  Because it is initialised to -1, we invert the
- * sense of the bit, so __SetPageFoo *clears* the bit used for PageFoo, and
- * __ClearPageFoo *sets* the bit used for PageFoo.  We reserve a few high and
- * low bits so that an underflow or overflow of page_mapcount() won't be
- * mistaken for a page type value.
+ * For pages that are never mapped to userspace, page_type may be used.
+ * Because it is initialised to -1, we invert the sense of the bit,
+ * so __SetPageFoo *clears* the bit used for PageFoo, and __ClearPageFoo
+ * *sets* the bit used for PageFoo.  We reserve a few high and low bits
+ * so that an underflow or overflow of page_mapcount() won't be mistaken
+ * for a page type value.
  */
 
 #define PAGE_TYPE_BASE	0xf0000000
-/* Reserve		0x0000007f to catch underflows of page_mapcount */
-#define PAGE_MAPCOUNT_RESERVE	-128
-#define PG_buddy	0x00000080
-#define PG_offline	0x00000100
-#define PG_table	0x00000200
-#define PG_guard	0x00000400
+#define PG_buddy	0x00010000
+#define PG_offline	0x00020000
+#define PG_table	0x00040000
+#define PG_guard	0x00080000
+#define PG_slab		0x00100000
 
 #define PageType(page, flag)						\
 	((page->page_type & (PAGE_TYPE_BASE | flag)) == PAGE_TYPE_BASE)
 
-static inline int page_has_type(struct page *page)
+#define PAGE_TYPE_MASK	0xffff0000
+
+static inline bool page_type_has_type(unsigned int page_type)
 {
-	return (int)page->page_type < PAGE_MAPCOUNT_RESERVE;
+	return ((int)page_type < (int)PAGE_TYPE_MASK);
 }
 
-#define PAGE_TYPE_OPS(uname, lname)					\
+static inline bool page_has_type(struct page *page)
+{
+	return page_type_has_type(page->page_type);
+}
+
+
+#define PAGE_TYPE_OPS(uname, lname, policy)				\
 static __always_inline int Page##uname(struct page *page)		\
 {									\
+	page = policy(page, 0);						\
+	return PageType(page, PG_##lname);				\
+}									\
+static __always_inline int folio_test_##lname(struct folio *folio)	\
+{									\
+	struct page *page = &folio->page;				\
+									\
 	return PageType(page, PG_##lname);				\
 }									\
 static __always_inline void __SetPage##uname(struct page *page)		\
 {									\
+	page = policy(page, 1);						\
+	VM_BUG_ON_PAGE(!PageType(page, 0), page);			\
+	page->page_type &= ~PG_##lname;					\
+}									\
+static __always_inline void __folio_set_##lname(struct folio *folio)	\
+{									\
+	struct page *page = &folio->page;				\
+									\
 	VM_BUG_ON_PAGE(!PageType(page, 0), page);			\
 	page->page_type &= ~PG_##lname;					\
 }									\
 static __always_inline void __ClearPage##uname(struct page *page)	\
 {									\
+	page = policy(page, 1);						\
+	VM_BUG_ON_PAGE(!Page##uname(page), page);			\
+	page->page_type |= PG_##lname;					\
+}									\
+static __always_inline void __folio_clear_##lname(struct folio *folio)	\
+{									\
+	struct page *page = &folio->page;				\
+									\
 	VM_BUG_ON_PAGE(!Page##uname(page), page);			\
 	page->page_type |= PG_##lname;					\
 }
@@ -970,7 +998,7 @@ static __always_inline void __ClearPage##uname(struct page *page)	\
  * PageBuddy() indicates that the page is free and in the buddy system
  * (see mm/page_alloc.c).
  */
-PAGE_TYPE_OPS(Buddy, buddy)
+PAGE_TYPE_OPS(Buddy, buddy, PF_ANY)
 
 /*
  * PageOffline() indicates that the page is logically offline although the
@@ -994,7 +1022,10 @@ PAGE_TYPE_OPS(Buddy, buddy)
  * pages should check PageOffline() and synchronize with such drivers using
  * page_offline_freeze()/page_offline_thaw().
  */
-PAGE_TYPE_OPS(Offline, offline)
+PAGE_TYPE_OPS(Offline, offline, PF_ANY)
+
+/* PageSlab() indicates that the page is used by slab subsystem. */
+PAGE_TYPE_OPS(Slab, slab, PF_NO_TAIL)
 
 extern void page_offline_freeze(void);
 extern void page_offline_thaw(void);
@@ -1004,12 +1035,12 @@ extern void page_offline_end(void);
 /*
  * Marks pages in use as page tables.
  */
-PAGE_TYPE_OPS(Table, table)
+PAGE_TYPE_OPS(Table, table, PF_ANY)
 
 /*
  * Marks guardpages used with debug_pagealloc.
  */
-PAGE_TYPE_OPS(Guard, guard)
+PAGE_TYPE_OPS(Guard, guard, PF_ANY)
 
 extern bool is_free_buddy_page(struct page *page);
 
@@ -1057,8 +1088,8 @@ static __always_inline void __ClearPageAnonExclusive(struct page *page)
 	(1UL << PG_lru		| 1UL << PG_locked	|	\
 	 1UL << PG_private	| 1UL << PG_private_2	|	\
 	 1UL << PG_writeback	| 1UL << PG_reserved	|	\
-	 1UL << PG_slab		| 1UL << PG_active 	|	\
-	 1UL << PG_unevictable	| __PG_MLOCKED | LRU_GEN_MASK)
+	 1UL << PG_active 	| 1UL << PG_unevictable |	\
+	 __PG_MLOCKED | LRU_GEN_MASK)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
diff --git a/include/trace/events/mmflags.h b/include/trace/events/mmflags.h
index 11524cda4a95..72c11a16f771 100644
--- a/include/trace/events/mmflags.h
+++ b/include/trace/events/mmflags.h
@@ -112,7 +112,6 @@
 	{1UL << PG_lru,			"lru"		},		\
 	{1UL << PG_active,		"active"	},		\
 	{1UL << PG_workingset,		"workingset"	},		\
-	{1UL << PG_slab,		"slab"		},		\
 	{1UL << PG_owner_priv_1,	"owner_priv_1"	},		\
 	{1UL << PG_arch_1,		"arch_1"	},		\
 	{1UL << PG_reserved,		"reserved"	},		\
diff --git a/kernel/crash_core.c b/kernel/crash_core.c
index a0eb4d5cf557..f72437e4192f 100644
--- a/kernel/crash_core.c
+++ b/kernel/crash_core.c
@@ -479,13 +479,14 @@ static int __init crash_save_vmcoreinfo_init(void)
 	VMCOREINFO_NUMBER(PG_private);
 	VMCOREINFO_NUMBER(PG_swapcache);
 	VMCOREINFO_NUMBER(PG_swapbacked);
-	VMCOREINFO_NUMBER(PG_slab);
 #ifdef CONFIG_MEMORY_FAILURE
 	VMCOREINFO_NUMBER(PG_hwpoison);
 #endif
 	VMCOREINFO_NUMBER(PG_head_mask);
 #define PAGE_BUDDY_MAPCOUNT_VALUE	(~PG_buddy)
 	VMCOREINFO_NUMBER(PAGE_BUDDY_MAPCOUNT_VALUE);
+#define PAGE_SLAB_MAPCOUNT_VALUE	(~PG_slab)
+	VMCOREINFO_NUMBER(PAGE_SLAB_MAPCOUNT_VALUE);
 #ifdef CONFIG_HUGETLB_PAGE
 	VMCOREINFO_NUMBER(HUGETLB_PAGE_DTOR);
 #define PAGE_OFFLINE_MAPCOUNT_VALUE	(~PG_offline)
diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 779a426d2cab..9494f47c4cee 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1145,7 +1145,6 @@ static int me_huge_page(struct page_state *ps, struct page *p)
 #define mlock		(1UL << PG_mlocked)
 #define lru		(1UL << PG_lru)
 #define head		(1UL << PG_head)
-#define slab		(1UL << PG_slab)
 #define reserved	(1UL << PG_reserved)
 
 static struct page_state error_states[] = {
@@ -1155,13 +1154,6 @@ static struct page_state error_states[] = {
 	 * PG_buddy pages only make a small fraction of all free pages.
 	 */
 
-	/*
-	 * Could in theory check if slab page is free or if we can drop
-	 * currently unused objects without touching them. But just
-	 * treat it as standard kernel for now.
-	 */
-	{ slab,		slab,		MF_MSG_SLAB,	me_kernel },
-
 	{ head,		head,		MF_MSG_HUGE,		me_huge_page },
 
 	{ sc|dirty,	sc|dirty,	MF_MSG_DIRTY_SWAPCACHE,	me_swapcache_dirty },
diff --git a/mm/slab.c b/mm/slab.c
index 59c8e28f7b6a..da12e82aba41 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2265,6 +2265,21 @@ void __kmem_cache_release(struct kmem_cache *cachep)
 	}
 }
 
+static inline unsigned int slab_get_active(struct slab *slab)
+{
+	return ~(slab->page_type | PG_slab);
+}
+
+static inline void slab_inc_active(struct slab *slab)
+{
+	slab->page_type--;
+}
+
+static inline void slab_dec_active(struct slab *slab)
+{
+	slab->page_type++;
+}
+
 /*
  * Get the memory for a slab management obj.
  *
@@ -2287,7 +2302,6 @@ static void *alloc_slabmgmt(struct kmem_cache *cachep,
 	void *addr = slab_address(slab);
 
 	slab->s_mem = addr + colour_off;
-	slab->active = 0;
 
 	if (OBJFREELIST_SLAB(cachep))
 		freelist = NULL;
@@ -2506,8 +2520,8 @@ static void *slab_get_obj(struct kmem_cache *cachep, struct slab *slab)
 {
 	void *objp;
 
-	objp = index_to_obj(cachep, slab, get_free_obj(slab, slab->active));
-	slab->active++;
+	objp = index_to_obj(cachep, slab, get_free_obj(slab, slab_get_active(slab)));
+	slab_inc_active(slab);
 
 	return objp;
 }
@@ -2520,7 +2534,7 @@ static void slab_put_obj(struct kmem_cache *cachep,
 	unsigned int i;
 
 	/* Verify double free bug */
-	for (i = slab->active; i < cachep->num; i++) {
+	for (i = slab_get_active(slab); i < cachep->num; i++) {
 		if (get_free_obj(slab, i) == objnr) {
 			pr_err("slab: double free detected in cache '%s', objp %px\n",
 			       cachep->name, objp);
@@ -2528,11 +2542,11 @@ static void slab_put_obj(struct kmem_cache *cachep,
 		}
 	}
 #endif
-	slab->active--;
+	slab_dec_active(slab);
 	if (!slab->freelist)
 		slab->freelist = objp + obj_offset(cachep);
 
-	set_free_obj(slab, slab->active, objnr);
+	set_free_obj(slab, slab_get_active(slab), objnr);
 }
 
 /*
@@ -2631,14 +2645,14 @@ static void cache_grow_end(struct kmem_cache *cachep, struct slab *slab)
 
 	spin_lock(&n->list_lock);
 	n->total_slabs++;
-	if (!slab->active) {
+	if (!slab_get_active(slab)) {
 		list_add_tail(&slab->slab_list, &n->slabs_free);
 		n->free_slabs++;
 	} else
 		fixup_slab_list(cachep, n, slab, &list);
 
 	STATS_INC_GROWN(cachep);
-	n->free_objects += cachep->num - slab->active;
+	n->free_objects += cachep->num - slab_get_active(slab);
 	spin_unlock(&n->list_lock);
 
 	fixup_objfreelist_debug(cachep, &list);
@@ -2740,7 +2754,7 @@ static inline void fixup_slab_list(struct kmem_cache *cachep,
 {
 	/* move slabp to correct slabp list: */
 	list_del(&slab->slab_list);
-	if (slab->active == cachep->num) {
+	if (slab_get_active(slab) == cachep->num) {
 		list_add(&slab->slab_list, &n->slabs_full);
 		if (OBJFREELIST_SLAB(cachep)) {
 #if DEBUG
@@ -2779,7 +2793,7 @@ static noinline struct slab *get_valid_first_slab(struct kmem_cache_node *n,
 
 	/* Move pfmemalloc slab to the end of list to speed up next search */
 	list_del(&slab->slab_list);
-	if (!slab->active) {
+	if (!slab_get_active(slab)) {
 		list_add_tail(&slab->slab_list, &n->slabs_free);
 		n->free_slabs++;
 	} else
@@ -2861,9 +2875,9 @@ static __always_inline int alloc_block(struct kmem_cache *cachep,
 	 * There must be at least one object available for
 	 * allocation.
 	 */
-	BUG_ON(slab->active >= cachep->num);
+	BUG_ON(slab_get_active(slab) >= cachep->num);
 
-	while (slab->active < cachep->num && batchcount--) {
+	while (slab_get_active(slab) < cachep->num && batchcount--) {
 		STATS_INC_ALLOCED(cachep);
 		STATS_INC_ACTIVE(cachep);
 		STATS_SET_HIGH(cachep);
@@ -3158,7 +3172,7 @@ static void *____cache_alloc_node(struct kmem_cache *cachep, gfp_t flags,
 	STATS_INC_ACTIVE(cachep);
 	STATS_SET_HIGH(cachep);
 
-	BUG_ON(slab->active == cachep->num);
+	BUG_ON(slab_get_active(slab) == cachep->num);
 
 	obj = slab_get_obj(cachep, slab);
 	n->free_objects--;
@@ -3292,7 +3306,7 @@ static void free_block(struct kmem_cache *cachep, void **objpp,
 		STATS_DEC_ACTIVE(cachep);
 
 		/* fixup slab chains */
-		if (slab->active == 0) {
+		if (slab_get_active(slab) == 0) {
 			list_add(&slab->slab_list, &n->slabs_free);
 			n->free_slabs++;
 		} else {
@@ -3347,7 +3361,7 @@ static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
 		struct slab *slab;
 
 		list_for_each_entry(slab, &n->slabs_free, slab_list) {
-			BUG_ON(slab->active);
+			BUG_ON(slab_get_active(slab);
 
 			i++;
 		}
diff --git a/mm/slab.h b/mm/slab.h
index 0202a8c2f0d2..f9df0fc3a918 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -18,7 +18,8 @@ struct slab {
 	struct kmem_cache *slab_cache;
 	void *freelist;	/* array of free object indexes */
 	void *s_mem;	/* first object */
-	unsigned int active;
+	/* lower half of page_type is used as active objects counter */
+	unsigned int page_type;
 
 #elif defined(CONFIG_SLUB)
 
-- 
2.32.0





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Annouce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux