On Sat, Oct 5, 2024 at 6:06 AM Yunsheng Lin <yunshenglin0825@xxxxxxxxx> wrote: > > On 10/4/2024 6:40 AM, Alexander Duyck wrote: > > On Tue, Oct 1, 2024 at 12:59 AM Yunsheng Lin <yunshenglin0825@xxxxxxxxx> wrote: > >> > >> Currently there is one 'struct page_frag' for every 'struct > >> sock' and 'struct task_struct', we are about to replace the > >> 'struct page_frag' with 'struct page_frag_cache' for them. > >> Before begin the replacing, we need to ensure the size of > >> 'struct page_frag_cache' is not bigger than the size of > >> 'struct page_frag', as there may be tens of thousands of > >> 'struct sock' and 'struct task_struct' instances in the > >> system. > >> > >> By or'ing the page order & pfmemalloc with lower bits of > >> 'va' instead of using 'u16' or 'u32' for page size and 'u8' > >> for pfmemalloc, we are able to avoid 3 or 5 bytes space waste. > >> And page address & pfmemalloc & order is unchanged for the > >> same page in the same 'page_frag_cache' instance, it makes > >> sense to fit them together. > >> > >> After this patch, the size of 'struct page_frag_cache' should be > >> the same as the size of 'struct page_frag'. > >> > >> CC: Alexander Duyck <alexander.duyck@xxxxxxxxx> > >> Signed-off-by: Yunsheng Lin <linyunsheng@xxxxxxxxxx> > >> --- > >> include/linux/mm_types_task.h | 19 +++++---- > >> include/linux/page_frag_cache.h | 26 +++++++++++- > >> mm/page_frag_cache.c | 75 +++++++++++++++++++++++---------- > >> 3 files changed, 88 insertions(+), 32 deletions(-) > >> > >> diff --git a/include/linux/mm_types_task.h b/include/linux/mm_types_task.h > >> index 0ac6daebdd5c..a82aa80c0ba4 100644 > >> --- a/include/linux/mm_types_task.h > >> +++ b/include/linux/mm_types_task.h > >> @@ -47,18 +47,21 @@ struct page_frag { > >> #define PAGE_FRAG_CACHE_MAX_SIZE __ALIGN_MASK(32768, ~PAGE_MASK) > >> #define PAGE_FRAG_CACHE_MAX_ORDER get_order(PAGE_FRAG_CACHE_MAX_SIZE) > >> struct page_frag_cache { > >> - void *va; > >> -#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) > >> + /* encoded_page consists of the virtual address, pfmemalloc bit and > >> + * order of a page. > >> + */ > >> + unsigned long encoded_page; > >> + > >> + /* we maintain a pagecount bias, so that we dont dirty cache line > >> + * containing page->_refcount every time we allocate a fragment. > >> + */ > >> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32) > >> __u16 offset; > >> - __u16 size; > >> + __u16 pagecnt_bias; > >> #else > >> __u32 offset; > >> + __u32 pagecnt_bias; > >> #endif > >> - /* we maintain a pagecount bias, so that we dont dirty cache line > >> - * containing page->_refcount every time we allocate a fragment. > >> - */ > >> - unsigned int pagecnt_bias; > >> - bool pfmemalloc; > >> }; > >> > >> /* Track pages that require TLB flushes */ > >> diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h > >> index 0a52f7a179c8..75aaad6eaea2 100644 > >> --- a/include/linux/page_frag_cache.h > >> +++ b/include/linux/page_frag_cache.h > >> @@ -3,18 +3,40 @@ > >> #ifndef _LINUX_PAGE_FRAG_CACHE_H > >> #define _LINUX_PAGE_FRAG_CACHE_H > >> > >> +#include <linux/bits.h> > >> #include <linux/log2.h> > >> #include <linux/mm_types_task.h> > >> #include <linux/types.h> > >> > >> +#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) > >> +/* Use a full byte here to enable assembler optimization as the shift > >> + * operation is usually expecting a byte. > >> + */ > >> +#define PAGE_FRAG_CACHE_ORDER_MASK GENMASK(7, 0) > >> +#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT 8 > >> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT) > >> +#else > >> +/* Compiler should be able to figure out we don't read things as any value > >> + * ANDed with 0 is 0. > >> + */ > >> +#define PAGE_FRAG_CACHE_ORDER_MASK 0 > >> +#define PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT 0 > >> +#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT) > >> +#endif > >> + > > > > Minor nit on this. You probably only need to have > > PAGE_FRAG_CACHE_ORDER_SHIFT defined in the ifdef. The PFMEMALLOC bit > > I guess you meant PAGE_FRAG_CACHE_ORDER_MASK here instead of > PAGE_FRAG_CACHE_ORDER_SHIFT, as the ORDER_SHIFT is always > zero? Yes. > > code is the same in both so you could pull it out. > > > > Also depending on how you defined it you could just define the > > PFMEMALLOC_BIT as the ORDER_MASK + 1. > > But the PFMEMALLOC_SHIFT still need to be defined as it is used in > page_frag_encode_page(), right? I am not sure if I understand what is > the point of defining the PFMEMALLOC_BIT as the ORDER_MASK + 1 instead > of defining the PFMEMALLOC_BIT as BIT(PAGE_FRAG_CACHE_PFMEMALLOC_SHIFT) > here. Actually the shift probably isn't needed. Since it is a single bit value you could just use a multiply by the bit and it would accomplish the same thing as the shift and would likely be converted to the same assembler code.