On 2024/4/18 17:39, Yunsheng Lin wrote:
...
>
>> combining the pagecnt_bias with the va. I'm wondering if it wouldn't
>> make more sense to look at putting together the structure something
>> like:
>>
>> #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
>> typedef u16 page_frag_bias_t;
>> #else
>> typedef u32 page_frag_bias_t;
>> #endif
>>
>> struct page_frag_cache {
>> 	/* page address and offset */
>> 	void *va;
>
> Generally I am agreed with combining the virtual address with the
> offset for the reason you mentioned below.
>
>> 	page_frag_bias_t pagecnt_bias;
>> 	u8 pfmemalloc;
>> 	u8 page_frag_order;
>> }
>
> The issue with the 'page_frag_order' I see is that we might need to do
> a 'PAGE << page_frag_order' to get actual size, and we might also need
> to do 'size - 1' to get the size_mask if we want to mask out the offset
> from the 'va'.
>
> For page_frag_order, we need to:
> size = PAGE << page_frag_order
> size_mask = size - 1
>
> For size_mask, it seem we only need to do:
> size = size_mask + 1
>
> And as PAGE_FRAG_CACHE_MAX_SIZE = 32K, which can be fitted into 15 bits
> if we use size_mask instead of size.
>
> Does it make sense to use below, so that we only need to use bitfield
> for SIZE < PAGE_FRAG_CACHE_MAX_SIZE in 32 bits system? And 'struct
> page_frag' is using a similar '(BITS_PER_LONG > 32)' checking trick.
>
> struct page_frag_cache {
> 	/* page address and offset */
> 	void *va;
>
> #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
> 	u16 pagecnt_bias;
> 	u16 size_mask:15;
> 	u16 pfmemalloc:1;
> #else
> 	u32 pagecnt_bias;
> 	u16 size_mask;
> 	u16 pfmemalloc;
> #endif
> };
>

After considering a few different layouts for 'struct page_frag_cache', the
one below seems more optimized:

struct page_frag_cache {
	/* page address & pfmemalloc & order */
	void *va;

#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
	u16 pagecnt_bias;
	u16 size;
#else
	u32 pagecnt_bias;
	u32 size;
#endif
};

The lower bits of 'va' are or'ed with the page order & pfmemalloc instead of
with the offset or pagecnt_bias, so that we do not need extra checking to
handle running out of bits for the offset or pagecnt_bias when
PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE on a 32-bit system. And as the page
address, pfmemalloc and order stay unchanged for the same page within the
same 'page_frag_cache' instance, it makes sense to pack them together.

Also, it seems better to replace 'offset' with 'size', which indicates the
remaining size of the cache in a 'page_frag_cache' instance, so that a
single 'size >= fragsz' check covers the case of the cache being big
enough, which should be the fast path if we ensure size is zero when
'va' == NULL.
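
Purely to illustrate the packing described above (not as part of the
proposal itself), the low-bit handling could be wrapped in small helpers
like the hypothetical sketch below; the helper names are made up, and the
PAGE_FRAG_CACHE_ORDER_MASK / PAGE_FRAG_CACHE_PFMEMALLOC_BIT masks are the
ones defined in the code further down:

/* Hypothetical helpers, only to show how order & pfmemalloc share the
 * low bits of 'va'; 'va' from page_address() is at least PAGE_SIZE
 * aligned, so bits 0-2 are free to use.
 */
static inline void *page_frag_encode_va(void *va, unsigned int order,
					bool pfmemalloc)
{
	return (void *)((unsigned long)va | order |
			(pfmemalloc ? PAGE_FRAG_CACHE_PFMEMALLOC_BIT : 0));
}

static inline void *page_frag_decode_va(void *encoded_va)
{
	/* clearing the low bits recovers the page address */
	return (void *)((unsigned long)encoded_va & PAGE_MASK);
}

static inline unsigned int page_frag_decode_order(void *encoded_va)
{
	return (unsigned long)encoded_va & PAGE_FRAG_CACHE_ORDER_MASK;
}

static inline bool page_frag_decode_pfmemalloc(void *encoded_va)
{
	return !!((unsigned long)encoded_va & PAGE_FRAG_CACHE_PFMEMALLOC_BIT);
}
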
Something like below:

#define PAGE_FRAG_CACHE_ORDER_MASK		GENMASK(1, 0)
#define PAGE_FRAG_CACHE_PFMEMALLOC_BIT		BIT(2)

struct page_frag_cache {
	/* page address & pfmemalloc & order */
	void *va;

#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE) && (BITS_PER_LONG <= 32)
	u16 pagecnt_bias;
	u16 size;
#else
	u32 pagecnt_bias;
	u32 size;
#endif
};

static void *__page_frag_cache_refill(struct page_frag_cache *nc,
				      unsigned int fragsz, gfp_t gfp_mask,
				      unsigned int align_mask)
{
	gfp_t gfp = gfp_mask;
	struct page *page;
	void *va;

#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
	/* Ensure free_unref_page() can be used to free the page fragment */
	BUILD_BUG_ON(PAGE_FRAG_CACHE_MAX_ORDER > PAGE_ALLOC_COSTLY_ORDER);

	gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
		   __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
				PAGE_FRAG_CACHE_MAX_ORDER);
	if (likely(page)) {
		nc->size = PAGE_FRAG_CACHE_MAX_SIZE - fragsz;
		va = page_address(page);
		nc->va = (void *)((unsigned long)va |
				  PAGE_FRAG_CACHE_MAX_ORDER |
				  (page_is_pfmemalloc(page) ?
				   PAGE_FRAG_CACHE_PFMEMALLOC_BIT : 0));
		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE;
		return va;
	}
#endif
	page = alloc_pages_node(NUMA_NO_NODE, gfp, 0);
	if (likely(page)) {
		nc->size = PAGE_SIZE - fragsz;
		va = page_address(page);
		nc->va = (void *)((unsigned long)va |
				  (page_is_pfmemalloc(page) ?
				   PAGE_FRAG_CACHE_PFMEMALLOC_BIT : 0));
		page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE;
		return va;
	}

	nc->va = NULL;
	nc->size = 0;
	return NULL;
}

void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
				 unsigned int fragsz, gfp_t gfp_mask,
				 unsigned int align_mask)
{
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
	unsigned long page_order;
#endif
	unsigned long page_size;
	unsigned long size;
	struct page *page;
	void *va;

	size = nc->size & align_mask;
	va = nc->va;

#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
	page_order = (unsigned long)va & PAGE_FRAG_CACHE_ORDER_MASK;
	page_size = PAGE_SIZE << page_order;
#else
	page_size = PAGE_SIZE;
#endif

	if (unlikely(fragsz > size)) {
		if (unlikely(!va))
			return __page_frag_cache_refill(nc, fragsz, gfp_mask,
							align_mask);

		/* fragsz is not supposed to be bigger than PAGE_SIZE as we are
		 * allowing order 3 page allocation to fail easily under low
		 * memory condition.
		 */
		if (WARN_ON_ONCE(fragsz > PAGE_SIZE))
			return NULL;

		page = virt_to_page(va);
		if (!page_ref_sub_and_test(page, nc->pagecnt_bias))
			return __page_frag_cache_refill(nc, fragsz, gfp_mask,
							align_mask);

		if (unlikely((unsigned long)va &
			     PAGE_FRAG_CACHE_PFMEMALLOC_BIT)) {
			free_unref_page(page, compound_order(page));
			return __page_frag_cache_refill(nc, fragsz, gfp_mask,
							align_mask);
		}

		/* OK, page count is 0, we can safely set it */
		set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);

		/* reset page count bias and offset to start of new frag */
		nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
		size = page_size;
	}

	va = (void *)((unsigned long)va & PAGE_MASK);
	va = va + (page_size - size);
	nc->size = size - fragsz;
	nc->pagecnt_bias--;

	return va;
}
EXPORT_SYMBOL(__page_frag_alloc_va_align);
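
As a note on usage, 'align_mask' is expected to be derived from a
power-of-two alignment, the same way the existing page_frag_alloc_align()
wrapper derives it today; a hypothetical caller-side wrapper (the name and
the sanity check here are only for illustration) could look like:

static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
					     unsigned int fragsz,
					     gfp_t gfp_mask,
					     unsigned int align)
{
	WARN_ON_ONCE(!is_power_of_2(align));

	/* Masking the remaining 'size' with ~(align - 1) rounds it down to
	 * a multiple of 'align'; as page_size is also a multiple of 'align',
	 * the resulting offset (page_size - size) stays aligned.
	 */
	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask,
					  ~(align - 1));
}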