On Wed, Jul 24, 2024 at 03:03:59PM GMT, Baolin Wang wrote:
> From: Daniel Gomez <da.gomez@xxxxxxxxxxx>
>
> Add large folio support for shmem write and fallocate paths matching the
> same high order preference mechanism used in the iomap buffered IO path
> as used in __filemap_get_folio().
>
> Add shmem_mapping_size_order() to get a hint for the order of the folio
> based on the file size which takes care of the mapping requirements.
>
> Swap does not support high order folios for now, so make it order-0 in
> case swap is enabled.
>
> If the top level huge page (controlled by '/sys/kernel/mm/transparent_hugepage/shmem_enabled')
> is enabled, we just allow PMD sized THP to keep interface backward
> compatibility.
>
> Co-developed-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
> Signed-off-by: Daniel Gomez <da.gomez@xxxxxxxxxxx>
> Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
> ---
>  include/linux/shmem_fs.h |  4 +--
>  mm/huge_memory.c         |  2 +-
>  mm/shmem.c               | 57 ++++++++++++++++++++++++++++++++++++----
>  3 files changed, 55 insertions(+), 8 deletions(-)
>
> diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
> index 34beaca2f853..fb0771218f1b 100644
> --- a/include/linux/shmem_fs.h
> +++ b/include/linux/shmem_fs.h
> @@ -113,11 +113,11 @@ int shmem_unuse(unsigned int type);
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  unsigned long shmem_allowable_huge_orders(struct inode *inode,
>  				struct vm_area_struct *vma, pgoff_t index,
> -				bool shmem_huge_force);
> +				bool shmem_huge_force, size_t len);
>  #else
>  static inline unsigned long shmem_allowable_huge_orders(struct inode *inode,
>  				struct vm_area_struct *vma, pgoff_t index,
> -				bool shmem_huge_force)
> +				bool shmem_huge_force, size_t len)
>  {
>  	return 0;
>  }
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index e555fcdd19d4..a8fc3b9e4034 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -162,7 +162,7 @@ unsigned long __thp_vma_allowable_orders(struct vm_area_struct *vma,
>  	if (!in_pf && shmem_file(vma->vm_file))
>  		return shmem_allowable_huge_orders(file_inode(vma->vm_file),
>  						   vma, vma->vm_pgoff,
> -						   !enforce_sysfs);
> +						   !enforce_sysfs, PAGE_SIZE);
>
>  	if (!vma_is_anonymous(vma)) {
>  		/*
> diff --git a/mm/shmem.c b/mm/shmem.c
> index 92ed09527682..cc0c1b790267 100644
> --- a/mm/shmem.c
> +++ b/mm/shmem.c
> @@ -1630,10 +1630,47 @@ static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
>  	return result;
>  }
>
> +/**
> + * shmem_mapping_size_order - Get maximum folio order for the given file size.
> + * @mapping: Target address_space.
> + * @index: The page index.
> + * @size: The suggested size of the folio to create.
> + *
> + * This returns a high order for folios (when supported) based on the file size
> + * which the mapping currently allows at the given index. The index is relevant
> + * due to alignment considerations the mapping might have. The returned order
> + * may be less than the size passed.
> + *
> + * Like __filemap_get_folio order calculation.
> + *
> + * Return: The order.
> + */
> +static inline unsigned int
> +shmem_mapping_size_order(struct address_space *mapping, pgoff_t index,
> +			 size_t size, struct shmem_sb_info *sbinfo)
> +{
> +	unsigned int order = ilog2(size);
> +
> +	if ((order <= PAGE_SHIFT) ||
> +	    (!mapping_large_folio_support(mapping) || !sbinfo->noswap))
> +		return 0;
> +
> +	order -= PAGE_SHIFT;
> +
> +	/* If we're not aligned, allocate a smaller folio */
> +	if (index & ((1UL << order) - 1))
> +		order = __ffs(index);
> +
> +	order = min_t(size_t, order, MAX_PAGECACHE_ORDER);
> +
> +	/* Order-1 not supported due to THP dependency */
> +	return (order == 1) ? 0 : order;
> +}

I have an updated version of shmem_mapping_size_order() that I didn't
post, which uses get_order() instead, as suggested in [1]:

[1] https://lore.kernel.org/all/ZT7rd3CSr+VnKj7v@xxxxxxxxxxxxxxxxxxxx/

/**
 * shmem_mapping_size_order - Get maximum folio order for the given file size.
 * @mapping: Target address_space.
 * @index: The page index.
 * @size: The suggested size of the folio to create.
 *
 * This returns a high order for folios (when supported) based on the file size
 * which the mapping currently allows at the given index. The index is relevant
 * due to alignment considerations the mapping might have. The returned order
 * may be less than the size passed.
 *
 * Like __filemap_get_folio order calculation.
 *
 * Return: The order.
 */
static inline unsigned int
shmem_mapping_size_order(struct address_space *mapping, pgoff_t index,
			 size_t size)
{
	unsigned int order = get_order(max_t(size_t, size, PAGE_SIZE));

	if (!mapping_large_folio_support(mapping))
		return 0;

	/* If we're not aligned, allocate a smaller folio */
	if (index & ((1UL << order) - 1))
		order = __ffs(index);

	return min_t(size_t, order, MAX_PAGECACHE_ORDER);
}

Order-1 is already supported since commit [2], so I've removed that
condition as well. A small userspace sketch illustrating this
calculation is appended at the end of this mail.

[2] 8897277acfef7f70fdecc054073bea2542fc7a1b ("mm: support order-1
folios in the page cache").

> +
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>  unsigned long shmem_allowable_huge_orders(struct inode *inode,
>  				struct vm_area_struct *vma, pgoff_t index,
> -				bool shmem_huge_force)
> +				bool shmem_huge_force, size_t len)
>  {
>  	unsigned long mask = READ_ONCE(huge_shmem_orders_always);
>  	unsigned long within_size_orders = READ_ONCE(huge_shmem_orders_within_size);
> @@ -1659,10 +1696,20 @@ unsigned long shmem_allowable_huge_orders(struct inode *inode,
>  					  vma, vm_flags);
>  	if (!vma || !vma_is_anon_shmem(vma)) {
>  		/*
> -		 * For tmpfs, we now only support PMD sized THP if huge page
> -		 * is enabled, otherwise fallback to order 0.
> +		 * For tmpfs, if top level huge page is enabled, we just allow
> +		 * PMD size THP to keep interface backward compatibility.
> +		 */
> +		if (global_huge)
> +			return BIT(HPAGE_PMD_ORDER);
> +
> +		/*
> +		 * Otherwise, get a highest order hint based on the size of
> +		 * write and fallocate paths, then will try each allowable
> +		 * huge orders.
>  		 */
> -		return global_huge ? BIT(HPAGE_PMD_ORDER) : 0;
> +		order = shmem_mapping_size_order(inode->i_mapping, index,
> +						 len, SHMEM_SB(inode->i_sb));
> +		return BIT(order + 1) - 1;
>  	}
>
>  	/*
> @@ -2174,7 +2221,7 @@ static int shmem_get_folio_gfp(struct inode *inode, pgoff_t index,
>  	}
>
>  	/* Find hugepage orders that are allowed for anonymous shmem and tmpfs. */
> -	orders = shmem_allowable_huge_orders(inode, vma, index, false);
> +	orders = shmem_allowable_huge_orders(inode, vma, index, false, len);
>  	if (orders > 0) {
>  		gfp_t huge_gfp;
>
> --
> 2.39.3
>
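
To make the alignment fallback and the returned order mask concrete,
here is a minimal userspace sketch of the same calculation. This is
illustrative only: it assumes 4KiB pages, picks an arbitrary
MAX_PAGECACHE_ORDER of 8, reimplements get_order()/__ffs() with a loop
and a compiler builtin, and size_order() is a hypothetical stand-in for
shmem_mapping_size_order():

#include <stdio.h>

#define PAGE_SHIFT		12		/* assume 4KiB pages */
#define PAGE_SIZE		(1UL << PAGE_SHIFT)
#define MAX_PAGECACHE_ORDER	8		/* illustrative cap */

/* Smallest order covering 'size' bytes, like the kernel's get_order() */
static unsigned int get_order(size_t size)
{
	unsigned int order = 0;

	while ((PAGE_SIZE << order) < size)
		order++;
	return order;
}

/* Userspace model of the updated shmem_mapping_size_order() above */
static unsigned int size_order(unsigned long index, size_t size)
{
	unsigned int order = get_order(size > PAGE_SIZE ? size : PAGE_SIZE);

	/* If we're not aligned, allocate a smaller folio (__ffs == ctz) */
	if (index & ((1UL << order) - 1))
		order = __builtin_ctzl(index);

	return order < MAX_PAGECACHE_ORDER ? order : MAX_PAGECACHE_ORDER;
}

int main(void)
{
	/* 64KiB write at index 0: aligned, so order 4 (16 pages) */
	printf("order=%u\n", size_order(0, 64 * 1024));
	/* Same write at index 4: 4 & 15 != 0, falls back to __ffs(4) = 2 */
	printf("order=%u\n", size_order(4, 64 * 1024));
	/* Hint mask as in BIT(order + 1) - 1: orders 0..4 -> 0x1f */
	printf("mask=0x%lx\n", (1UL << (size_order(0, 64 * 1024) + 1)) - 1);
	return 0;
}

The BIT(order + 1) - 1 mask sets every order from 0 up to the hint, so
the caller can fall back to progressively smaller folios when a larger
allocation fails, matching the "try each allowable huge orders" comment
in the patch.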