On 2024/8/26 06:05, Hugh Dickins wrote:
On Mon, 12 Aug 2024, Baolin Wang wrote:
To support large folio swapin for shmem in the following patches, add
large folio allocation for the new replacement folio in shmem_replace_folio().
Moreover large folios occupy N consecutive entries in the swap cache
instead of using multi-index entries like the page cache, therefore
we should replace each of the consecutive entries in the swap cache instead
of using shmem_replace_entry().
Also update the statistics and the folio reference count using the number
of pages in the folio.
Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
---
mm/shmem.c | 54 +++++++++++++++++++++++++++++++-----------------------
1 file changed, 31 insertions(+), 23 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index f6bab42180ea..d94f02ad7bd1 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1889,28 +1889,24 @@ static bool shmem_should_replace_folio(struct folio *folio, gfp_t gfp)
static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
struct shmem_inode_info *info, pgoff_t index)
{
- struct folio *old, *new;
- struct address_space *swap_mapping;
- swp_entry_t entry;
- pgoff_t swap_index;
- int error;
-
- old = *foliop;
- entry = old->swap;
- swap_index = swap_cache_index(entry);
- swap_mapping = swap_address_space(entry);
+ struct folio *new, *old = *foliop;
+ swp_entry_t entry = old->swap;
+ struct address_space *swap_mapping = swap_address_space(entry);
+ pgoff_t swap_index = swap_cache_index(entry);
+ XA_STATE(xas, &swap_mapping->i_pages, swap_index);
+ int nr_pages = folio_nr_pages(old);
+ int error = 0, i;
/*
* We have arrived here because our zones are constrained, so don't
* limit chance of success by further cpuset and node constraints.
*/
gfp &= ~GFP_CONSTRAINT_MASK;
- VM_BUG_ON_FOLIO(folio_test_large(old), old);
- new = shmem_alloc_folio(gfp, 0, info, index);
+ new = shmem_alloc_folio(gfp, folio_order(old), info, index);
It is not clear to me whether folio_order(old) will ever be more than 0
here: but if it can be, then care will need to be taken over the gfp flags,
With this patch set, it can be a large folio. If a large folio still
exists in the swap cache, we will get a large folio during swap in.
And yes, the gfp flags should be updated. How about the following fix?
that they are suited to allocating the large folio; and there will need to
be (could be awkward!) fallback to order 0 when that allocation fails.
I do not think we should fall back to order 0 for a large folio, as that
would introduce more complex logic: for example, we would have to split the
original large swap entries in the shmem mapping, and it is tricky to free
large swap entries, etc. So I want to keep it simple for now.
My own testing never comes to shmem_replace_folio(): it was originally for
one lowend graphics driver; but IIRC there's now a more common case for it.
Good to know. Thank you very much for your valuable input.
[PATCH] mm: shmem: fix the gfp flag for large folio allocation
In shmem_replace_folio(), it may be necessary to allocate a large folio,
so we should update the gfp flags to ensure they are suitable for
allocating the large folio.
Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
---
mm/shmem.c | 18 +++++++++++++-----
1 file changed, 13 insertions(+), 5 deletions(-)
diff --git a/mm/shmem.c b/mm/shmem.c
index dd384d4ab035..d8038a66b110 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -155,7 +155,7 @@ static unsigned long shmem_default_max_inodes(void)
static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
struct folio **foliop, enum sgp_type sgp, gfp_t
gfp,
- struct mm_struct *fault_mm, vm_fault_t *fault_type);
+ struct vm_area_struct *vma, vm_fault_t *fault_type);
static inline struct shmem_sb_info *SHMEM_SB(struct super_block *sb)
{
@@ -1887,7 +1887,8 @@ static bool shmem_should_replace_folio(struct
folio *folio, gfp_t gfp)
}
static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
- struct shmem_inode_info *info, pgoff_t
index)
+ struct shmem_inode_info *info, pgoff_t
index,
+ struct vm_area_struct *vma)
{
struct folio *new, *old = *foliop;
swp_entry_t entry = old->swap;
@@ -1902,6 +1903,12 @@ static int shmem_replace_folio(struct folio
**foliop, gfp_t gfp,
* limit chance of success by further cpuset and node constraints.
*/
gfp &= ~GFP_CONSTRAINT_MASK;
+ if (nr_pages > 1) {
+ gfp_t huge_gfp = vma_thp_gfp_mask(vma);
+
+ gfp = limit_gfp_mask(huge_gfp, gfp);
+ }
+
new = shmem_alloc_folio(gfp, folio_order(old), info, index);
if (!new)
return -ENOMEM;
@@ -2073,10 +2080,11 @@ static int shmem_split_large_entry(struct inode
*inode, pgoff_t index,
*/
static int shmem_swapin_folio(struct inode *inode, pgoff_t index,
struct folio **foliop, enum sgp_type sgp,
- gfp_t gfp, struct mm_struct *fault_mm,
+ gfp_t gfp, struct vm_area_struct *vma,
vm_fault_t *fault_type)
{
struct address_space *mapping = inode->i_mapping;
+ struct mm_struct *fault_mm = vma ? vma->vm_mm : NULL;
struct shmem_inode_info *info = SHMEM_I(inode);
struct swap_info_struct *si;
struct folio *folio = NULL;
@@ -2162,7 +2170,7 @@ static int shmem_swapin_folio(struct inode *inode,
pgoff_t index,
arch_swap_restore(folio_swap(swap, folio), folio);
if (shmem_should_replace_folio(folio, gfp)) {
- error = shmem_replace_folio(&folio, gfp, info, index);
+ error = shmem_replace_folio(&folio, gfp, info, index, vma);
if (error)
goto failed;
}
@@ -2243,7 +2251,7 @@ static int shmem_get_folio_gfp(struct inode
*inode, pgoff_t index,
if (xa_is_value(folio)) {
error = shmem_swapin_folio(inode, index, &folio,
- sgp, gfp, fault_mm, fault_type);
+ sgp, gfp, vma, fault_type);
if (error == -EEXIST)
goto repeat;