During our tests in containers, there is a read-only file (e.g., a shared library) in the overlayfs filesystem, and the underlying filesystem is ext4, which supports large folios. We mmap the file with PROT_READ, and then call madvise(MADV_COLLAPSE) on the mapping. However, the madvise call fails and returns EINVAL. The reason is that the mapping address isn't aligned to the PMD size. Since overlayfs doesn't support large folios, __get_unmapped_area() doesn't call thp_get_unmapped_area() to get a THP-aligned address. To fix it, add a get_unmapped_area hook for overlayfs that calls get_unmapped_area() on the real file. Besides, since overlayfs may be built as a module (CONFIG_OVERLAY_FS=m), we need to export __get_unmapped_area(), which get_unmapped_area() calls. Signed-off-by: Jinjiang Tu <tujinjiang@xxxxxxxxxx> --- fs/overlayfs/file.c | 20 ++++++++++++++++++++ mm/mmap.c | 1 + 2 files changed, 21 insertions(+) diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index 969b458100fe..d0dcf675ebe8 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -653,6 +653,25 @@ static int ovl_flush(struct file *file, fl_owner_t id) return err; } +static unsigned long ovl_get_unmapped_area(struct file *file, + unsigned long addr, unsigned long len, unsigned long pgoff, + unsigned long flags) +{ + struct file *realfile; + const struct cred *old_cred; + unsigned long ret; + + realfile = ovl_real_file(file); + if (IS_ERR(realfile)) + return PTR_ERR(realfile); + + old_cred = ovl_override_creds(file_inode(file)->i_sb); + ret = get_unmapped_area(realfile, addr, len, pgoff, flags); + ovl_revert_creds(old_cred); + + return ret; +} + const struct file_operations ovl_file_operations = { .open = ovl_open, .release = ovl_release, @@ -661,6 +680,7 @@ const struct file_operations ovl_file_operations = { .write_iter = ovl_write_iter, .fsync = ovl_fsync, .mmap = ovl_mmap, + .get_unmapped_area = ovl_get_unmapped_area, .fallocate = ovl_fallocate, .fadvise = ovl_fadvise, .flush = ovl_flush, diff --git a/mm/mmap.c b/mm/mmap.c index 16f8e8be01f8..60eb1ff7c9a8 100644 --- a/mm/mmap.c +++ 
b/mm/mmap.c @@ -913,6 +913,7 @@ __get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, error = security_mmap_addr(addr); return error ? error : addr; } +EXPORT_SYMBOL(__get_unmapped_area); unsigned long mm_get_unmapped_area(struct mm_struct *mm, struct file *file, -- 2.34.1