The routine is_file_hugepages() checks f_op == &hugetlbfs_file_operations to determine if the file resides in hugetlbfs. This is problematic when the file is on a union or overlay filesystem. Instead, define a new file mode FMODE_HUGETLBFS which is set when a hugetlbfs file is opened. The mode can easily be copied to other 'files' derived from the original hugetlbfs file. With this change, hugetlbfs_file_operations can be static as it should be. There is also a (duplicate) set of shm file operations used for the routine is_file_shm_hugepages(). Instead of setting/using special f_ops, just propagate the FMODE_HUGETLBFS mode. This means is_file_shm_hugepages() and the duplicate f_ops can be removed. While cleaning things up, change the name of is_file_hugepages() to is_file_hugetlbfs(). The term hugepages is a bit ambiguous. A subsequent patch will propagate FMODE_HUGETLBFS in overlayfs. Suggested-by: Al Viro <viro@xxxxxxxxxxxxxxxxxx> Signed-off-by: Mike Kravetz <mike.kravetz@xxxxxxxxxx> --- fs/hugetlbfs/inode.c | 7 +++++++ fs/io_uring.c | 2 +- include/linux/fs.h | 3 +++ include/linux/hugetlb.h | 10 ++++------ include/linux/shm.h | 5 ----- ipc/shm.c | 34 ++++++++-------------------------- mm/memfd.c | 2 +- mm/mmap.c | 8 ++++---- 8 files changed, 28 insertions(+), 43 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 991c60c7ffe0..5c0c50a88c84 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -324,6 +324,12 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to) return retval; } +static int hugetlbfs_open(struct inode *inode, struct file *file) +{ + file->f_mode |= FMODE_HUGETLBFS; + return 0; +} + static int hugetlbfs_write_begin(struct file *file, struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, @@ -1112,6 +1118,7 @@ static void init_once(void *foo) const struct file_operations hugetlbfs_file_operations = { .read_iter = hugetlbfs_read_iter, + .open = hugetlbfs_open, .mmap = 
hugetlbfs_file_mmap, .fsync = noop_fsync, .get_unmapped_area = hugetlb_get_unmapped_area, diff --git a/fs/io_uring.c b/fs/io_uring.c index bb25e3997d41..96e8a4bb610a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -7123,7 +7123,7 @@ static int io_sqe_buffer_register(struct io_ring_ctx *ctx, void __user *arg, struct vm_area_struct *vma = vmas[j]; if (vma->vm_file && - !is_file_hugepages(vma->vm_file)) { + !is_file_hugetlbfs(vma->vm_file)) { ret = -EOPNOTSUPP; break; } diff --git a/include/linux/fs.h b/include/linux/fs.h index 45cc10cdf6dd..99af9513f9ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -175,6 +175,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File does not contribute to nr_files count */ #define FMODE_NOACCOUNT ((__force fmode_t)0x20000000) +/* File is in hugetlbfs filesystem */ +#define FMODE_HUGETLBFS ((__force fmode_t)0x40000000) + /* * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector * that indicates that they should check the contents of the iovec are diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 43a1cef8f0f1..aa3408775464 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -429,18 +429,16 @@ static inline struct hugetlbfs_inode_info *HUGETLBFS_I(struct inode *inode) return container_of(inode, struct hugetlbfs_inode_info, vfs_inode); } -extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; struct file *hugetlb_file_setup(const char *name, size_t size, vm_flags_t acct, struct user_struct **user, int creat_flags, int page_size_log); -static inline bool is_file_hugepages(struct file *file) +static inline bool is_file_hugetlbfs(struct file *file) { - if (file->f_op == &hugetlbfs_file_operations) + if (unlikely(file->f_mode & FMODE_HUGETLBFS)) return true; - - return is_file_shm_hugepages(file); + return false; } static inline struct hstate *hstate_inode(struct inode *i) @@ -449,7 +447,7 @@ 
static inline struct hstate *hstate_inode(struct inode *i) } #else /* !CONFIG_HUGETLBFS */ -#define is_file_hugepages(file) false +#define is_file_hugetlbfs(file) false static inline struct file * hugetlb_file_setup(const char *name, size_t size, vm_flags_t acctflag, struct user_struct **user, int creat_flags, diff --git a/include/linux/shm.h b/include/linux/shm.h index d8e69aed3d32..1ab62d7b334f 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -16,7 +16,6 @@ struct sysv_shm { long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr, unsigned long shmlba); -bool is_file_shm_hugepages(struct file *file); void exit_shm(struct task_struct *task); #define shm_init_task(task) INIT_LIST_HEAD(&(task)->sysvshm.shm_clist) #else @@ -30,10 +29,6 @@ static inline long do_shmat(int shmid, char __user *shmaddr, { return -ENOSYS; } -static inline bool is_file_shm_hugepages(struct file *file) -{ - return false; -} static inline void exit_shm(struct task_struct *task) { } diff --git a/ipc/shm.c b/ipc/shm.c index 0ba6add05b35..8f119b1d6170 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -285,7 +285,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; shm_rmid(ns, shp); shm_unlock(shp); - if (!is_file_hugepages(shm_file)) + if (!is_file_hugetlbfs(shm_file)) shmem_lock(shm_file, 0, shp->mlock_user); else if (shp->mlock_user) user_shm_unlock(i_size_read(file_inode(shm_file)), @@ -560,24 +560,6 @@ static const struct file_operations shm_file_operations = { .fallocate = shm_fallocate, }; -/* - * shm_file_operations_huge is now identical to shm_file_operations, - * but we keep it distinct for the sake of is_file_shm_hugepages(). 
- */ -static const struct file_operations shm_file_operations_huge = { - .mmap = shm_mmap, - .fsync = shm_fsync, - .release = shm_release, - .get_unmapped_area = shm_get_unmapped_area, - .llseek = noop_llseek, - .fallocate = shm_fallocate, -}; - -bool is_file_shm_hugepages(struct file *file) -{ - return file->f_op == &shm_file_operations_huge; -} - static const struct vm_operations_struct shm_vm_ops = { .open = shm_open, /* callback for a new vm-area open */ .close = shm_close, /* callback for when the vm-area is released */ @@ -698,7 +680,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) no_id: ipc_update_pid(&shp->shm_cprid, NULL); ipc_update_pid(&shp->shm_lprid, NULL); - if (is_file_hugepages(file) && shp->mlock_user) + if (is_file_hugetlbfs(file) && shp->mlock_user) user_shm_unlock(size, shp->mlock_user); fput(file); ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); @@ -836,7 +818,7 @@ static void shm_add_rss_swap(struct shmid_kernel *shp, inode = file_inode(shp->shm_file); - if (is_file_hugepages(shp->shm_file)) { + if (is_file_hugetlbfs(shp->shm_file)) { struct address_space *mapping = inode->i_mapping; struct hstate *h = hstate_file(shp->shm_file); *rss_add += pages_per_huge_page(h) * mapping->nrpages; @@ -1102,7 +1084,7 @@ static int shmctl_do_lock(struct ipc_namespace *ns, int shmid, int cmd) } shm_file = shp->shm_file; - if (is_file_hugepages(shm_file)) + if (is_file_hugetlbfs(shm_file)) goto out_unlock0; if (cmd == SHM_LOCK) { @@ -1523,10 +1505,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, goto out_nattch; } - file = alloc_file_clone(base, f_flags, - is_file_hugepages(base) ? 
- &shm_file_operations_huge : - &shm_file_operations); + file = alloc_file_clone(base, f_flags, &shm_file_operations); err = PTR_ERR(file); if (IS_ERR(file)) { kfree(sfd); @@ -1534,6 +1513,9 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, goto out_nattch; } + /* copy hugetlbfs mode for is_file_hugetlbfs() */ + file->f_mode |= (base->f_mode & FMODE_HUGETLBFS); + sfd->id = shp->shm_perm.id; sfd->ns = get_ipc_ns(ns); sfd->file = base; diff --git a/mm/memfd.c b/mm/memfd.c index 2647c898990c..e6c16b6bf3f6 100644 --- a/mm/memfd.c +++ b/mm/memfd.c @@ -123,7 +123,7 @@ static unsigned int *memfd_file_seals_ptr(struct file *file) return &SHMEM_I(file_inode(file))->seals; #ifdef CONFIG_HUGETLBFS - if (is_file_hugepages(file)) + if (is_file_hugetlbfs(file)) return &HUGETLBFS_I(file_inode(file))->seals; #endif diff --git a/mm/mmap.c b/mm/mmap.c index f609e9ec4a25..703a9680a937 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1538,7 +1538,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, vm_flags |= VM_NORESERVE; /* hugetlb applies strict overcommit unless MAP_NORESERVE */ - if (file && is_file_hugepages(file)) + if (file && is_file_hugetlbfs(file)) vm_flags |= VM_NORESERVE; } @@ -1562,10 +1562,10 @@ unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, file = fget(fd); if (!file) return -EBADF; - if (is_file_hugepages(file)) + if (is_file_hugetlbfs(file)) len = ALIGN(len, huge_page_size(hstate_file(file))); retval = -EINVAL; - if (unlikely(flags & MAP_HUGETLB && !is_file_hugepages(file))) + if (unlikely(flags & MAP_HUGETLB && !is_file_hugetlbfs(file))) goto out_fput; } else if (flags & MAP_HUGETLB) { struct user_struct *user = NULL; @@ -1678,7 +1678,7 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) * hugetlb has its own accounting separate from the core VM * VM_HUGETLB may not be set yet so we cannot check for that flag. 
*/ - if (file && is_file_hugepages(file)) + if (file && is_file_hugetlbfs(file)) return 0; return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE; -- 2.25.4