On Wed, Apr 24, 2013 at 11:39:51AM -0400, Johannes Weiner wrote: > On Wed, Apr 24, 2013 at 11:16:39AM -0400, Naoya Horiguchi wrote: > > On Wed, Apr 24, 2013 at 04:14:54AM -0400, Johannes Weiner wrote: > > > @@ -491,10 +491,13 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) > > > > > > sprintf (name, "SYSV%08x", key); > > > if (shmflg & SHM_HUGETLB) { > > > + unsigned int hugesize; > > > + > > > /* hugetlb_file_setup applies strict accounting */ > > > if (shmflg & SHM_NORESERVE) > > > acctflag = VM_NORESERVE; > > > - file = hugetlb_file_setup(name, 0, size, acctflag, > > > + hugesize = ALIGN(size, huge_page_size(&default_hstate)); > > > + file = hugetlb_file_setup(name, hugesize, acctflag, > > > &shp->mlock_user, HUGETLB_SHMFS_INODE, > > > (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); > > > } else { > > > > Would it be better to find proper hstate instead of using default_hstate? > > You are probably right, I guess we can't assume default_hstate anymore > after page_size_log can be passed in. > > Can we have hugetlb_file_setup() return an adjusted length, or an > alignment requirement? Yes, it's possible if callers pass the pointer of size (length) to hugetlb_file_setup() and make it adjusted inside the function. And as for alignment, I think it's not a hugetlb_file_setup's job, so we don't have to do it in this function. > Or pull the hstate lookup into the callsites (since they pass in > page_size_log to begin with)? This is also a possible solution, where we might need to define and export a function converting hugepage order to hstate. I like the former one, so wrote a patch like below. # I added your Signed-off-by: because this's based on your draft patch. # if you don't like it, please let me know. Thanks, Naoya Horiguchi --- From: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx> Date: Wed, 24 Apr 2013 16:44:19 -0400 Subject: [PATCH] hugetlbfs: fix mmap failure in unaligned size request As reported in https://bugzilla.kernel.org/show_bug.cgi?id=56881, current kernel returns -EINVAL unless a given mmap length is "almost" hugepage aligned. This is because in sys_mmap_pgoff() the given length is passed to vm_mmap_pgoff() as it is without being aligned with hugepage boundary. This is a regression introduced in commit 40716e29243d "hugetlbfs: fix alignment of huge page requests", where alignment code is pushed into hugetlb_file_setup() and the variable len in caller side is not changed. To fix this, this patch partially reverts that commit, and changes the type of parameter size from size_t to (size_t *) in order to align the size in caller side. Signed-off-by: Naoya Horiguchi <n-horiguchi@xxxxxxxxxxxxx> Signed-off-by: Johannes Weiner <hannes@xxxxxxxxxxx> --- fs/hugetlbfs/inode.c | 20 ++++++++++---------- include/linux/hugetlb.h | 7 +++---- ipc/shm.c | 2 +- mm/mmap.c | 2 +- 4 files changed, 15 insertions(+), 16 deletions(-) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 523464e..7adbd7b 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -929,9 +929,8 @@ static struct dentry_operations anon_ops = { .d_dname = hugetlb_dname }; -struct file *hugetlb_file_setup(const char *name, unsigned long addr, - size_t size, vm_flags_t acctflag, - struct user_struct **user, +struct file *hugetlb_file_setup(const char *name, size_t *sizeptr, + vm_flags_t acctflag, struct user_struct **user, int creat_flags, int page_size_log) { struct file *file = ERR_PTR(-ENOMEM); @@ -939,9 +938,8 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, struct path path; struct super_block *sb; struct qstr quick_string; - struct hstate *hstate; - unsigned long num_pages; int hstate_idx; + size_t size; hstate_idx = get_hstate_idx(page_size_log); if (hstate_idx < 0) @@ -951,6 +949,10 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, if (!hugetlbfs_vfsmount[hstate_idx]) return ERR_PTR(-ENOENT); + size = 1 << hstate_index_to_shift(hstate_idx); + if (sizeptr) + *sizeptr = ALIGN(*sizeptr, size); + if (creat_flags == HUGETLB_SHMFS_INODE && !can_do_hugetlb_shm()) { *user = current_user(); if (user_shm_lock(size, *user)) { @@ -980,12 +982,10 @@ struct file *hugetlb_file_setup(const char *name, unsigned long addr, if (!inode) goto out_dentry; - hstate = hstate_inode(inode); - size += addr & ~huge_page_mask(hstate); - num_pages = ALIGN(size, huge_page_size(hstate)) >> - huge_page_shift(hstate); file = ERR_PTR(-ENOMEM); - if (hugetlb_reserve_pages(inode, 0, num_pages, NULL, acctflag)) + if (hugetlb_reserve_pages(inode, 0, + size >> huge_page_shift(hstate_inode(inode)), NULL, + acctflag)) goto out_inode; d_instantiate(path.dentry, inode); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8220a8a..ca67d42 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -193,8 +193,7 @@ static inline struct hugetlbfs_sb_info *HUGETLBFS_SB(struct super_block *sb) extern const struct file_operations hugetlbfs_file_operations; extern const struct vm_operations_struct hugetlb_vm_ops; -struct file *hugetlb_file_setup(const char *name, unsigned long addr, - size_t size, vm_flags_t acct, +struct file *hugetlb_file_setup(const char *name, size_t *size, vm_flags_t acct, struct user_struct **user, int creat_flags, int page_size_log); @@ -213,8 +212,8 @@ static inline int is_file_hugepages(struct file *file) #define is_file_hugepages(file) 0 static inline struct file * -hugetlb_file_setup(const char *name, unsigned long addr, size_t size, - vm_flags_t acctflag, struct user_struct **user, int creat_flags, +hugetlb_file_setup(const char *name, size_t *size, vm_flags_t acctflag, + struct user_struct **user, int creat_flags, int page_size_log) { return ERR_PTR(-ENOSYS); diff --git a/ipc/shm.c b/ipc/shm.c index cb858df..e2cb809 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -494,7 +494,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) /* hugetlb_file_setup applies strict accounting */ if (shmflg & SHM_NORESERVE) acctflag = VM_NORESERVE; - file = hugetlb_file_setup(name, 0, size, acctflag, + file = hugetlb_file_setup(name, NULL, acctflag, &shp->mlock_user, HUGETLB_SHMFS_INODE, (shmflg >> SHM_HUGE_SHIFT) & SHM_HUGE_MASK); } else { diff --git a/mm/mmap.c b/mm/mmap.c index 2664a47..d03a541 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1333,7 +1333,7 @@ SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, * A dummy user value is used because we are not locking * memory so no accounting is necessary */ - file = hugetlb_file_setup(HUGETLB_ANON_FILE, addr, len, + file = hugetlb_file_setup(HUGETLB_ANON_FILE, &len, VM_NORESERVE, &user, HUGETLB_ANONHUGE_INODE, (flags >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK); -- 1.7.11.7 -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@xxxxxxxxx. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: <a href=mailto:"dont@xxxxxxxxx"> email@xxxxxxxxx </a>