This patch adds a new kernel structure `struct mempolicy_args`, intended to be used for an extensible get/set_mempolicy interface. This implements the fields required to support the existing syscall interfaces, but does not expose any user-facing arg structure. mpol_new is refactored to take the argument structure so that future mempolicy extensions can all be managed in the mempolicy constructor. The get_mempolicy and mbind syscalls are refactored to utilize the new argument structure, as are all the callers of mpol_new() and do_set_mempolicy. Signed-off-by: Gregory Price <gregory.price@xxxxxxxxxxxx> --- include/linux/mempolicy.h | 14 ++++++++ mm/mempolicy.c | 69 +++++++++++++++++++++++++++++---------- 2 files changed, 65 insertions(+), 18 deletions(-) diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index ba09167e80f7..117c5395c6eb 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -61,6 +61,20 @@ struct mempolicy { } wil; }; +/* + * Describes settings of a mempolicy during set/get syscalls and + * kernel internal calls to do_set_mempolicy() + */ +struct mempolicy_args { + unsigned short mode; /* policy mode */ + unsigned short mode_flags; /* policy mode flags */ + nodemask_t *policy_nodes; /* get/set/mbind */ + int policy_node; /* get: policy node information */ + unsigned long addr; /* get: vma address */ + int addr_node; /* get: node the address belongs to */ + int home_node; /* mbind: use MPOL_MF_HOME_NODE */ +}; + /* * Support for managing mempolicy data objects (clone, copy, destroy) * The default fast path of a NULL MPOL_DEFAULT policy is always inlined. diff --git a/mm/mempolicy.c b/mm/mempolicy.c index eec807d0c6a1..4c343218c033 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -268,10 +268,12 @@ static int mpol_set_nodemask(struct mempolicy *pol, * This function just creates a new policy, does some check and simple * initialization. You must invoke mpol_set_nodemask() to set nodes. 
*/ -static struct mempolicy *mpol_new(unsigned short mode, unsigned short flags, - nodemask_t *nodes) +static struct mempolicy *mpol_new(struct mempolicy_args *args) { struct mempolicy *policy; + unsigned short mode = args->mode; + unsigned short flags = args->mode_flags; + nodemask_t *nodes = args->policy_nodes; if (mode == MPOL_DEFAULT) { if (nodes && !nodes_empty(*nodes)) @@ -820,8 +822,7 @@ static int mbind_range(struct vma_iterator *vmi, struct vm_area_struct *vma, } /* Set the process memory policy */ -static long do_set_mempolicy(unsigned short mode, unsigned short flags, - nodemask_t *nodes) +static long do_set_mempolicy(struct mempolicy_args *args) { struct mempolicy *new, *old; NODEMASK_SCRATCH(scratch); @@ -830,14 +831,14 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags, if (!scratch) return -ENOMEM; - new = mpol_new(mode, flags, nodes); + new = mpol_new(args); if (IS_ERR(new)) { ret = PTR_ERR(new); goto out; } task_lock(current); - ret = mpol_set_nodemask(new, nodes, scratch); + ret = mpol_set_nodemask(new, args->policy_nodes, scratch); if (ret) { task_unlock(current); mpol_put(new); @@ -1235,8 +1236,7 @@ static struct folio *alloc_migration_target_by_mpol(struct folio *src, #endif static long do_mbind(unsigned long start, unsigned long len, - unsigned short mode, unsigned short mode_flags, - nodemask_t *nmask, unsigned long flags) + struct mempolicy_args *margs, unsigned long flags) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; @@ -1256,7 +1256,7 @@ static long do_mbind(unsigned long start, unsigned long len, if (start & ~PAGE_MASK) return -EINVAL; - if (mode == MPOL_DEFAULT) + if (margs->mode == MPOL_DEFAULT) flags &= ~MPOL_MF_STRICT; len = PAGE_ALIGN(len); @@ -1267,7 +1267,7 @@ static long do_mbind(unsigned long start, unsigned long len, if (end == start) return 0; - new = mpol_new(mode, mode_flags, nmask); + new = mpol_new(margs); if (IS_ERR(new)) return PTR_ERR(new); @@ -1284,7 +1284,8 @@ static 
long do_mbind(unsigned long start, unsigned long len, NODEMASK_SCRATCH(scratch); if (scratch) { mmap_write_lock(mm); - err = mpol_set_nodemask(new, nmask, scratch); + err = mpol_set_nodemask(new, margs->policy_nodes, + scratch); if (err) mmap_write_unlock(mm); } else @@ -1298,7 +1299,7 @@ static long do_mbind(unsigned long start, unsigned long len, * Lock the VMAs before scanning for pages to migrate, * to ensure we don't miss a concurrently inserted page. */ - nr_failed = queue_pages_range(mm, start, end, nmask, + nr_failed = queue_pages_range(mm, start, end, margs->policy_nodes, flags | MPOL_MF_INVERT | MPOL_MF_WRLOCK, &pagelist); if (nr_failed < 0) { @@ -1503,6 +1504,7 @@ static long kernel_mbind(unsigned long start, unsigned long len, unsigned long mode, const unsigned long __user *nmask, unsigned long maxnode, unsigned int flags) { + struct mempolicy_args margs; unsigned short mode_flags; nodemask_t nodes; int lmode = mode; @@ -1517,7 +1519,12 @@ static long kernel_mbind(unsigned long start, unsigned long len, if (err) return err; - return do_mbind(start, len, lmode, mode_flags, &nodes, flags); + memset(&margs, 0, sizeof(margs)); + margs.mode = lmode; + margs.mode_flags = mode_flags; + margs.policy_nodes = &nodes; + + return do_mbind(start, len, &margs, flags); } SYSCALL_DEFINE4(set_mempolicy_home_node, unsigned long, start, unsigned long, len, @@ -1598,6 +1605,7 @@ SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask, unsigned long maxnode) { + struct mempolicy_args args; unsigned short mode_flags; nodemask_t nodes; int lmode = mode; @@ -1611,7 +1619,12 @@ static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask, if (err) return err; - return do_set_mempolicy(lmode, mode_flags, &nodes); + memset(&args, 0, sizeof(args)); + args.mode = lmode; + args.mode_flags = mode_flags; + args.policy_nodes = &nodes; + + return do_set_mempolicy(&args); } 
SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, @@ -2877,6 +2890,7 @@ static int shared_policy_replace(struct shared_policy *sp, pgoff_t start, void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) { int ret; + struct mempolicy_args margs; sp->root = RB_ROOT; /* empty tree == default mempolicy */ rwlock_init(&sp->lock); @@ -2889,8 +2903,12 @@ void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol) if (!scratch) goto put_mpol; + memset(&margs, 0, sizeof(margs)); + margs.mode = mpol->mode; + margs.mode_flags = mpol->flags; + margs.policy_nodes = &mpol->w.user_nodemask; /* contextualize the tmpfs mount point mempolicy to this file */ - npol = mpol_new(mpol->mode, mpol->flags, &mpol->w.user_nodemask); + npol = mpol_new(&margs); if (IS_ERR(npol)) goto free_scratch; /* no valid nodemask intersection */ @@ -2998,6 +3016,7 @@ static inline void __init check_numabalancing_enable(void) void __init numa_policy_init(void) { + struct mempolicy_args args; nodemask_t interleave_nodes; unsigned long largest = 0; int nid, prefer = 0; @@ -3043,7 +3062,11 @@ void __init numa_policy_init(void) if (unlikely(nodes_empty(interleave_nodes))) node_set(prefer, interleave_nodes); - if (do_set_mempolicy(MPOL_INTERLEAVE, 0, &interleave_nodes)) + memset(&args, 0, sizeof(args)); + args.mode = MPOL_INTERLEAVE; + args.policy_nodes = &interleave_nodes; + + if (do_set_mempolicy(&args)) pr_err("%s: interleaving failed\n", __func__); check_numabalancing_enable(); @@ -3052,7 +3075,12 @@ void __init numa_policy_init(void) /* Reset policy of current process to default */ void numa_default_policy(void) { - do_set_mempolicy(MPOL_DEFAULT, 0, NULL); + struct mempolicy_args args; + + memset(&args, 0, sizeof(args)); + args.mode = MPOL_DEFAULT; + + do_set_mempolicy(&args); } /* @@ -3082,6 +3110,7 @@ static const char * const policy_modes[] = */ int mpol_parse_str(char *str, struct mempolicy **mpol) { + struct mempolicy_args margs; 
struct mempolicy *new = NULL; unsigned short mode_flags; nodemask_t nodes; @@ -3168,7 +3197,11 @@ int mpol_parse_str(char *str, struct mempolicy **mpol) goto out; } - new = mpol_new(mode, mode_flags, &nodes); + memset(&margs, 0, sizeof(margs)); + margs.mode = mode; + margs.mode_flags = mode_flags; + margs.policy_nodes = &nodes; + new = mpol_new(&margs); if (IS_ERR(new)) goto out; -- 2.39.1