In a container environment, we don't want users to bind their memory to a specific NUMA node; instead, we want to unify control of memory resources under kubelet. Therefore, add a new LSM hook for mbind(2) so that we can enforce fine-grained control over memory policy adjustments made by the tasks in a container. Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> --- include/linux/lsm_hook_defs.h | 4 ++++ include/linux/security.h | 10 ++++++++++ mm/mempolicy.c | 4 ++++ security/security.c | 7 +++++++ 4 files changed, 25 insertions(+) diff --git a/include/linux/lsm_hook_defs.h b/include/linux/lsm_hook_defs.h index 99b8176..b1b5e3a 100644 --- a/include/linux/lsm_hook_defs.h +++ b/include/linux/lsm_hook_defs.h @@ -419,3 +419,7 @@ LSM_HOOK(int, 0, uring_sqpoll, void) LSM_HOOK(int, 0, uring_cmd, struct io_uring_cmd *ioucmd) #endif /* CONFIG_IO_URING */ + +LSM_HOOK(int, 0, mbind, unsigned long start, unsigned long len, + unsigned long mode, const unsigned long __user *nmask, + unsigned long maxnode, unsigned int flags) diff --git a/include/linux/security.h b/include/linux/security.h index 1d1df326..9f87543 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -484,6 +484,9 @@ int security_setprocattr(const char *lsm, const char *name, void *value, int security_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen); int security_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen); int security_locked_down(enum lockdown_reason what); +int security_mbind(unsigned long start, unsigned long len, + unsigned long mode, const unsigned long __user *nmask, + unsigned long maxnode, unsigned int flags); #else /* CONFIG_SECURITY */ static inline int call_blocking_lsm_notifier(enum lsm_event event, void *data) @@ -1395,6 +1398,13 @@ static inline int security_locked_down(enum lockdown_reason what) { return 0; } + +static inline int security_mbind(unsigned long start, unsigned long len, + unsigned long mode, const unsigned long __user *nmask, + unsigned long maxnode, 
unsigned int flags) +{ + return 0; +} #endif /* CONFIG_SECURITY */ #if defined(CONFIG_SECURITY) && defined(CONFIG_WATCH_QUEUE) diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 10a590e..98a378c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1483,6 +1483,10 @@ static long kernel_mbind(unsigned long start, unsigned long len, if (err) return err; + err = security_mbind(start, len, mode, nmask, maxnode, flags); + if (err) + return err; + return do_mbind(start, len, lmode, mode_flags, &nodes, flags); } diff --git a/security/security.c b/security/security.c index dcb3e70..425ec1c 100644 --- a/security/security.c +++ b/security/security.c @@ -5337,3 +5337,10 @@ int security_uring_cmd(struct io_uring_cmd *ioucmd) return call_int_hook(uring_cmd, 0, ioucmd); } #endif /* CONFIG_IO_URING */ + +int security_mbind(unsigned long start, unsigned long len, + unsigned long mode, const unsigned long __user *nmask, + unsigned long maxnode, unsigned int flags) +{ + return call_int_hook(mbind, 0, start, len, mode, nmask, maxnode, flags); +} -- 1.8.3.1