Add a new prctl, PR_SET_COW_PTE, to allow the user to enable COW PTE. Since there is a time gap between using the prctl to enable COW PTE and doing the fork, we use two states (MMF_COW_PTE_READY and MMF_COW_PTE) to distinguish a task that wants to do COW PTE from one that is already doing it. The MMF_COW_PTE_READY flag marks the task to do COW PTE at the next fork(). During fork(), if MMF_COW_PTE_READY is set, fork() will unset the flag and set the MMF_COW_PTE flag. After that, fork() might share PTEs instead of duplicating them. Signed-off-by: Chih-En Lin <shiyn.lin@xxxxxxxxx> --- include/linux/sched/coredump.h | 13 ++++++++++++- include/uapi/linux/prctl.h | 6 ++++++ kernel/sys.c | 11 +++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 0e17ae7fbfd3..dff4b0938c39 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -87,7 +87,18 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) +/* + * MMF_COW_PTE_READY: Marks the task to do COW PTE at the next fork(). + * During fork(), if MMF_COW_PTE_READY is set, fork() will unset the flag + * and set the MMF_COW_PTE flag. After that, fork() might share PTEs + * rather than duplicating them. 
+ */ +#define MMF_COW_PTE_READY 29 /* Share PTE tables at the next fork() */ +#define MMF_COW_PTE 30 /* PTE tables are shared between processes */ +#define MMF_COW_PTE_MASK (1 << MMF_COW_PTE) + #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ - MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK) + MMF_DISABLE_THP_MASK | MMF_HAS_MDWE_MASK |\ + MMF_COW_PTE_MASK) #endif /* _LINUX_SCHED_COREDUMP_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 1312a137f7fb..8fc82ced80b5 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -290,4 +290,10 @@ struct prctl_mm_map { #define PR_SET_VMA 0x53564d41 # define PR_SET_VMA_ANON_NAME 0 +/* + * Set the preparation flag, MMF_COW_PTE_READY, so that page tables are + * shared (copy-on-write) at the next fork(). + */ +#define PR_SET_COW_PTE 65 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/sys.c b/kernel/sys.c index 495cd87d9bf4..eb1c38c4bad2 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2377,6 +2377,14 @@ static inline int prctl_get_mdwe(unsigned long arg2, unsigned long arg3, PR_MDWE_REFUSE_EXEC_GAIN : 0; } +static int prctl_set_cow_pte(struct mm_struct *mm) +{ + if (test_bit(MMF_COW_PTE, &mm->flags)) + return -EINVAL; + set_bit(MMF_COW_PTE_READY, &mm->flags); + return 0; +} + SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, unsigned long, arg4, unsigned long, arg5) { @@ -2661,6 +2669,9 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_SET_VMA: error = prctl_set_vma(arg2, arg3, arg4, arg5); break; + case PR_SET_COW_PTE: + error = prctl_set_cow_pte(me->mm); + break; default: error = -EINVAL; break; -- 2.34.1