A new scalar value (PTEAScanScale) to control per task PTE A bit scanning is introduced. 0 : scanning disabled 1-10 : scanning enabled. In future PTEAScanScale could be used to control aggressiveness of scanning. CC: linux-doc@xxxxxxxxxxxxxxx CC: Jonathan Corbet <corbet@xxxxxxx> CC: linux-fsdevel@xxxxxxxxxxxxxxx Suggested-by: David Rientjes <rientjes@xxxxxxxxxx> Signed-off-by: Raghavendra K T <raghavendra.kt@xxxxxxx> --- Documentation/filesystems/proc.rst | 2 ++ fs/proc/task_mmu.c | 4 ++++ include/linux/kmmscand.h | 1 + include/linux/mm_types.h | 3 +++ include/uapi/linux/prctl.h | 7 +++++++ kernel/fork.c | 4 ++++ kernel/sys.c | 25 +++++++++++++++++++++++++ mm/kmmscand.c | 5 +++++ 8 files changed, 51 insertions(+) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 09f0aed5a08b..78633cab3f1a 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -195,6 +195,7 @@ read the file /proc/PID/status:: VmLib: 1412 kB VmPTE: 20 kb VmSwap: 0 kB + PTEAScanScale: 0 HugetlbPages: 0 kB CoreDumping: 0 THP_enabled: 1 @@ -278,6 +279,7 @@ It's slow but very precise. 
VmPTE size of page table entries VmSwap amount of swap used by anonymous private data (shmem swap usage is not included) + PTEAScanScale Integer representing async PTE A bit scan aggression HugetlbPages size of hugetlb memory portions CoreDumping process's memory is currently being dumped (killing the process may lead to a corrupted core) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index f02cd362309a..55620a5178fb 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -79,6 +79,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8); SEQ_PUT_DEC(" kB\nVmSwap:\t", swap); seq_puts(m, " kB\n"); +#ifdef CONFIG_KMMSCAND + seq_put_decimal_ull_width(m, "PTEAScanScale:\t", mm->pte_scan_scale, 8); + seq_puts(m, "\n"); +#endif hugetlb_report_usage(m, mm); } #undef SEQ_PUT_DEC diff --git a/include/linux/kmmscand.h b/include/linux/kmmscand.h index b120c65ee8c6..7021f7d979a6 100644 --- a/include/linux/kmmscand.h +++ b/include/linux/kmmscand.h @@ -13,6 +13,7 @@ static inline void kmmscand_execve(struct mm_struct *mm) static inline void kmmscand_fork(struct mm_struct *mm, struct mm_struct *oldmm) { + mm->pte_scan_scale = oldmm->pte_scan_scale; __kmmscand_enter(mm); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index eeaedc7473b1..12184e8ebc58 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1018,6 +1018,9 @@ struct mm_struct { #ifdef CONFIG_KMMSCAND /* Tracks promotion node. XXX: use nodemask */ int target_node; + + /* Integer representing PTE A bit scan aggression (0-10) */ + unsigned int pte_scan_scale; #endif /* * An operation with batched TLB flushing is going on. 
Anything diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 5c6080680cb2..18face11440a 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -353,4 +353,11 @@ struct prctl_mm_map { */ #define PR_LOCK_SHADOW_STACK_STATUS 76 +/* Set/get PTE A bit scan scale */ +#define PR_SET_PTE_A_SCAN_SCALE 77 +#define PR_GET_PTE_A_SCAN_SCALE 78 +# define PR_PTE_A_SCAN_SCALE_MIN 0 +# define PR_PTE_A_SCAN_SCALE_MAX 10 +# define PR_PTE_A_SCAN_SCALE_DEFAULT 1 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index f61c55cf33c2..bfbbacb8ec36 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,7 @@ #include <uapi/linux/pidfd.h> #include <linux/pidfs.h> #include <linux/tick.h> +#include <linux/prctl.h> #include <asm/pgalloc.h> #include <linux/uaccess.h> @@ -1292,6 +1293,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, init_tlb_flush_pending(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS) mm->pmd_huge_pte = NULL; +#endif +#ifdef CONFIG_KMMSCAND + mm->pte_scan_scale = PR_PTE_A_SCAN_SCALE_DEFAULT; #endif mm_init_uprobes_state(mm); hugetlb_count_init(mm); diff --git a/kernel/sys.c b/kernel/sys.c index cb366ff8703a..0518480d8f78 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2142,6 +2142,19 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr, return 0; } +#ifdef CONFIG_KMMSCAND +static int prctl_pte_scan_scale_write(unsigned int scale) +{ + scale = clamp(scale, PR_PTE_A_SCAN_SCALE_MIN, PR_PTE_A_SCAN_SCALE_MAX); + current->mm->pte_scan_scale = scale; + return 0; +} + +static unsigned int prctl_pte_scan_scale_read(void) +{ + return current->mm->pte_scan_scale; +} +#endif static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) @@ -2811,6 +2824,18 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, return -EINVAL; error = arch_lock_shadow_stack_status(me, arg2); 
break; +#ifdef CONFIG_KMMSCAND + case PR_SET_PTE_A_SCAN_SCALE: + if (arg3 || arg4 || arg5) + return -EINVAL; + error = prctl_pte_scan_scale_write((unsigned int) arg2); + break; + case PR_GET_PTE_A_SCAN_SCALE: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + error = prctl_pte_scan_scale_read(); + break; +#endif default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); error = -EINVAL; diff --git a/mm/kmmscand.c b/mm/kmmscand.c index 38d7825c0d62..68ef2141c349 100644 --- a/mm/kmmscand.c +++ b/mm/kmmscand.c @@ -1113,6 +1113,11 @@ static unsigned long kmmscand_scan_mm_slot(void) goto outerloop; } + if (!mm->pte_scan_scale) { + next_mm = true; + goto outerloop; + } + mm_target_node = READ_ONCE(mm->target_node); if (mm_target_node != mm_slot_target_node) WRITE_ONCE(mm->target_node, mm_slot_target_node); -- 2.34.1