From: Peter Zijlstra <peterz@xxxxxxxxxxxxx>

Increase tasks' VMA access-PID history windows from two to four. This
prepares for optimizations based on tasks' VMA access history.

Signed-off-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Signed-off-by: Raghavendra K T <raghavendra.kt@xxxxxxx>
---
 include/linux/mm.h       | 12 ++++++++----
 include/linux/mm_types.h |  4 +++-
 kernel/sched/fair.c      | 29 ++++++++++++++++++++++++-----
 3 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 406ab9ea818f..7794dc91c50f 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1689,10 +1689,14 @@ static inline int xchg_page_access_time(struct page *page, int time)
 static inline void vma_set_access_pid_bit(struct vm_area_struct *vma)
 {
 	unsigned int pid_bit;
-
-	pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG));
-	if (vma->numab_state && !test_bit(pid_bit, &vma->numab_state->access_pids[1])) {
-		__set_bit(pid_bit, &vma->numab_state->access_pids[1]);
+	unsigned long *pids, pid_idx;
+
+	if (vma->numab_state) {
+		pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG));
+		pid_idx = READ_ONCE(vma->numab_state->access_pid_idx);
+		pids = vma->numab_state->access_pids + pid_idx;
+		if (!test_bit(pid_bit, pids))
+			__set_bit(pid_bit, pids);
 	}
 }
 #else /* !CONFIG_NUMA_BALANCING */
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 647d9fc5da8d..676afa9e497c 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -475,10 +475,12 @@ struct vma_lock {
 	struct rw_semaphore lock;
 };
 
+#define NR_ACCESS_PID_HIST	4
 struct vma_numab_state {
 	unsigned long next_scan;
 	unsigned long next_pid_reset;
-	unsigned long access_pids[2];
+	unsigned long access_pids[NR_ACCESS_PID_HIST];
+	unsigned long access_pid_idx;
 	unsigned long vma_scan_select;
 };
 
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e26e847a8e26..3ae2a1a3ef5c 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2958,12 +2958,26 @@ static bool task_disjoint_vma_select(struct vm_area_struct *vma)
 	return true;
 }
 
+static inline bool vma_test_access_pid_history(struct vm_area_struct *vma)
+{
+	unsigned int i, pid_bit;
+	unsigned long pids = 0;
+
+	pid_bit = hash_32(current->pid, ilog2(BITS_PER_LONG));
+
+	for (i = 0; i < NR_ACCESS_PID_HIST; i++)
+		pids |= vma->numab_state->access_pids[i];
+
+	return test_bit(pid_bit, &pids);
+}
+
 static bool vma_is_accessed(struct vm_area_struct *vma)
 {
-	unsigned long pids;
+	/* Check if the current task had historically accessed VMA. */
+	if (vma_test_access_pid_history(vma))
+		return true;
 
-	pids = vma->numab_state->access_pids[0] | vma->numab_state->access_pids[1];
-	return test_bit(hash_32(current->pid, ilog2(BITS_PER_LONG)), &pids);
+	return false;
 }
 
 #define VMA_PID_RESET_PERIOD (4 * sysctl_numa_balancing_scan_delay)
@@ -2983,6 +2997,7 @@ static void task_numa_work(struct callback_head *work)
 	unsigned long nr_pte_updates = 0;
 	long pages, virtpages;
 	struct vma_iterator vmi;
+	unsigned long pid_idx;
 
 	SCHED_WARN_ON(p != container_of(work, struct task_struct, numa_work));
 
@@ -3097,8 +3112,12 @@ static void task_numa_work(struct callback_head *work)
 		    time_after(jiffies, vma->numab_state->next_pid_reset)) {
 			vma->numab_state->next_pid_reset = vma->numab_state->next_pid_reset +
 				msecs_to_jiffies(VMA_PID_RESET_PERIOD);
-			vma->numab_state->access_pids[0] = READ_ONCE(vma->numab_state->access_pids[1]);
-			vma->numab_state->access_pids[1] = 0;
+
+			pid_idx = vma->numab_state->access_pid_idx;
+			pid_idx = (pid_idx + 1) % NR_ACCESS_PID_HIST;
+
+			vma->numab_state->access_pid_idx = pid_idx;
+			vma->numab_state->access_pids[pid_idx] = 0;
 		}
 
 		/*
-- 
2.34.1