After DAMON sets a PTE or PMD to none, NUMA accesses to the corresponding "damon_region" are counted in the page fault path if the current pid matches a pid that DAMON is monitoring. Signed-off-by: Xin Hao <xhao@xxxxxxxxxxxxxxxxx> Signed-off-by: Rongwei Wang <rongwei.wang@xxxxxxxxxxxxxxxxx> --- include/linux/damon.h | 18 ++++++++++ mm/damon/core.c | 80 +++++++++++++++++++++++++++++++++++++++++-- mm/damon/dbgfs.c | 18 +++++++--- mm/damon/vaddr.c | 11 ++---- mm/huge_memory.c | 5 +++ mm/memory.c | 5 +++ 6 files changed, 121 insertions(+), 16 deletions(-) diff --git a/include/linux/damon.h b/include/linux/damon.h index 77d0937dcab5..5bf1eb92584b 100644 --- a/include/linux/damon.h +++ b/include/linux/damon.h @@ -12,12 +12,16 @@ #include <linux/time64.h> #include <linux/types.h> #include <linux/random.h> +#include <linux/mm.h> /* Minimal region size. Every damon_region is aligned by this. */ #define DAMON_MIN_REGION PAGE_SIZE /* Max priority score for DAMON-based operation schemes */ #define DAMOS_MAX_SCORE (99) +extern struct damon_ctx **dbgfs_ctxs; +extern int dbgfs_nr_ctxs; + /* Get a random number in [l, r) */ static inline unsigned long damon_rand(unsigned long l, unsigned long r) { @@ -68,6 +72,7 @@ struct damon_region { * @nr_regions: Number of monitoring target regions of this target. * @regions_list: Head of the monitoring target regions of this target. * @list: List head for siblings. + * @target_lock: Use damon_region lock to avoid race. * * Each monitoring context could have multiple targets. For example, a context * for virtual memory address spaces could have multiple target processes. The @@ -80,6 +85,7 @@ struct damon_target { unsigned int nr_regions; struct list_head regions_list; struct list_head list; + spinlock_t target_lock; }; /** @@ -503,8 +509,20 @@ int damon_stop(struct damon_ctx **ctxs, int nr_ctxs); #endif /* CONFIG_DAMON */ #ifdef CONFIG_DAMON_VADDR + +/* + * 't->id' should be the pointer to the relevant 'struct pid' having reference + * count. 
Caller must put the returned task, unless it is NULL. + */ +static inline struct task_struct *damon_get_task_struct(struct damon_target *t) +{ + return get_pid_task((struct pid *)t->id, PIDTYPE_PID); +} bool damon_va_target_valid(void *t); void damon_va_set_primitives(struct damon_ctx *ctx); +void damon_numa_fault(int page_nid, int node_id, struct vm_fault *vmf); +#else +static inline void damon_numa_fault(int page_nid, int node_id, struct vm_fault *vmf) { } #endif /* CONFIG_DAMON_VADDR */ #ifdef CONFIG_DAMON_PADDR diff --git a/mm/damon/core.c b/mm/damon/core.c index 933ef51afa71..970fc02abeba 100644 --- a/mm/damon/core.c +++ b/mm/damon/core.c @@ -157,6 +157,7 @@ struct damon_target *damon_new_target(unsigned long id) t->id = id; t->nr_regions = 0; INIT_LIST_HEAD(&t->regions_list); + spin_lock_init(&t->target_lock); return t; } @@ -792,8 +793,11 @@ static void kdamond_merge_regions(struct damon_ctx *c, unsigned int threshold, { struct damon_target *t; - damon_for_each_target(t, c) + damon_for_each_target(t, c) { + spin_lock(&t->target_lock); damon_merge_regions_of(t, threshold, sz_limit); + spin_unlock(&t->target_lock); + } } /* @@ -879,8 +883,11 @@ static void kdamond_split_regions(struct damon_ctx *ctx) nr_regions < ctx->max_nr_regions / 3) nr_subregions = 3; - damon_for_each_target(t, ctx) + damon_for_each_target(t, ctx) { + spin_lock(&t->target_lock); damon_split_regions_of(ctx, t, nr_subregions); + spin_unlock(&t->target_lock); + } last_nr_regions = nr_regions; } @@ -1000,6 +1007,73 @@ static int kdamond_wait_activation(struct damon_ctx *ctx) return -EBUSY; } +static struct damon_target *get_damon_target(struct task_struct *task) +{ + int i; + unsigned long id1, id2; + struct damon_target *t; + + rcu_read_lock(); + for (i = 0; i < READ_ONCE(dbgfs_nr_ctxs); i++) { + struct damon_ctx *ctx = rcu_dereference(dbgfs_ctxs[i]); + + if (!ctx || !ctx->kdamond) + continue; + damon_for_each_target(t, dbgfs_ctxs[i]) { + struct task_struct *ts = damon_get_task_struct(t); + 
+ if (ts) { + id1 = (unsigned long)pid_vnr((struct pid *)t->id); + id2 = (unsigned long)pid_vnr(get_task_pid(task, PIDTYPE_PID)); + put_task_struct(ts); + if (id1 == id2) + return t; + } + } + } + rcu_read_unlock(); + + return NULL; +} + +static struct damon_region *get_damon_region(struct damon_target *t, unsigned long addr) +{ + struct damon_region *r, *next; + + if (!t || !addr) + return NULL; + + spin_lock(&t->target_lock); + damon_for_each_region_safe(r, next, t) { + if (r->ar.start <= addr && r->ar.end >= addr) { + spin_unlock(&t->target_lock); + return r; + } + } + spin_unlock(&t->target_lock); + + return NULL; +} + +void damon_numa_fault(int page_nid, int node_id, struct vm_fault *vmf) +{ + struct damon_target *t; + struct damon_region *r; + + if (nr_online_nodes > 1) { + t = get_damon_target(current); + if (!t) + return; + r = get_damon_region(t, vmf->address); + if (r) { + if (page_nid == node_id) + r->local++; + else + r->remote++; + } + } +} + /* * The monitoring daemon that runs as a kernel thread */ @@ -1057,8 +1131,10 @@ static int kdamond_fn(void *data) } } damon_for_each_target(t, ctx) { + spin_lock(&t->target_lock); damon_for_each_region_safe(r, next, t) damon_destroy_region(r, t); + spin_unlock(&t->target_lock); } if (ctx->callback.before_terminate) diff --git a/mm/damon/dbgfs.c b/mm/damon/dbgfs.c index 5b899601e56c..c7f4e95abc14 100644 --- a/mm/damon/dbgfs.c +++ b/mm/damon/dbgfs.c @@ -15,11 +15,12 @@ #include <linux/page_idle.h> #include <linux/slab.h> -static struct damon_ctx **dbgfs_ctxs; -static int dbgfs_nr_ctxs; +struct damon_ctx **dbgfs_ctxs; +int dbgfs_nr_ctxs; static struct dentry **dbgfs_dirs; static DEFINE_MUTEX(damon_dbgfs_lock); + /* * Returns non-empty string on success, negative error code otherwise. 
*/ @@ -808,10 +809,18 @@ static int dbgfs_rm_context(char *name) return -ENOMEM; } - for (i = 0, j = 0; i < dbgfs_nr_ctxs; i++) { + dbgfs_nr_ctxs--; + /* Prevent NUMA fault get the wrong value */ + smp_mb(); + + for (i = 0, j = 0; i < dbgfs_nr_ctxs + 1; i++) { if (dbgfs_dirs[i] == dir) { + struct damon_ctx *tmp_ctx = dbgfs_ctxs[i]; + + rcu_assign_pointer(dbgfs_ctxs[i], NULL); + synchronize_rcu(); debugfs_remove(dbgfs_dirs[i]); - dbgfs_destroy_ctx(dbgfs_ctxs[i]); + dbgfs_destroy_ctx(tmp_ctx); continue; } new_dirs[j] = dbgfs_dirs[i]; @@ -823,7 +832,6 @@ static int dbgfs_rm_context(char *name) dbgfs_dirs = new_dirs; dbgfs_ctxs = new_ctxs; - dbgfs_nr_ctxs--; return 0; } diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 732b41ed134c..78b90972d171 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -22,15 +22,6 @@ #define DAMON_MIN_REGION 1 #endif -/* - * 't->id' should be the pointer to the relevant 'struct pid' having reference - * count. Caller must put the returned task, unless it is NULL. 
- */ -static inline struct task_struct *damon_get_task_struct(struct damon_target *t) -{ - return get_pid_task((struct pid *)t->id, PIDTYPE_PID); -} - /* * Get the mm_struct of the given target * @@ -363,7 +354,9 @@ static void damon_va_update(struct damon_ctx *ctx) damon_for_each_target(t, ctx) { if (damon_va_three_regions(t, three_regions)) continue; + spin_lock(&t->target_lock); damon_va_apply_three_regions(t, three_regions); + spin_unlock(&t->target_lock); } } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 406a3c28c026..9cb413a8cd4a 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -34,6 +34,7 @@ #include <linux/oom.h> #include <linux/numa.h> #include <linux/page_owner.h> +#include <linux/damon.h> #include <asm/tlb.h> #include <asm/pgalloc.h> @@ -1450,6 +1451,10 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf) flags |= TNF_NO_GROUP; page_nid = page_to_nid(page); + + /* Get the NUMA accesses of monitored processes by DAMON */ + damon_numa_fault(page_nid, numa_node_id(), vmf); + last_cpupid = page_cpupid_last(page); target_nid = numa_migrate_prep(page, vma, haddr, page_nid, &flags); diff --git a/mm/memory.c b/mm/memory.c index c125c4969913..fb55264f36af 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -74,6 +74,7 @@ #include <linux/perf_event.h> #include <linux/ptrace.h> #include <linux/vmalloc.h> +#include <linux/damon.h> #include <trace/events/kmem.h> @@ -4392,6 +4393,10 @@ static vm_fault_t do_numa_page(struct vm_fault *vmf) last_cpupid = page_cpupid_last(page); page_nid = page_to_nid(page); + + /* Get the NUMA accesses of monitored processes by DAMON */ + damon_numa_fault(page_nid, numa_node_id(), vmf); + target_nid = numa_migrate_prep(page, vma, vmf->address, page_nid, &flags); if (target_nid == NUMA_NO_NODE) { -- 2.27.0