Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Gabriele Monaco <gmonaco@xxxxxxxxxx>
---
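The mm_cid scan now runs from a per-mm delayed_work: it is armed in
mm_alloc_cid(), re-arms itself at the end of each run, and is shut down
with disable_delayed_work_sync() in mm_destroy_cid(). A rough standalone
sketch of that self-rearming delayed_work pattern, for reference only
(the module boilerplate and the demo_* names are illustrative, not taken
from this patch):

#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

#define DEMO_DELAY_MS	100

static struct delayed_work demo_work;

static void demo_work_fn(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);

	/* Periodic housekeeping would run here. */

	/* Re-arm: the handler schedules its own next run. */
	schedule_delayed_work(dwork, msecs_to_jiffies(DEMO_DELAY_MS));
}

static int __init demo_init(void)
{
	INIT_DELAYED_WORK(&demo_work, demo_work_fn);
	schedule_delayed_work(&demo_work, msecs_to_jiffies(DEMO_DELAY_MS));
	return 0;
}

static void __exit demo_exit(void)
{
	/* Stop the work and keep it from re-arming itself. */
	disable_delayed_work_sync(&demo_work);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");
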
include/linux/mm_types.h | 16 ++++++----
include/linux/sched.h | 1 -
kernel/sched/core.c | 66 +++++-----------------------------------
kernel/sched/sched.h | 7 -----
4 files changed, 18 insertions(+), 72 deletions(-)
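
In task_mm_cid_work() below, the mm is recovered from the work pointer via
two explicit container_of() steps; the first step could equivalently be
spelled with the to_delayed_work() helper, e.g. (illustrative spelling
only, same semantics):

	struct mm_struct *mm = container_of(to_delayed_work(work),
					    struct mm_struct, mm_cid_work);
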
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index d56948a74254..16076e70a6b9 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -829,12 +829,6 @@ struct mm_struct {
* runqueue locks.
*/
struct mm_cid __percpu *pcpu_cid;
- /*
- * @mm_cid_next_scan: Next mm_cid scan (in jiffies).
- *
- * When the next mm_cid scan is due (in jiffies).
- */
- unsigned long mm_cid_next_scan;
/**
* @nr_cpus_allowed: Number of CPUs allowed for mm.
*
@@ -857,6 +851,7 @@ struct mm_struct {
* mm nr_cpus_allowed updates.
*/
raw_spinlock_t cpus_allowed_lock;
+ struct delayed_work mm_cid_work;
#endif
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* size of all page tables */
@@ -1145,11 +1140,16 @@ static inline void vma_iter_init(struct vma_iterator *vmi,
#ifdef CONFIG_SCHED_MM_CID

+#define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
+#define MM_CID_SCAN_DELAY 100 /* 100ms */
+
enum mm_cid_state {
MM_CID_UNSET = -1U, /* Unset state has lazy_put flag set. */
MM_CID_LAZY_PUT = (1U << 31),
};

+extern void task_mm_cid_work(struct work_struct *work);
+
static inline bool mm_cid_is_unset(int cid)
{
return cid == MM_CID_UNSET;
@@ -1222,12 +1222,16 @@ static inline int mm_alloc_cid_noprof(struct mm_struct *mm, struct task_struct *
if (!mm->pcpu_cid)
return -ENOMEM;
mm_init_cid(mm, p);
+ INIT_DELAYED_WORK(&mm->mm_cid_work, task_mm_cid_work);
+ schedule_delayed_work(&mm->mm_cid_work,
+ msecs_to_jiffies(MM_CID_SCAN_DELAY));
return 0;
}
#define mm_alloc_cid(...) alloc_hooks(mm_alloc_cid_noprof(__VA_ARGS__))

static inline void mm_destroy_cid(struct mm_struct *mm)
{
+ disable_delayed_work_sync(&mm->mm_cid_work);
free_percpu(mm->pcpu_cid);
mm->pcpu_cid = NULL;
}
diff --git a/include/linux/sched.h b/include/linux/sched.h
index d380bffee2ef..5d141c310917 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1374,7 +1374,6 @@ struct task_struct {
int last_mm_cid; /* Most recent cid in mm */
int migrate_from_cpu;
int mm_cid_active; /* Whether cid bitmap is active */
- struct callback_head cid_work;
#endif

struct tlbflush_unmap_batch tlb_ubc;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c6d8232ad9ee..30d78fe14eff 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4516,7 +4516,6 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
p->wake_entry.u_flags = CSD_TYPE_TTWU;
p->migration_pending = NULL;
#endif
- init_sched_mm_cid(p);
}

DEFINE_STATIC_KEY_FALSE(sched_numa_balancing);
@@ -5654,7 +5653,6 @@ void sched_tick(void)
resched_latency = cpu_resched_latency(rq);
calc_global_load_tick(rq);
sched_core_tick(rq);
- task_tick_mm_cid(rq, donor);
scx_tick(rq);
rq_unlock(rq, &rf);
@@ -10520,38 +10518,17 @@ static void sched_mm_cid_remote_clear_weight(struct mm_struct *mm, int cpu,
sched_mm_cid_remote_clear(mm, pcpu_cid, cpu);
}

-static void task_mm_cid_work(struct callback_head *work)
+void task_mm_cid_work(struct work_struct *work)
{
- unsigned long now = jiffies, old_scan, next_scan;
- struct task_struct *t = current;
struct cpumask *cidmask;
- struct mm_struct *mm;
+ struct delayed_work *delayed_work = container_of(work, struct delayed_work, work);
+ struct mm_struct *mm = container_of(delayed_work, struct mm_struct, mm_cid_work);
int weight, cpu;
- SCHED_WARN_ON(t != container_of(work, struct task_struct, cid_work));
-
- work->next = work; /* Prevent double-add */
- if (t->flags & PF_EXITING)
- return;
- mm = t->mm;
- if (!mm)
- return;
- old_scan = READ_ONCE(mm->mm_cid_next_scan);
- next_scan = now + msecs_to_jiffies(MM_CID_SCAN_DELAY);
- if (!old_scan) {
- unsigned long res;
-
- res = cmpxchg(&mm->mm_cid_next_scan, old_scan, next_scan);
- if (res != old_scan)
- old_scan = res;
- else
- old_scan = next_scan;
- }
- if (time_before(now, old_scan))
- return;
- if (!try_cmpxchg(&mm->mm_cid_next_scan, &old_scan, next_scan))
- return;
cidmask = mm_cidmask(mm);
+	/* No cid in use yet: nothing to clear, just re-arm the scan. */
+ if (cpumask_empty(cidmask))
+ goto out;
/* Clear cids that were not recently used. */
for_each_possible_cpu(cpu)
sched_mm_cid_remote_clear_old(mm, cpu);
@@ -10562,35 +10539,8 @@ static void task_mm_cid_work(struct callback_head *work)
*/
for_each_possible_cpu(cpu)
sched_mm_cid_remote_clear_weight(mm, cpu, weight);
-}
-
-void init_sched_mm_cid(struct task_struct *t)
-{
- struct mm_struct *mm = t->mm;
- int mm_users = 0;
-
- if (mm) {
- mm_users = atomic_read(&mm->mm_users);
- if (mm_users == 1)
- mm->mm_cid_next_scan = jiffies + msecs_to_jiffies(MM_CID_SCAN_DELAY);
- }
- t->cid_work.next = &t->cid_work; /* Protect against double add */
- init_task_work(&t->cid_work, task_mm_cid_work);
-}
-
-void task_tick_mm_cid(struct rq *rq, struct task_struct *curr)
-{
- struct callback_head *work = &curr->cid_work;
- unsigned long now = jiffies;
-
- if (!curr->mm || (curr->flags & (PF_EXITING | PF_KTHREAD)) ||
- work->next != work)
- return;
- if (time_before(now, READ_ONCE(curr->mm->mm_cid_next_scan)))
- return;
-
- /* No page allocation under rq lock */
- task_work_add(curr, work, TWA_RESUME | TWAF_NO_ALLOC);
+out:
+ schedule_delayed_work(delayed_work, msecs_to_jiffies(MM_CID_SCAN_DELAY));
}

void sched_mm_cid_exit_signals(struct task_struct *t)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index b50dcd908702..f3b0d1d86622 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -3581,16 +3581,11 @@ extern void sched_dynamic_update(int mode);
#ifdef CONFIG_SCHED_MM_CID
-#define SCHED_MM_CID_PERIOD_NS (100ULL * 1000000) /* 100ms */
-#define MM_CID_SCAN_DELAY 100 /* 100ms */
-
extern raw_spinlock_t cid_lock;
extern int use_cid_lock;
extern void sched_mm_cid_migrate_from(struct task_struct *t);
extern void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t);
-extern void task_tick_mm_cid(struct rq *rq, struct task_struct *curr);
-extern void init_sched_mm_cid(struct task_struct *t);
static inline void __mm_cid_put(struct mm_struct *mm, int cid)
{
@@ -3858,8 +3853,6 @@ static inline void switch_mm_cid(struct rq *rq,
static inline void switch_mm_cid(struct rq *rq, struct task_struct *prev, struct task_struct *next) { }
static inline void sched_mm_cid_migrate_from(struct task_struct *t) { }
static inline void sched_mm_cid_migrate_to(struct rq *dst_rq, struct task_struct *t) { }
-static inline void task_tick_mm_cid(struct rq *rq, struct task_struct *curr) { }
-static inline void init_sched_mm_cid(struct task_struct *t) { }
#endif /* !CONFIG_SCHED_MM_CID */

extern u64 avg_vruntime(struct cfs_rq *cfs_rq);