On 4/28/23 23:54, Bernd Schubert wrote: > On 4/28/23 03:44, Hillf Danton wrote: >> restart: >> for (;;) { >> spin_lock(&fiq->lock); >> --- a/include/linux/sched.h >> +++ b/include/linux/sched.h >> @@ -953,6 +953,7 @@ struct task_struct { >> /* delay due to memory thrashing */ >> unsigned in_thrashing:1; >> #endif >> + unsigned seesaw:1; >> unsigned long atomic_flags; /* Flags requiring atomic >> access. */ >> --- a/kernel/sched/fair.c >> +++ b/kernel/sched/fair.c >> @@ -7424,6 +7424,8 @@ select_task_rq_fair(struct task_struct * >> if (wake_flags & WF_TTWU) { >> record_wakee(p); >> + if (p->seesaw && current->seesaw) >> + return cpu; >> if (sched_energy_enabled()) { >> new_cpu = find_energy_efficient_cpu(p, prev_cpu); >> if (new_cpu >= 0) > > > Hmm, WF_CURRENT_CPU works rather similar, except that it tests if cpu is > in cpus_ptr? The combination of both patches results in > > if (p->seesaw && current->seesaw) > return cpu; > > if ((wake_flags & WF_CURRENT_CPU) && > cpumask_test_cpu(cpu, p->cpus_ptr)) > return cpu; > > > > While writing the mail kernel compilation is ready, but it got late, > will test in the morning. This works wonders! 
The fuse-uring part is this diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cd7aa679c3ee..ec5853ca9646 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -373,6 +373,9 @@ static void request_wait_answer(struct fuse_req *req) int err; int prev_cpu = task_cpu(current); + if (fc->ring.per_core_queue) + current->seesaw = 1; + if (!fc->no_interrupt) { /* Any signal may interrupt this */ err = wait_event_interruptible(req->waitq, diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 7d327699b4c5..715741ed58bf 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -1312,6 +1312,13 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) /* XXX error injection or test with malicious daemon */ } + /* In combination with requesting process (application) seesaw + * setting (see request_wait_answer), the application will + * stay on the same core. + */ + if (fc->ring.per_core_queue) + current->seesaw = 1; + ret = fuse_uring_fetch(ring_ent, cmd); break; case FUSE_URING_REQ_COMMIT_AND_FETCH: I'm not familiar at all with scheduler code, but given this works perfectly, this suggests the same function is also called without an explicit waitq, when the scheduler preempts a task? I think there might be side effects - what happens if multiple applications are on one core and another core would be available? With this flag they would stay on the same core? Maybe better two flags? diff --git a/include/linux/sched.h b/include/linux/sched.h index 63d242164b1a..07783ddaec5c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -953,6 +953,8 @@ struct task_struct { /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif + unsigned seesaw_req:1; + unsigned seesaw_io:1; unsigned long atomic_flags; /* Flags requiring atomic access. 
*/ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b9d6ed7585c6..474bf3657ef0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7605,6 +7605,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) if (wake_flags & WF_TTWU) { record_wakee(p); + /* current is handling requests on behalf of the waking process, + * both want to run on the same core in seesaw manner. + */ + if (p->seesaw_req && current->seesaw_io && + cpumask_test_cpu(cpu, p->cpus_ptr)) + return cpu; + if ((wake_flags & WF_CURRENT_CPU) && cpumask_test_cpu(cpu, p->cpus_ptr)) return cpu; (not tested yet) Thanks, Bernd