On 4/28/23 23:54, Bernd Schubert wrote: > On 4/28/23 03:44, Hillf Danton wrote: >> restart: >> for (;;) { >> spin_lock(&fiq->lock); >> --- a/include/linux/sched.h >> +++ b/include/linux/sched.h >> @@ -953,6 +953,7 @@ struct task_struct { >> /* delay due to memory thrashing */ >> unsigned in_thrashing:1; >> #endif >> + unsigned seesaw:1; >> unsigned long atomic_flags; /* Flags requiring atomic >> access. */ >> --- a/kernel/sched/fair.c >> +++ b/kernel/sched/fair.c >> @@ -7424,6 +7424,8 @@ select_task_rq_fair(struct task_struct * >> if (wake_flags & WF_TTWU) { >> record_wakee(p); >> + if (p->seesaw && current->seesaw) >> + return cpu; >> if (sched_energy_enabled()) { >> new_cpu = find_energy_efficient_cpu(p, prev_cpu); >> if (new_cpu >= 0) > > > Hmm, WF_CURRENT_CPU works rather similar, except that it tests if cpu is > in cpus_ptr? The combination of both patches results in > > if (p->seesaw && current->seesaw) > return cpu; > > if ((wake_flags & WF_CURRENT_CPU) && > cpumask_test_cpu(cpu, p->cpus_ptr)) > return cpu; > > > > While writing the mail kernel compilation is ready, but it got late, > will test in the morning. This works wonders! 
The fuse-uring part is this diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index cd7aa679c3ee..ec5853ca9646 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -373,6 +373,9 @@ static void request_wait_answer(struct fuse_req *req) int err; int prev_cpu = task_cpu(current); + if (fc->ring.per_core_queue) + current->seesaw = 1; + if (!fc->no_interrupt) { /* Any signal may interrupt this */ err = wait_event_interruptible(req->waitq, diff --git a/fs/fuse/dev_uring.c b/fs/fuse/dev_uring.c index 7d327699b4c5..715741ed58bf 100644 --- a/fs/fuse/dev_uring.c +++ b/fs/fuse/dev_uring.c @@ -1312,6 +1312,13 @@ int fuse_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags) /* XXX error injection or test with malicious daemon */ } + /* In combination with requesting process (application) seesaw + * setting (see request_wait_answer), the application will + * stay on the same core. + */ + if (fc->ring.per_core_queue) + current->seesaw = 1; + ret = fuse_uring_fetch(ring_ent, cmd); break; case FUSE_URING_REQ_COMMIT_AND_FETCH: I'm not familiar at all with scheduler code, but given this works perfectly, this suggests the same function is also called without an explicit waitq, when the scheduler preempts a task? I think there might be side effects - what happens if multiple applications are on one core and another core would be available? With this flag they would stay on the same core? Maybe better two flags? diff --git a/include/linux/sched.h b/include/linux/sched.h index 63d242164b1a..07783ddaec5c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -953,6 +953,8 @@ struct task_struct { /* delay due to memory thrashing */ unsigned in_thrashing:1; #endif + unsigned seesaw_req:1; + unsigned seesaw_io:1; unsigned long atomic_flags; /* Flags requiring atomic access. 
*/ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index b9d6ed7585c6..474bf3657ef0 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -7605,6 +7605,13 @@ select_task_rq_fair(struct task_struct *p, int prev_cpu, int wake_flags) if (wake_flags & WF_TTWU) { record_wakee(p); + /* current is handling requests on behalf of the waking process, + * both want to run on the same core in seesaw manner. + */ + if (p->seesaw_req && current->seesaw_io && + cpumask_test_cpu(cpu, p->cpus_ptr)) + return cpu; + if ((wake_flags & WF_CURRENT_CPU) && cpumask_test_cpu(cpu, p->cpus_ptr)) return cpu; (not tested yet) Thanks, Bernd