Hi
Thanks for your patch. I've tested it and confirmed it solves the problem
we found.
This is how I reproduce the problem and verify whether the problem is
solved.
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index df9669d4ded7..7d9fd0166f91 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -3642,6 +3642,7 @@ static void nfs4_close_done(struct rpc_task *task,
void *data)
.inode = calldata->inode,
.stateid = &calldata->arg.stateid,
};
+ static int fault_inject = 1;
if (!nfs4_sequence_done(task, &calldata->res.seq_res))
return;
@@ -3655,6 +3656,11 @@ static void nfs4_close_done(struct rpc_task
*task, void *data)
/* hmm. we are done with the inode, and in the process of freeing
* the state_owner. we keep this around to process errors
*/
+ if (fault_inject) {
+ fault_inject = 0;
+ printk("%s %d inject err to tk_status\n", __func__,
__LINE__);
+ task->tk_status = -NFS4ERR_OLD_STATEID;
+ }
switch (task->tk_status) {
case 0:
res_stateid = &calldata->res.stateid;
@@ -3701,6 +3707,9 @@ static void nfs4_close_done(struct rpc_task *task,
void *data)
return;
out_restart:
task->tk_status = 0;
+ printk("%s %d sleep before restart task %px(tk_rpc_status
%d)...\n", __func__, __LINE__, task, task->tk_rpc_status);
+ msleep(10 * 1000);
+ printk("%s %d sleep done\n", __func__, __LINE__);
rpc_restart_call_prepare(task);
goto out_release;
}
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 2fe88ea79a70..78bc9739df82 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1968,6 +1968,11 @@ rpc_xdr_encode(struct rpc_task *task)
static void
call_encode(struct rpc_task *task)
{
+ static int count = 0;
+ if (count++ > 10000) {
+ count = 0;
+ printk("call encode for task %px 10000 times\n", task);
+ }
if (!rpc_task_need_encode(task))
goto out;
1) reproduce the problem -- get an rpc_task that keeps looping.
[2025-02-05 14:19:50] Fedora 26 (Twenty Six)
[2025-02-05 14:19:50] Kernel 6.14.0-rc1-00028-g5c8c229261f1-dirty on an
x86_64 (ttyS0)
[2025-02-05 14:19:50]
[2025-02-05 14:19:50] nfs_test2 login: root
[2025-02-05 14:28:28] Password:
[2025-02-05 14:28:31] Last login: Wed Feb 5 14:01:32 from 192.168.240.1
[2025-02-05 14:28:33] [root@nfs_test2 ~]# mkfs.ext4 -F /dev/sdb
[2025-02-05 14:30:56] mke2fs 1.46.5 (30-Dec-2021)
[2025-02-05 14:30:56] /dev/sdb contains a ext4 file system
[2025-02-05 14:30:56] last mounted on /mnt/sdb on Wed Feb 5
11:23:04 2025
[2025-02-05 14:30:56] Discarding device blocks: 0/5242880
done
[2025-02-05 14:30:56] Creating filesystem with 5242880 4k blocks and
1310720 inodes
[2025-02-05 14:30:56] Filesystem UUID: 5acab554-5ba5-4ad4-a449-8cc69b69bcf1
[2025-02-05 14:30:56] Superblock backups stored on blocks:
[2025-02-05 14:30:56] 32768, 98304, 163840, 229376, 294912, 819200,
884736, 1605632, 2654208,
[2025-02-05 14:30:56] 4096000
[2025-02-05 14:30:56]
[2025-02-05 14:30:56] Allocating group tables: 0/160 done
[2025-02-05 14:30:56] Writing inode tables: 0/160 done
[2025-02-05 14:30:56] Creating journal (32768 blocks): done
[2025-02-05 14:30:56] Writing superblocks and filesystem accounting
information: 0/160 done
[2025-02-05 14:30:57]
[2025-02-05 14:30:57] [root@nfs_test2 ~]# mount /dev/sdb /mnt/sdb
[2025-02-05 14:30:57] [root@nfs_test2 ~]# echo 123 >
/mnt/sdb/nfs_testfile1209
[2025-02-05 14:30:57] [root@nfs_test2 ~]# echo "/mnt
*(rw,no_root_squash,fsid=0)" > /etc/exports
[2025-02-05 14:30:57] [root@nfs_test2 ~]# echo "/mnt/sdb
*(rw,no_root_squash,fsid=1)" >> /etc/exports
[2025-02-05 14:30:57] [root@nfs_test2 ~]# systemctl restart nfs-server
[2025-02-05 14:31:00] [root@nfs_test2 ~]# mount -t nfs -o
rw,soft,vers=4.1 127.0.0.1:/sdb /mnt/sdbb
[2025-02-05 14:31:08] [root@nfs_test2 ~]# cat /mnt/sdbb/nfs_testfile1209 &
[2025-02-05 14:31:11] [1] 2815
[2025-02-05 14:31:11] [root@nfs_test2 ~]# 123
[2025-02-05 14:31:11] [ 744.234368][ T121] nfs4_close_done 3661
inject err to tk_status
[2025-02-05 14:31:16]
[2025-02-05 14:31:16] [root@nfs_test2 ~]# [ 749.297829][ T121]
nfs4_close_done 3710 sleep before restart task
ffff88812718b080(tk_rpc_status 0)...
[2025-02-05 14:31:18]
[2025-02-05 14:31:18] [root@nfs_test2 ~]# umount -f /mnt/sdbb
[2025-02-05 14:31:19] umount.nfs4: /mnt/sdbb: device is busy
[2025-02-05 14:31:19] [root@nfs_test2 ~]# [ 759.537900][ T121]
nfs4_close_done 3712 sleep done
[2025-02-05 14:31:27] [ 759.610608][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 759.671807][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 759.733124][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 759.794334][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 759.855741][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 759.917215][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 759.978567][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 760.039793][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 760.101126][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 760.162415][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 760.223674][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:27] [ 760.284931][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.346262][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.407437][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.468764][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.530053][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.591361][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.652477][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.713855][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.775173][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.836712][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.897993][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 760.959219][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 761.020468][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 761.081690][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 761.142917][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 761.204055][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:28] [ 761.265389][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:29] [ 761.326620][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:29] [ 761.387846][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:29] [ 761.449004][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:29] [ 761.510219][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:29] [ 761.571452][ T121] call encode for task
ffff88812718b080 10000 times
[2025-02-05 14:31:29] [ 761.632751][ T121] call encode for task
ffff88812718b080 10000 times
2) verify whether the problem has been solved -- no rpc_task is looping
[2025-02-05 14:51:38] Fedora 26 (Twenty Six)
[2025-02-05 14:51:38] Kernel 6.14.0-rc1-00028-g5c8c229261f1-dirty on an
x86_64 (ttyS0)
[2025-02-05 14:51:38]
[2025-02-05 14:51:38] nfs_test2 login: root
[2025-02-05 15:01:15] Password:
[2025-02-05 15:01:17] Last login: Wed Feb 5 14:22:35 on ttyS0
[2025-02-05 15:01:20] [root@nfs_test2 ~]# mkfs.ext4 -F /dev/sdb
[2025-02-05 15:01:25] mke2fs 1.46.5 (30-Dec-2021)
[2025-02-05 15:01:25] /dev/sdb contains a ext4 file system
[2025-02-05 15:01:25] last mounted on /mnt/sdb on Wed Feb 5
14:25:01 2025
[2025-02-05 15:01:25] Discarding device blocks: 0/5242880
done
[2025-02-05 15:01:25] Creating filesystem with 5242880 4k blocks and
1310720 inodes
[2025-02-05 15:01:25] Filesystem UUID: ba1443f4-2501-4002-b860-95480332c82a
[2025-02-05 15:01:25] Superblock backups stored on blocks:
[2025-02-05 15:01:25] 32768, 98304, 163840, 229376, 294912, 819200,
884736, 1605632, 2654208,
[2025-02-05 15:01:25] 4096000
[2025-02-05 15:01:25]
[2025-02-05 15:01:25] Allocating group tables: 0/160 done
[2025-02-05 15:01:25] Writing inode tables: 0/160 done
[2025-02-05 15:01:25] Creating journal (32768 blocks): done
[2025-02-05 15:01:25] Writing superblocks and filesystem accounting
information: 0/160 done
[2025-02-05 15:01:25]
[2025-02-05 15:01:25] [root@nfs_test2 ~]# mount /dev/sdb /mnt/sdb
[2025-02-05 15:01:25] [root@nfs_test2 ~]# echo 123 >
/mnt/sdb/nfs_testfile1209
[2025-02-05 15:01:25] [root@nfs_test2 ~]# echo "/mnt
*(rw,no_root_squash,fsid=0)" > /etc/exports
[2025-02-05 15:01:25] [root@nfs_test2 ~]# echo "/mnt/sdb
*(rw,no_root_squash,fsid=1)" >> /etc/exports
[2025-02-05 15:01:25] [root@nfs_test2 ~]# systemctl restart nfs-server
[2025-02-05 15:01:29] [root@nfs_test2 ~]#
[2025-02-05 15:01:31] [root@nfs_test2 ~]# mount -t nfs -o
rw,soft,vers=4.1 127.0.0.1:/sdb /mnt/sdbb
[2025-02-05 15:01:37] [root@nfs_test2 ~]# cat /mnt/sdbb/nfs_testfile1209 &
[2025-02-05 15:01:44] [1] 2818
[2025-02-05 15:01:44] [root@nfs_test2 ~]# 123
[2025-02-05 15:01:44] [ 668.110248][ T115] nfs4_close_done 3661
inject err to tk_status
[2025-02-05 15:01:44]
[2025-02-05 15:01:44] [root@nfs_test2 ~]# [ 673.366194][ T115]
nfs4_close_done 3710 sleep before restart task
ffff88811d5d1080(tk_rpc_status 0)...
[2025-02-05 15:01:50]
[2025-02-05 15:01:50] [root@nfs_test2 ~]#
[2025-02-05 15:01:50] [root@nfs_test2 ~]# umount -f /mnt/sdbb
[2025-02-05 15:01:51] umount.nfs4: /mnt/sdbb: device is busy
[2025-02-05 15:01:51] [root@nfs_test2 ~]#
[2025-02-05 15:01:53] [root@nfs_test2 ~]# [ 683.606203][ T115]
nfs4_close_done 3712 sleep done
[2025-02-05 15:02:02]
[2025-02-05 15:02:02] [1]+ Done cat
/mnt/sdbb/nfs_testfile1209
[2025-02-05 15:02:02] [root@nfs_test2 ~]#
[2025-02-05 15:02:28] [root@nfs_test2 ~]#
[2025-02-05 15:02:29] [root@nfs_test2 ~]#
[2025-02-05 15:02:29] [root@nfs_test2 ~]#
在 2025/2/2 4:00, trondmy@xxxxxxxxxx 写道:
From: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
If rpc_signal_task() is called while a task is in an rpc_call_done()
callback function, and the latter calls rpc_restart_call(), the task can
end up looping due to the RPC_TASK_SIGNALLED flag being set without the
tk_rpc_status being set.
Removing the redundant mechanism for signalling the task fixes the
looping behaviour.
Reported-by: Li Lingfeng <lilingfeng3@xxxxxxxxxx>
Fixes: 39494194f93b ("SUNRPC: Fix races with rpc_killall_tasks()")
Signed-off-by: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
---
include/linux/sunrpc/sched.h | 3 +--
include/trace/events/sunrpc.h | 3 +--
net/sunrpc/sched.c | 2 --
3 files changed, 2 insertions(+), 6 deletions(-)
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index fec1e8a1570c..eac57914dcf3 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -158,7 +158,6 @@ enum {
RPC_TASK_NEED_XMIT,
RPC_TASK_NEED_RECV,
RPC_TASK_MSG_PIN_WAIT,
- RPC_TASK_SIGNALLED,
};
#define rpc_test_and_set_running(t) \
@@ -171,7 +170,7 @@ enum {
#define RPC_IS_ACTIVATED(t) test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate)
-#define RPC_SIGNALLED(t) test_bit(RPC_TASK_SIGNALLED, &(t)->tk_runstate)
+#define RPC_SIGNALLED(t) (READ_ONCE(task->tk_rpc_status) == -ERESTARTSYS)
/*
* Task priorities.
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index b13dc275ef4a..851841336ee6 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -360,8 +360,7 @@ TRACE_EVENT(rpc_request,
{ (1UL << RPC_TASK_ACTIVE), "ACTIVE" }, \
{ (1UL << RPC_TASK_NEED_XMIT), "NEED_XMIT" }, \
{ (1UL << RPC_TASK_NEED_RECV), "NEED_RECV" }, \
- { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" }, \
- { (1UL << RPC_TASK_SIGNALLED), "SIGNALLED" })
+ { (1UL << RPC_TASK_MSG_PIN_WAIT), "MSG_PIN_WAIT" })
DECLARE_EVENT_CLASS(rpc_task_running,
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index cef623ea1506..9b45fbdc90ca 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -864,8 +864,6 @@ void rpc_signal_task(struct rpc_task *task)
if (!rpc_task_set_rpc_status(task, -ERESTARTSYS))
return;
trace_rpc_task_signalled(task, task->tk_action);
- set_bit(RPC_TASK_SIGNALLED, &task->tk_runstate);
- smp_mb__after_atomic();
queue = READ_ONCE(task->tk_waitqueue);
if (queue)
rpc_wake_up_queued_task(queue, task);