Before kernel 6.1, we observed the following oops in a stress test. It is caused by reordering of the stores that set the NFSD_FILE_HASHED and NFSD_FILE_PENDING bits; in addition, the waiter needs an smp_mb__after_atomic() to pair with the barrier issued after NFSD_FILE_PENDING is cleared. Task A: Task B: nfsd_file_acquire: new = nfsd_file_alloc() open_file: refcount_inc(&nf->nf_ref); nf = nfsd_file_find_locked(); wait_for_construction: since nf_flags is zero it will not wait wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING); if (status == nfs_ok) { *pnf = nf; //OOPS happens! Unable to handle kernel NULL pointer at virtual address 0000000000000028 Mem abort info: ESR = 0x96000004 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x00000004 CM = 0, WnR = 0 user pgtable: 4k pages, 48-bit VAs, pgdp=0000000152546000 [0000000000000028] pgd=0000000000000000, p4d=0000000000000000 Internal error: Oops: 96000004 [#1] PREEMPT_RT SMP CPU: 7 PID: 1767 Comm: nfsd Not tainted 5.10.104 #1 pstate: 40c00005 (nZcv daif +PAN +UAO -TCO BTYPE=--) pc : nfsd_read+0x78/0x280 [nfsd] lr : nfsd_read+0x68/0x280 [nfsd] sp : ffff80001c0b3c70 x29: ffff80001c0b3c70 x28: 0000000000000000 x27: 0000000000000002 x26: ffff0000c8a3ca70 x25: ffff0000c8a45180 x24: 0000000000002000 x23: ffff0000c8a45178 x22: ffff0000c8a45008 x21: ffff0000c31aac40 x20: ffff0000c8a3c000 x19: 0000000000000000 x18: 0000000000000001 x17: 0000000000000007 x16: 00000000b35db681 x15: 0000000000000156 x14: ffff0000c3f91300 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff000118014a80 x7 : 0000000000000002 x6 : ffff0002559142dc x5 : ffff0000c31aac40 x4 : 0000000000000004 x3 : 0000000000000001 x2 : 0000000000000000 x1 : 0000000000000001 x0 : ffff000255914280 Call trace: nfsd_read+0x78/0x280 [nfsd] nfsd3_proc_read+0x98/0xc0 [nfsd] nfsd_dispatch+0xc8/0x160 [nfsd] svc_process_common+0x440/0x790 svc_process+0xb0/0xd0 nfsd+0xfc/0x160 [nfsd] kthread+0x17c/0x1a0 ret_from_fork+0x10/0x18 Signed-off-by: Haodong Wong 
<haydenw.kernel@xxxxxxxxx> --- fs/nfsd/filecache.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c index e30e1ddc1ace..ba980369e6b4 100644 --- a/fs/nfsd/filecache.c +++ b/fs/nfsd/filecache.c @@ -974,8 +974,12 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, nfsd_file_slab_free(&new->nf_rcu); wait_for_construction: + /* In case of set bit NFSD_FILE_PENDING and NFSD_FILE_HASHED reorder */ + smp_rmb(); wait_on_bit(&nf->nf_flags, NFSD_FILE_PENDING, TASK_UNINTERRUPTIBLE); + /* Be a paire of smp_mb after clear bit NFSD_FILE_PENDING */ + smp_mb__after_atomic(); /* Did construction of this file fail? */ if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags)) { if (!retry) { @@ -1018,8 +1022,11 @@ nfsd_file_acquire(struct svc_rqst *rqstp, struct svc_fh *fhp, nf = new; /* Take reference for the hashtable */ refcount_inc(&nf->nf_ref); - __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); __set_bit(NFSD_FILE_PENDING, &nf->nf_flags); + /* Ensure set bit order set NFSD_FILE_HASHED after set NFSD_FILE_PENDING */ + smp_wmb(); + __set_bit(NFSD_FILE_HASHED, &nf->nf_flags); + list_lru_add(&nfsd_file_lru, &nf->nf_lru); hlist_add_head_rcu(&nf->nf_node, &nfsd_file_hashtbl[hashval].nfb_head); ++nfsd_file_hashtbl[hashval].nfb_count; -- 2.25.1