FIXME: currently there's only one path that requires async layout return to avoid blocking of the rpciod like the stack below. We really neee the pnfs state machine instead. rpciod/0 D 0000003d8c2d83b0 0 1029 2 0x00000000 778ccdd0 60264f00 77a6a000 778ccb20 77a6ba20 6001375b 77a6ba20 77a6a000 77a6a000 778cc8a0 77a6ba70 601b192f 77a6ba50 601b35a6 6ed537f0 77a6a000 77a6bb00 63506fd8 7b1870c8 77a6bb10 77a6ba90 7b187100 63506fd8 77a6a000 Call Trace: 77a6b9f8: [<6001375b>] _switch_to+0x5e/0xae 77a6ba28: [<601b192f>] schedule+0x1dd/0x21b 77a6ba38: [<601b35a6>] _raw_spin_unlock_irqrestore+0x18/0x1c 77a6ba60: [<7b1870c8>] rpc_wait_bit_killable+0x0/0x3c [sunrpc] 77a6ba78: [<7b187100>] rpc_wait_bit_killable+0x38/0x3c [sunrpc] 77a6ba98: [<601b1d9f>] __wait_on_bit+0x43/0x76 77a6bae8: [<601b1e43>] out_of_line_wait_on_bit+0x71/0x7c 77a6baf8: [<7b1870c8>] rpc_wait_bit_killable+0x0/0x3c [sunrpc] 77a6bb20: [<600430d6>] wake_bit_function+0x0/0x2e 77a6bb68: [<7b18709d>] __rpc_wait_for_completion_task+0x34/0x36 [sunrpc] 77a6bb78: [<7bf36048>] nfs4_wait_for_completion_rpc_task+0xb/0xd [nfs] 77a6bb88: [<7bf369ac>] pnfs4_proc_layoutreturn+0xae/0xe6 [nfs] 77a6bbc8: [<6007c8b1>] kmem_cache_alloc+0xaf/0xbe 77a6bc08: [<7bf4dc39>] _pnfs_return_layout+0x450/0x4d4 [nfs] 77a6bc38: [<7bf3cb79>] decode_attr_time+0x17/0x4d [nfs] 77a6bc58: [<7bf429d7>] decode_getfattr+0xaae/0xc02 [nfs] 77a6bcb8: [<7bf4601b>] __nfs4_close+0x15d/0x17a [nfs] 77a6bd28: [<7bf46053>] nfs4_close_state+0xb/0xd [nfs] 77a6bd38: [<7bf34183>] nfs4_close_context+0x26/0x28 [nfs] 77a6bd48: [<7bf2315c>] __put_nfs_open_context+0x79/0xa1 [nfs] 77a6bd78: [<7bf23227>] put_nfs_open_context+0xb/0xd [nfs] 77a6bd88: [<7bf4b763>] pnfs_layoutcommit_done+0xa5/0xad [nfs] 77a6bdb8: [<7bf3b12e>] pnfs_layoutcommit_rpc_done+0x39/0x56 [nfs] 77a6bde8: [<7b18712b>] rpc_exit_task+0x27/0x52 [sunrpc] 77a6be08: [<7b187817>] __rpc_execute+0x88/0x23a [sunrpc] 77a6be30: [<7b1879ee>] rpc_async_schedule+0x0/0x12 [sunrpc] 77a6be48: [<7b1879fe>] rpc_async_schedule+0x10/0x12 [sunrpc] 77a6be58: [<6003fc45>] worker_thread+0x114/0x1a5 77a6be80: [<600430a2>] autoremove_wake_function+0x0/0x34 77a6beb8: [<6003fb31>] worker_thread+0x0/0x1a5 77a6bed8: [<60042cf7>] kthread+0x8e/0x96 77a6bf48: [<60021429>] run_kernel_thread+0x41/0x4a 77a6bf58: [<60042c69>] kthread+0x0/0x96 77a6bf98: [<60021410>] run_kernel_thread+0x28/0x4a 77a6bfc8: [<600136d3>] new_thread_handler+0x71/0x9b [missing from FIXME: async layout return] Signed-off-by: Benny Halevy <bhalevy@xxxxxxxxxxx> --- fs/nfs/callback_proc.c | 7 ++++--- fs/nfs/inode.c | 2 +- fs/nfs/nfs4proc.c | 17 ++++++++++------- fs/nfs/nfs4state.c | 4 ++-- fs/nfs/pnfs.c | 21 +++++++++++++-------- fs/nfs/pnfs.h | 9 +++++---- 6 files changed, 35 insertions(+), 25 deletions(-) diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c index b3aa9e4..05e4592 100644 --- a/fs/nfs/callback_proc.c +++ b/fs/nfs/callback_proc.c @@ -209,7 +209,7 @@ static int pnfs_recall_layout(void *data) if (rl.cbl_recall_type == RETURN_FILE) { status = pnfs_return_layout(inode, &rl.cbl_seg, &rl.cbl_stateid, - RETURN_FILE); + RETURN_FILE, true); if (status) dprintk("%s RETURN_FILE error: %d\n", __func__, status); goto out; @@ -221,12 +221,13 @@ static int pnfs_recall_layout(void *data) /* FIXME: This loop is inefficient, running in O(|s_inodes|^2) */ while ((ino = nfs_layoutrecall_find_inode(clp, &rl)) != NULL) { /* XXX need to check status on pnfs_return_layout */ - pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE); + pnfs_return_layout(ino, &rl.cbl_seg, NULL, RETURN_FILE, true); iput(ino); } /* send final layoutreturn */ - status = pnfs_return_layout(inode, &rl.cbl_seg, NULL, rl.cbl_recall_type); + status = pnfs_return_layout(inode, &rl.cbl_seg, NULL, + rl.cbl_recall_type, true); if (status) printk(KERN_INFO "%s: ignoring pnfs_return_layout status=%d\n", __func__, status); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index a574492..8661036 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -1388,7 +1388,7 @@ void nfs4_clear_inode(struct inode *inode) /* First call standard NFS clear_inode() code */ nfs_clear_inode(inode); #ifdef CONFIG_PNFS - pnfs_return_layout(inode, NULL, NULL, RETURN_FILE); + pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true); #endif /* CONFIG_PNFS */ } #endif diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 162a119..3324a35 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -1079,7 +1079,8 @@ static void pnfs4_layout_reclaim(struct nfs4_state *state) /* FIXME: send gratuitous layout commits and return with the reclaim * flag during grace period */ - pnfs_return_layout(state->inode, NULL, &state->open_stateid, RETURN_FILE); + pnfs_return_layout(state->inode, NULL, &state->open_stateid, + RETURN_FILE, true); pnfs_set_layout_stateid(&NFS_I(state->inode)->layout, &zero_stateid); #endif /* CONFIG_PNFS */ } @@ -2371,7 +2372,7 @@ pnfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr, if (pnfs_enabled_sb(server) && has_layout(nfsi) && pnfs_ld_layoutret_on_setattr(server->pnfs_curr_ld)) - pnfs_return_layout(inode, NULL, NULL, RETURN_FILE); + pnfs_return_layout(inode, NULL, NULL, RETURN_FILE, true); return nfs4_proc_setattr(dentry, fattr, sattr); } #endif /* CONFIG_PNFS */ @@ -5725,7 +5726,7 @@ static const struct rpc_call_ops nfs4_pnfs_layoutreturn_call_ops = { .rpc_release = nfs4_pnfs_layoutreturn_release, }; -int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp) +int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool wait) { struct inode *ino = lrp->args.inode; struct nfs_server *server = NFS_SERVER(ino); @@ -5742,7 +5743,7 @@ int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp) .callback_data = lrp, .flags = RPC_TASK_ASYNC, }; - int status; + int status = 0; dprintk("--> %s\n", __func__); lrp->res.seq_res.sr_slotid = NFS4_MAX_SLOT_TABLE; @@ -5751,9 +5752,11 @@ int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp) status = PTR_ERR(task); goto out; } - status = nfs4_wait_for_completion_rpc_task(task); - if (status == 0) - status = task->tk_status; + if (wait) { + status = nfs4_wait_for_completion_rpc_task(task); + if (status == 0) + status = task->tk_status; + } rpc_put_task(task); out: dprintk("<-- %s\n", __func__); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 30bff3b..a415e9d 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -590,7 +590,7 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fm struct nfs_inode *nfsi = NFS_I(state->inode); if (nfsi->layoutcommit_ctx) - pnfs_layoutcommit_inode(state->inode, 0); + pnfs_layoutcommit_inode(state->inode, wait); if (has_layout(nfsi) && nfsi->layout.roc_iomode) { struct nfs4_pnfs_layout_segment range; @@ -598,7 +598,7 @@ static void __nfs4_close(struct path *path, struct nfs4_state *state, fmode_t fm range.offset = 0; range.length = NFS4_MAX_UINT64; pnfs_return_layout(state->inode, &range, NULL, - RETURN_FILE); + RETURN_FILE, wait); } #endif /* CONFIG_PNFS */ nfs4_do_close(path, state, wait); diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6819509..51a4a9c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -680,7 +680,8 @@ pnfs_return_layout_barrier(struct nfs_inode *nfsi, static int return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range, const nfs4_stateid *stateid, /* optional */ - enum pnfs_layoutreturn_type type, struct pnfs_layout_type *lo) + enum pnfs_layoutreturn_type type, struct pnfs_layout_type *lo, + bool wait) { struct nfs4_pnfs_layoutreturn *lrp; struct nfs_server *server = NFS_SERVER(ino); @@ -705,7 +706,7 @@ return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range, else if (lo) pnfs_get_layout_stateid(&lrp->args.stateid, lo); - status = pnfs4_proc_layoutreturn(lrp); + status = pnfs4_proc_layoutreturn(lrp, wait); out: dprintk("<-- %s status: %d\n", __func__, status); return status; @@ -714,7 +715,8 @@ out: int _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range, const nfs4_stateid *stateid, /* optional */ - enum pnfs_layoutreturn_type type) + enum pnfs_layoutreturn_type type, + bool wait) { struct pnfs_layout_type *lo = NULL; struct nfs_inode *nfsi = NFS_I(ino); @@ -760,7 +762,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range, } if (nfsi->layoutcommit_ctx) { - status = pnfs_layoutcommit_inode(ino, 1); + status = pnfs_layoutcommit_inode(ino, wait); if (status) { dprintk("%s: layoutcommit failed, status=%d. " "Returning layout anyway\n", @@ -770,7 +772,7 @@ _pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *range, } } send_return: - status = return_layout(ino, &arg, stateid, type, lo); + status = return_layout(ino, &arg, stateid, type, lo, wait); out: dprintk("<-- %s status: %d\n", __func__, status); return status; @@ -1697,7 +1699,8 @@ pnfs_writeback_done(struct nfs_write_data *data) .length = data->args.count, }; dprintk("%s: retrying\n", __func__); - _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE); + _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE, + true); pnfs_initiate_write(data, NFS_CLIENT(data->inode), pdata->call_ops, pdata->how); } @@ -1828,7 +1831,8 @@ pnfs_read_done(struct nfs_read_data *data) .length = data->args.count, }; dprintk("%s: retrying\n", __func__); - _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE); + _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE, + true); pnfs_initiate_read(data, NFS_CLIENT(data->inode), pdata->call_ops); } @@ -2054,7 +2058,8 @@ pnfs_commit_done(struct nfs_write_data *data) .length = data->args.count, }; dprintk("%s: retrying\n", __func__); - _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE); + _pnfs_return_layout(data->inode, &range, NULL, RETURN_FILE, + true); pnfs_initiate_commit(data, NFS_CLIENT(data->inode), pdata->call_ops, pdata->how); } diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1adf357..832ddf8 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -29,7 +29,7 @@ extern int nfs4_pnfs_getdeviceinfo(struct super_block *sb, struct pnfs_device *dev); extern int pnfs4_proc_layoutget(struct nfs4_pnfs_layoutget *lgp); extern int pnfs4_proc_layoutcommit(struct pnfs_layoutcommit_data *data); -extern int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp); +extern int pnfs4_proc_layoutreturn(struct nfs4_pnfs_layoutreturn *lrp, bool wait); /* pnfs.c */ extern const nfs4_stateid zero_stateid; @@ -40,7 +40,7 @@ int pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, int _pnfs_return_layout(struct inode *, struct nfs4_pnfs_layout_segment *, const nfs4_stateid *stateid, /* optional */ - enum pnfs_layoutreturn_type); + enum pnfs_layoutreturn_type, bool wait); void set_pnfs_layoutdriver(struct super_block *sb, struct nfs_fh *fh, u32 id); void unmount_pnfs_layoutdriver(struct super_block *sb); int pnfs_use_read(struct inode *inode, ssize_t count); @@ -244,14 +244,15 @@ static inline void pnfs_modify_new_request(struct nfs_page *req, static inline int pnfs_return_layout(struct inode *ino, struct nfs4_pnfs_layout_segment *lseg, const nfs4_stateid *stateid, /* optional */ - enum pnfs_layoutreturn_type type) + enum pnfs_layoutreturn_type type, + bool wait) { struct nfs_inode *nfsi = NFS_I(ino); struct nfs_server *nfss = NFS_SERVER(ino); if (pnfs_enabled_sb(nfss) && (type != RETURN_FILE || has_layout(nfsi))) - return _pnfs_return_layout(ino, lseg, stateid, type); + return _pnfs_return_layout(ino, lseg, stateid, type, wait); return 0; } -- 1.6.6.1 -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html