The commit handling code is not safe against memory-pressure deadlocks when writing to swap. In particular, nfs_commitdata_alloc() blocks indefinitely waiting for memory, and this can consume all available workqueue threads. Swap-out most likely uses STABLE writes anyway as COND_STABLE indicates that a stable write should be used if the write fits in a single request, and it normally does. However, if we ever swap with a small wsize, or gather unusually large numbers of pages for a single write, this might change. For safety, make it explicit in the code that direct writes used for swap must always use FLUSH_STABLE. Signed-off-by: NeilBrown <neilb@xxxxxxx> --- fs/nfs/direct.c | 12 +++++++----- fs/nfs/file.c | 2 +- include/linux/nfs_fs.h | 3 ++- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1e80d243ba25..8d3b12402725 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -173,7 +173,7 @@ ssize_t nfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) if (iov_iter_rw(iter) == READ) return nfs_file_direct_read(iocb, iter); - return nfs_file_direct_write(iocb, iter); + return nfs_file_direct_write(iocb, iter, FLUSH_STABLE); } static void nfs_direct_release_pages(struct page **pages, unsigned int npages) @@ -789,7 +789,7 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = { */ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, struct iov_iter *iter, - loff_t pos) + loff_t pos, int ioflags) { struct nfs_pageio_descriptor desc; struct inode *inode = dreq->inode; @@ -797,7 +797,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, size_t requested_bytes = 0; size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE); - nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false, + nfs_pageio_init_write(&desc, inode, ioflags, false, &nfs_direct_write_completion_ops); desc.pg_dreq = dreq; get_dreq(dreq); @@ -875,6 +875,7 @@ 
static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block * @iter: vector of user buffers from which to write data + * @ioflags: flags for nfs_pageio_init_write() * * We use this function for direct writes instead of calling * generic_file_aio_write() in order to avoid taking the inode @@ -891,7 +892,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) +ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, + int ioflags) { ssize_t result, requested; size_t count; @@ -935,7 +937,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter) nfs_start_io_direct(inode); - requested = nfs_direct_write_schedule_iovec(dreq, iter, pos); + requested = nfs_direct_write_schedule_iovec(dreq, iter, pos, ioflags); if (mapping->nrpages) { invalidate_inode_pages2_range(mapping, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 59c271f42ea5..878a6a510a5e 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -630,7 +630,7 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from) result = generic_write_checks(iocb, from); if (result <= 0) return result; - return nfs_file_direct_write(iocb, from); + return nfs_file_direct_write(iocb, from, FLUSH_COND_STABLE); } dprintk("NFS: write(%pD2, %zu@%Ld)\n", diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5a605e51f4b1..ca312aea6bec 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -509,7 +509,8 @@ extern ssize_t nfs_direct_IO(struct kiocb *, struct iov_iter *); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, - struct iov_iter 
*iter); + struct iov_iter *iter, + int ioflags); /* * linux/fs/nfs/dir.c