[PATCH v6 22/34] nfs: Pin pages rather than ref'ing if appropriate

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Convert the NFS direct I/O code to use iov_iter_extract_pages() instead of
iov_iter_get_pages().  This will pin pages or leave them unaltered rather
than getting a ref on them as appropriate to the iterator.

The pages need to be pinned for DIO-read rather than having refs taken on
them to prevent VM copy-on-write from malfunctioning during a concurrent
fork() (the result of the I/O would otherwise end up only visible to the
child process and not the parent).

Signed-off-by: David Howells <dhowells@xxxxxxxxxx>
cc: Trond Myklebust <trond.myklebust@xxxxxxxxxxxxxxx>
cc: Anna Schumaker <anna@xxxxxxxxxx>
cc: Jeff Layton <jlayton@xxxxxxxxxx>
cc: linux-nfs@xxxxxxxxxxxxxxx
---

 fs/nfs/direct.c |   32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 42af84685f20..4a3108db2cb6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -142,11 +142,15 @@ int nfs_swap_rw(struct kiocb *iocb, struct iov_iter *iter)
 	return 0;
 }
 
-static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
+static void nfs_direct_release_pages(struct page **pages, unsigned int npages,
+				     unsigned int cleanup_mode)
 {
 	unsigned int i;
-	for (i = 0; i < npages; i++)
-		put_page(pages[i]);
+
+	if (cleanup_mode) {
+		for (i = 0; i < npages; i++)
+			page_put_unpin(pages[i], cleanup_mode);
+	}
 }
 
 void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
@@ -327,17 +331,16 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 	inode_dio_begin(inode);
 
 	while (iov_iter_count(iter)) {
-		struct page **pagevec;
+		struct page **pagevec = NULL;
 		size_t bytes;
 		size_t pgbase;
 		unsigned npages, i;
 
-		result = iov_iter_get_pages_alloc(iter, &pagevec,
-						  rsize, &pgbase,
-						  FOLL_DEST_BUF);
+		result = iov_iter_extract_pages(iter, &pagevec, rsize, INT_MAX,
+						FOLL_DEST_BUF, &pgbase);
 		if (result < 0)
 			break;
-	
+
 		bytes = result;
 		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
 		for (i = 0; i < npages; i++) {
@@ -363,7 +366,8 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
 			pos += req_len;
 			dreq->bytes_left -= req_len;
 		}
-		nfs_direct_release_pages(pagevec, npages);
+		nfs_direct_release_pages(pagevec, npages,
+					 iov_iter_extract_mode(iter, FOLL_DEST_BUF));
 		kvfree(pagevec);
 		if (result < 0)
 			break;
@@ -787,14 +791,13 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 
 	NFS_I(inode)->write_io += iov_iter_count(iter);
 	while (iov_iter_count(iter)) {
-		struct page **pagevec;
+		struct page **pagevec = NULL;
 		size_t bytes;
 		size_t pgbase;
 		unsigned npages, i;
 
-		result = iov_iter_get_pages_alloc(iter, &pagevec,
-						  wsize, &pgbase,
-						  FOLL_SOURCE_BUF);
+		result = iov_iter_extract_pages(iter, &pagevec, wsize, INT_MAX,
+						FOLL_SOURCE_BUF, &pgbase);
 		if (result < 0)
 			break;
 
@@ -831,7 +834,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 			pos += req_len;
 			dreq->bytes_left -= req_len;
 		}
-		nfs_direct_release_pages(pagevec, npages);
+		nfs_direct_release_pages(pagevec, npages,
+					 iov_iter_extract_mode(iter, FOLL_SOURCE_BUF));
 		kvfree(pagevec);
 		if (result < 0)
 			break;





[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux