[PATCH 4/5] [net/9p] Achieve zero copy on write path.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



This patch avoids copy_from_user by employing get_user_pages_fast() on the
udata buffer. This will eliminate an additonal copy of user buffer into
kernel buffer befre placing on the virtio ring.

Signed-off-by: Venkateswararao Jujjuri <jvrao@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Badari Pulavarty <pbadari@xxxxxxxxxx>
---
 net/9p/client.c   |   32 ++++++++++++++++++++-
 net/9p/protocol.c |   83 +++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 105 insertions(+), 10 deletions(-)

diff --git a/net/9p/client.c b/net/9p/client.c
index 5487896..7ce58fb 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -36,6 +36,7 @@
 #include <linux/parser.h>
 #include <net/9p/client.h>
 #include <net/9p/transport.h>
+#include <linux/mm.h>
 #include "protocol.h"
 
 /*
@@ -521,6 +522,25 @@ static int p9_client_flush(struct p9_client *c, struct p9_req_t *oldreq)
 }
 
 /**
+ * p9_release_req_pages - Release pages after the transaction.
+ * @req - Request buffer.
+ *
+ */
+static void
+p9_release_req_pages(struct p9_req_t *req)
+{
+	int i = 0;
+	while (req->tc->pdata[i] && req->tc->pdata_mapped_pages--) {
+		put_page(req->tc->pdata[i]);
+		req->tc->pdata[i] = NULL;
+		i++;
+	}
+	req->tc->pdata_write_len = 0;
+	req->tc->pdata_read_len = 0;
+}
+
+
+/**
  * p9_client_rpc - issue a request and wait for a response
  * @c: client session
  * @type: type of request
@@ -575,6 +595,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 	err = c->trans_mod->request(c, req);
 	if (err < 0) {
 		c->status = Disconnected;
+		if (req->tc->pdata_write_len || req->tc->pdata_read_len)
+			p9_release_req_pages(req);
 		goto reterr;
 	}
 
@@ -583,6 +605,8 @@ p9_client_rpc(struct p9_client *c, int8_t type, const char *fmt, ...)
 						req->status >= REQ_STATUS_RCVD);
 	P9_DPRINTK(P9_DEBUG_MUX, "wait %p tag: %d returned %d\n",
 						req->wq, tag, err);
+	if (req->tc->pdata_write_len || req->tc->pdata_read_len)
+		p9_release_req_pages(req);
 
 	if (req->status == REQ_STATUS_ERROR) {
 		P9_DPRINTK(P9_DEBUG_ERROR, "req_status error %d\n", req->t_err);
@@ -1331,9 +1355,15 @@ p9_client_write(struct p9_fid *fid, char *data, const char __user *udata,
 	if (data)
 		req = p9_client_rpc(clnt, P9_TWRITE, "dqD", fid->fid, offset,
 								rsize, data);
-	else
+	else {
+		if (clnt->trans_mod->capability &&
+			clnt->trans_mod->capability(P9_CAP_GET_MAX_SG_PAGES)) {
+
+			rsize = count;
+		}
 		req = p9_client_rpc(clnt, P9_TWRITE, "dqU", fid->fid, offset,
 								rsize, udata);
+	}
 	if (IS_ERR(req)) {
 		err = PTR_ERR(req);
 		goto error;
diff --git a/net/9p/protocol.c b/net/9p/protocol.c
index ca63aff..97f313d 100644
--- a/net/9p/protocol.c
+++ b/net/9p/protocol.c
@@ -31,9 +31,12 @@
 #include <linux/slab.h>
 #include <linux/sched.h>
 #include <linux/types.h>
+#include <linux/parser.h>
 #include <net/9p/9p.h>
 #include <net/9p/client.h>
 #include "protocol.h"
+#include <net/9p/transport.h>
+#include <linux/pagemap.h>
 
 #ifndef MIN
 #define MIN(a, b) (((a) < (b)) ? (a) : (b))
@@ -110,6 +113,51 @@ static size_t pdu_read(struct p9_fcall *pdu, void *data, size_t size)
 	return size - len;
 }
 
+static int
+pdu_fill_pages(struct p9_fcall *pdu, const char __user *udata, size_t size,
+		int rw, int max_sg_pages)
+{
+	int nr_pages;
+	uint32_t first_page_bytes = 0;
+	int pdata_len;
+
+	nr_pages = size >> PAGE_SHIFT;
+	pdu->pdata_off = (size_t)udata & (PAGE_SIZE-1);
+	if (pdu->pdata_off)
+		first_page_bytes = PAGE_SIZE - pdu->pdata_off;
+	if (size - (first_page_bytes + (nr_pages << PAGE_SHIFT))) {
+		/* trailing partial page */
+		nr_pages++;
+	}
+	if (first_page_bytes) {
+		/* leading partial page */
+		nr_pages++;
+	}
+	nr_pages = min(max_sg_pages, nr_pages);
+	pdu->pdata = (struct page **)(pdu->sdata + pdu->size);
+	pdu->pdata_write_len = 0;
+	pdu->pdata_read_len = 0;
+	pdu->pdata_mapped_pages = get_user_pages_fast((unsigned long)udata,
+			nr_pages, rw, pdu->pdata);
+	if (pdu->pdata_mapped_pages < 0) {
+		printk(KERN_WARNING "get_user_pages_fast failed:%d udata:%p"
+				"nr_pages:%d\n", pdu->pdata_mapped_pages,
+				udata, nr_pages);
+		pdu->pdata_mapped_pages = 0;
+		return -1;
+	}
+	if (pdu->pdata_off) {
+		pdata_len = first_page_bytes;
+		pdata_len += min((size - pdata_len),
+				((size_t)pdu->pdata_mapped_pages - 1) <<
+								PAGE_SHIFT);
+	} else {
+		pdata_len = min(size, (size_t)pdu->pdata_mapped_pages <<
+								PAGE_SHIFT);
+	}
+	return pdata_len;
+}
+
 static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
 {
 	size_t len = MIN(pdu->capacity - pdu->size, size);
@@ -119,15 +167,31 @@ static size_t pdu_write(struct p9_fcall *pdu, const void *data, size_t size)
 }
 
 static size_t
-pdu_write_u(struct p9_fcall *pdu, const char __user *udata, size_t size)
+pdu_write_u(struct p9_fcall *pdu, struct p9_client *c, const char __user *udata,
+		size_t size)
 {
-	size_t len = MIN(pdu->capacity - pdu->size, size);
-	int err = copy_from_user(&pdu->sdata[pdu->size], udata, len);
-	if (err)
-		printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
+	size_t len;
+	int err;
+	int max_req_sg_pages = 0;
 
-	pdu->size += len;
-	return size - len;
+	if (c->trans_mod->capability &&
+			(udata && !segment_eq(get_fs(), KERNEL_DS))) {
+		max_req_sg_pages =
+			c->trans_mod->capability(P9_CAP_GET_MAX_SG_PAGES);
+	}
+	if (max_req_sg_pages) {
+		len = pdu_fill_pages(pdu, udata, size, 0, max_req_sg_pages);
+		if (len < 0)
+			return len;
+		pdu->pdata_write_len = len;
+	} else {
+		len = MIN(pdu->capacity - pdu->size, size);
+		err = copy_from_user(&pdu->sdata[pdu->size], udata, len);
+		if (err)
+			printk(KERN_WARNING "pdu_write_u returning: %d\n", err);
+		pdu->size += len;
+	}
+	return len;
 }
 
 /*
@@ -467,7 +531,8 @@ p9pdu_vwritef(struct p9_fcall *pdu, struct p9_client *c, const char *fmt,
 				const char __user *udata =
 						va_arg(ap, const void __user *);
 				errcode = p9pdu_writef(pdu, c, "d", count);
-				if (!errcode && pdu_write_u(pdu, udata, count))
+				if (!errcode &&
+					pdu_write_u(pdu, c, udata, count) < 0)
 					errcode = -EFAULT;
 			}
 			break;
-- 
1.6.5.2

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux