Re: [Qemu-devel] [PATCH 2/2] 9p: v9fs new readpages.

Alexander Graf <agraf@xxxxxxx> · Tue, 25 Oct 2016 16:13:46 +0200

On 10/10/2016 07:24 PM, Edward Shishkin wrote:
Modify v9fs private ->readpages() method of address_space
operations for merging pages into long 9p messages.

Signed-off-by: Edward Shishkin <edward.shishkin@xxxxxxxxx>
---
  fs/9p/vfs_addr.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
  1 file changed, 155 insertions(+), 1 deletion(-)

diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index e871886..4ad248e 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -34,6 +34,7 @@
  #include <linux/idr.h>
  #include <linux/sched.h>
  #include <linux/uio.h>
+#include <linux/slab.h>
  #include <net/9p/9p.h>
  #include <net/9p/client.h>
  #include <trace/events/writeback.h>
@@ -99,6 +100,148 @@ static int v9fs_vfs_readpage(struct file *filp, struct page *page)
  	return v9fs_fid_readpage(filp->private_data, page);
  }
  
+/*
+ * Context for "fast readpages"
+ */
+struct v9fs_readpages_ctx {
+	struct file *filp;
+	struct address_space *mapping;
+	pgoff_t start_index; /* index of the first page with actual data */
+	char *buf; /* buffer with actual data */
+	int len; /* length of the actual data */
+	int num_pages; /* maximal data chunk (in pages) that can be
+			  passed per transmission */
+};
+
+static int init_readpages_ctx(struct v9fs_readpages_ctx *ctx,
+			      struct file *filp,
+			      struct address_space *mapping,
+			      int num_pages)
+{
+	memset(ctx, 0, sizeof(*ctx));
+	ctx->buf = kmalloc(num_pages << PAGE_SHIFT, GFP_USER);

Doesn't this a) have potential information leak to user space and b) 
allow user space to allocate big amounts of kernel memory? A limited 
page pool would probably be better here.

I also don't quite grasp yet what pattern you're actually optimizing. 
Basically you're doing implicit read-ahead on behalf of the reader, 
right? So why would that be faster in a random 4k read scenario? Also, 
have you compared tmpfs hosted files to only benchmark the transmission 
path?

+	if (!ctx->buf)
+		return -ENOMEM;
+	ctx->filp = filp;
+	ctx->mapping = mapping;
+	ctx->num_pages = num_pages;
+	return 0;
+}
+
+static void done_readpages_ctx(struct v9fs_readpages_ctx *ctx)
+{
+	kfree(ctx->buf);
+}
+
+static int receive_buffer(struct file *filp,
+			  char *buf,
+			  off_t offset, /* offset in the file */
+			  int len,
+			  int *err)
+{
+	struct kvec kvec;
+	struct iov_iter iter;
+
+	kvec.iov_base = buf;
+	kvec.iov_len = len;
+	iov_iter_kvec(&iter, READ | ITER_KVEC, &kvec, 1, len);
+
+	return p9_client_read(filp->private_data, offset, &iter, err);
+}
+
+static int fast_filler(struct v9fs_readpages_ctx *ctx, struct page *page)
+{
+	int err;
+	int ret = 0;
+	char *kdata;
+	int to_page;
+	off_t off_in_buf;
+	struct inode *inode = page->mapping->host;
+
+	BUG_ON(!PageLocked(page));
+	/*
+	 * first, validate the buffer
+	 */
+	if (page->index < ctx->start_index ||
+	    ctx->start_index + ctx->num_pages < page->index) {
+		/*
+		 * No actual data in the buffer,
+		 * so actualize it
+		 */
+		ret = receive_buffer(ctx->filp,
+				     ctx->buf,
+				     page_offset(page),
+				     ctx->num_pages << PAGE_SHIFT,

Doesn't this potentially read beyond the end of the file?


Alex

+				     &err);
+		if (err) {
+			printk("failed to receive buffer off=%llu (%d)\n",
+			       (unsigned long long)page_offset(page),
+			       err);
+			ret = err;
+			goto done;
+		}
+		ctx->start_index = page->index;
+		ctx->len = ret;
+		ret = 0;
+	}
+	/*
+	 * fill the page with buffer's data
+	 */
+	off_in_buf = (page->index - ctx->start_index) << PAGE_SHIFT;
+	if (off_in_buf >= ctx->len) {
+		/*
+		 * No actual data to fill the page with
+		 */
+		ret = -1;
+		goto done;
+	}
+	to_page = ctx->len - off_in_buf;
+	if (to_page >= PAGE_SIZE)
+		to_page = PAGE_SIZE;
+
+	kdata = kmap_atomic(page);
+	memcpy(kdata, ctx->buf + off_in_buf, to_page);
+	memset(kdata + to_page, 0, PAGE_SIZE - to_page);
+	kunmap_atomic(kdata);
+
+	flush_dcache_page(page);
+	SetPageUptodate(page);
+	v9fs_readpage_to_fscache(inode, page);
+ done:
+	unlock_page(page);
+	return ret;
+}
+
+/**
+ * Try to read pages by groups. For every such group we issue only one
+ * read request to the server.
+ * @num_pages: maximal chunk of data (in pages) that can be passed per
+ * such request
+ */
+static int v9fs_readpages_tryfast(struct file *filp,
+				  struct address_space *mapping,
+				  struct list_head *pages,
+				  int num_pages)
+{
+	int ret;
+	struct v9fs_readpages_ctx ctx;
+
+	ret = init_readpages_ctx(&ctx, filp, mapping, num_pages);
+	if (ret)
+		/*
+		 * Can not allocate resources for the fast path,
+		 * so do it by slow way
+		 */
+		return read_cache_pages(mapping, pages,
+					(void *)v9fs_vfs_readpage, filp);
+
+	else
+		ret = read_cache_pages(mapping, pages,
+				       (void *)fast_filler, &ctx);
+	done_readpages_ctx(&ctx);
+	return ret;
+}
+
  /**
   * v9fs_vfs_readpages - read a set of pages from 9P
   *
@@ -114,6 +257,7 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
  {
  	int ret = 0;
  	struct inode *inode;
+	struct v9fs_flush_set *fset;
  
  	inode = mapping->host;
  	p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, filp);
@@ -122,7 +266,17 @@ static int v9fs_vfs_readpages(struct file *filp, struct address_space *mapping,
  	if (ret == 0)
  		return ret;
  
-	ret = read_cache_pages(mapping, pages, (void *)v9fs_vfs_readpage, filp);
+	fset = v9fs_inode2v9ses(mapping->host)->flush;
+	if (!fset)
+		/*
+		 * Do it by slow way
+		 */
+		ret = read_cache_pages(mapping, pages,
+				       (void *)v9fs_vfs_readpage, filp);
+	else
+		ret = v9fs_readpages_tryfast(filp, mapping,
+					     pages, fset->num_pages);
+
  	p9_debug(P9_DEBUG_VFS, "  = %d\n", ret);
  	return ret;
  }

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html