Re: do_generic_mapping_read abuse in gfs2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

On Sat, 2007-07-28 at 19:42 +0200, Christoph Hellwig wrote:
> I was looking into cleaning up the the read actor mess now that sendfile
> uses splice and notices gfs2 now uses do_generic_mapping_read.
> 
> The use is rather odd because it's used for reading small structures
> from kernelspace and thus doesn't actually needs the fullblown
> do_generic_mapping_read at all.  Any chance you could switch it to
> a simple read_cache_page() so we can kill do_generic_mapping_read?

Something like the following perhaps? I wrote this last night & have
given it a very quick test today and it seems to work. It does mostly
whats required, but its missing readahead. Thats no problem when this is
called to deal with quota data, but it is an issue when the rindex is
involved. We read the rindex on mount and potentially again in the case
of file system expansion, it consists of a number of small structures
are you point out which are always read sequentially and the total
number of which is proportional to the filesystem size (potentially
quite large).

So although I've left it out for now, I'd like to be able to slot in
some kind of readahead at some stage,

Steve.

diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 34f7bcd..d2fc44f 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -31,7 +31,6 @@
 #include "log.h"
 #include "meta_io.h"
 #include "ops_address.h"
-#include "ops_file.h"
 #include "ops_inode.h"
 #include "quota.h"
 #include "rgrp.h"
diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
index 42a5f58..824e094 100644
--- a/fs/gfs2/ops_address.c
+++ b/fs/gfs2/ops_address.c
@@ -19,6 +19,7 @@
 #include <linux/writeback.h>
 #include <linux/gfs2_ondisk.h>
 #include <linux/lm_interface.h>
+#include <linux/swap.h>
 
 #include "gfs2.h"
 #include "incore.h"
@@ -31,7 +32,6 @@
 #include "quota.h"
 #include "trans.h"
 #include "rgrp.h"
-#include "ops_file.h"
 #include "super.h"
 #include "util.h"
 #include "glops.h"
@@ -230,62 +230,111 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
 
 
 /**
- * gfs2_readpage - readpage with locking
- * @file: The file to read a page for. N.B. This may be NULL if we are
- * reading an internal file.
+ * __gfs2_readpage - readpage
+ * @file: The file to read a page for
  * @page: The page to read
  *
- * Returns: errno
+ * This is the core of gfs2's readpage. Its used by the internal file
+ * reading code as in that case we already hold the glock. Also its
+ * called by gfs2_readpage() once the required lock has been granted.
+ *
  */
 
-static int gfs2_readpage(struct file *file, struct page *page)
+static int __gfs2_readpage(void *file, struct page *page)
 {
 	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
 	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
-	struct gfs2_file *gf = NULL;
-	struct gfs2_holder gh;
 	int error;
-	int do_unlock = 0;
 
-	if (likely(file != &gfs2_internal_file_sentinel)) {
-		if (file) {
-			gf = file->private_data;
-			if (test_bit(GFF_EXLOCK, &gf->f_flags))
-				/* gfs2_sharewrite_fault has grabbed the ip->i_gl already */
-				goto skip_lock;
-		}
-		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
-		do_unlock = 1;
-		error = gfs2_glock_nq_atime(&gh);
-		if (unlikely(error))
-			goto out_unlock;
-	}
-
-skip_lock:
 	if (gfs2_is_stuffed(ip)) {
 		error = stuffed_readpage(ip, page);
 		unlock_page(page);
-	} else
+	} else {
 		error = mpage_readpage(page, gfs2_get_block);
+	}
 
 	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
-		error = -EIO;
+		return -EIO;
 
-	if (do_unlock) {
-		gfs2_glock_dq_m(1, &gh);
-		gfs2_holder_uninit(&gh);
-	}
-out:
 	return error;
-out_unlock:
-	unlock_page(page);
+}
+
+/**
+ * gfs2_readpage - read a page of a file
+ * @file: The file to read
+ * @page: The page of the file
+ *
+ * This deals with the locking required. If the GFF_EXLOCK flags is set
+ * then we already hold the glock (due to page fault) and thus we call
+ * __gfs2_readpage() directly. Otherwise we use a trylock in order to
+ * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE
+ * in the event that we are unable to get the lock.
+ */
+
+static int gfs2_readpage(struct file *file, struct page *page)
+{
+	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
+	struct gfs2_file *gf = file ? file->private_data : NULL;
+	struct gfs2_holder gh;
+	int error;
+
+	if (test_bit(GFF_EXLOCK, &gf->f_flags))
+		return __gfs2_readpage(file, page);
+
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
+	error = gfs2_glock_nq_atime(&gh);
+	if (unlikely(error))
+		goto out;
+	error = __gfs2_readpage(file, page);
+	gfs2_glock_dq(&gh);
+out:
+	gfs2_holder_uninit(&gh);
 	if (error == GLR_TRYFAILED) {
-		error = AOP_TRUNCATED_PAGE;
 		yield();
+		return AOP_TRUNCATED_PAGE;
 	}
-	if (do_unlock)
-		gfs2_holder_uninit(&gh);
-	goto out;
+	return error;
+}
+
+/**
+ * gfs2_internal_read - read an internal file
+ * @ip: The gfs2 inode
+ * @ra_state: The readahead state (or NULL for no readahead)
+ * @buf: The buffer to fill
+ * @pos: The file position
+ * @size: The amount to read
+ *
+ */
+
+int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
+                       char *buf, loff_t *pos, unsigned size)
+{
+	struct address_space *mapping = ip->i_inode.i_mapping;
+	unsigned long index = *pos / PAGE_CACHE_SIZE;
+	unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
+	unsigned copied = 0;
+	unsigned amt;
+	struct page *page;
+	void *p;
+
+	do {
+		amt = size - copied;
+		if (offset + size > PAGE_CACHE_SIZE)
+			amt = PAGE_CACHE_SIZE - offset;
+		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
+		if (IS_ERR(page))
+			return PTR_ERR(page);
+		p = kmap_atomic(page, KM_USER0);
+		memcpy(buf + copied, p + offset, amt);
+		kunmap_atomic(p, KM_USER0);
+		mark_page_accessed(page);
+		page_cache_release(page);
+		copied += amt;
+		index++;
+		offset = 0;
+	} while(copied < size);
+	(*pos) += size;
+	return size;
 }
 
 /**
@@ -313,21 +362,19 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
 	int ret = 0;
 	int do_unlock = 0;
 
-	if (likely(file != &gfs2_internal_file_sentinel)) {
-		if (file) {
-			struct gfs2_file *gf = file->private_data;
-			if (test_bit(GFF_EXLOCK, &gf->f_flags))
-				goto skip_lock;
-		}
-		gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
-				 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
-		do_unlock = 1;
-		ret = gfs2_glock_nq_atime(&gh);
-		if (ret == GLR_TRYFAILED)
-			goto out_noerror;
-		if (unlikely(ret))
-			goto out_unlock;
+	if (file) {
+		struct gfs2_file *gf = file->private_data;
+		if (test_bit(GFF_EXLOCK, &gf->f_flags))
+			goto skip_lock;
 	}
+	gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
+			 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
+	do_unlock = 1;
+	ret = gfs2_glock_nq_atime(&gh);
+	if (ret == GLR_TRYFAILED)
+		goto out_noerror;
+	if (unlikely(ret))
+		goto out_unlock;
 skip_lock:
 	if (!gfs2_is_stuffed(ip))
 		ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
index fa1b5b3..e8fe83f 100644
--- a/fs/gfs2/ops_address.h
+++ b/fs/gfs2/ops_address.h
@@ -18,5 +18,8 @@ extern const struct address_space_operations gfs2_file_aops;
 extern int gfs2_get_block(struct inode *inode, sector_t lblock,
 			  struct buffer_head *bh_result, int create);
 extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
+extern int gfs2_internal_read(struct gfs2_inode *ip,
+			      struct file_ra_state *ra_state,
+			      char *buf, loff_t *pos, unsigned size);
 
 #endif /* __OPS_ADDRESS_DOT_H__ */
diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
index dec5ce9..2d946df 100644
--- a/fs/gfs2/ops_file.c
+++ b/fs/gfs2/ops_file.c
@@ -33,7 +33,6 @@
 #include "lm.h"
 #include "log.h"
 #include "meta_io.h"
-#include "ops_file.h"
 #include "ops_vm.h"
 #include "quota.h"
 #include "rgrp.h"
@@ -41,50 +40,6 @@
 #include "util.h"
 #include "eaops.h"
 
-/*
- * Most fields left uninitialised to catch anybody who tries to
- * use them. f_flags set to prevent file_accessed() from touching
- * any other part of this. Its use is purely as a flag so that we
- * know (in readpage()) whether or not do to locking.
- */
-struct file gfs2_internal_file_sentinel = {
-	.f_flags = O_NOATIME|O_RDONLY,
-};
-
-static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
-			   unsigned long offset, unsigned long size)
-{
-	char *kaddr;
-	unsigned long count = desc->count;
-
-	if (size > count)
-		size = count;
-
-	kaddr = kmap(page);
-	memcpy(desc->arg.data, kaddr + offset, size);
-	kunmap(page);
-
-	desc->count = count - size;
-	desc->written += size;
-	desc->arg.buf += size;
-	return size;
-}
-
-int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
-		       char *buf, loff_t *pos, unsigned size)
-{
-	struct inode *inode = &ip->i_inode;
-	read_descriptor_t desc;
-	desc.written = 0;
-	desc.arg.data = buf;
-	desc.count = size;
-	desc.error = 0;
-	do_generic_mapping_read(inode->i_mapping, ra_state,
-				&gfs2_internal_file_sentinel, pos, &desc,
-				gfs2_read_actor);
-	return desc.written ? desc.written : desc.error;
-}
-
 /**
  * gfs2_llseek - seek to a location in a file
  * @file: the file
diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
deleted file mode 100644
index 7e5d8ec..0000000
--- a/fs/gfs2/ops_file.h
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
- * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
- *
- * This copyrighted material is made available to anyone wishing to use,
- * modify, copy, or redistribute it subject to the terms and conditions
- * of the GNU General Public License version 2.
- */
-
-#ifndef __OPS_FILE_DOT_H__
-#define __OPS_FILE_DOT_H__
-
-#include <linux/fs.h>
-struct gfs2_inode;
-
-extern struct file gfs2_internal_file_sentinel;
-extern int gfs2_internal_read(struct gfs2_inode *ip,
-			      struct file_ra_state *ra_state,
-			      char *buf, loff_t *pos, unsigned size);
-extern void gfs2_set_inode_flags(struct inode *inode);
-extern const struct file_operations gfs2_file_fops;
-extern const struct file_operations gfs2_dir_fops;
-
-#endif /* __OPS_FILE_DOT_H__ */
diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
index 34f0caa..edb519c 100644
--- a/fs/gfs2/ops_inode.h
+++ b/fs/gfs2/ops_inode.h
@@ -16,5 +16,9 @@ extern const struct inode_operations gfs2_file_iops;
 extern const struct inode_operations gfs2_dir_iops;
 extern const struct inode_operations gfs2_symlink_iops;
 extern const struct inode_operations gfs2_dev_iops;
+extern const struct file_operations gfs2_file_fops;
+extern const struct file_operations gfs2_dir_fops;
+
+extern void gfs2_set_inode_flags(struct inode *inode);
 
 #endif /* __OPS_INODE_DOT_H__ */
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 6e546ee..b56e194 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -59,7 +59,6 @@
 #include "super.h"
 #include "trans.h"
 #include "inode.h"
-#include "ops_file.h"
 #include "ops_address.h"
 #include "util.h"
 
@@ -780,11 +779,9 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
 	struct gfs2_holder i_gh;
 	struct gfs2_quota_host q;
 	char buf[sizeof(struct gfs2_quota)];
-	struct file_ra_state ra_state;
 	int error;
 	struct gfs2_quota_lvb *qlvb;
 
-	file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping);
 restart:
 	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
 	if (error)
@@ -807,8 +804,8 @@ restart:
 
 		memset(buf, 0, sizeof(struct gfs2_quota));
 		pos = qd2offset(qd);
-		error = gfs2_internal_read(ip, &ra_state, buf,
-					   &pos, sizeof(struct gfs2_quota));
+		error = gfs2_internal_read(ip, NULL, buf, &pos,
+					   sizeof(struct gfs2_quota));
 		if (error < 0)
 			goto fail_gunlock;
 
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index b93ac45..b37472d 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -25,10 +25,10 @@
 #include "rgrp.h"
 #include "super.h"
 #include "trans.h"
-#include "ops_file.h"
 #include "util.h"
 #include "log.h"
 #include "inode.h"
+#include "ops_address.h"
 
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)


-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux