Re: do_generic_mapping_read abuse in gfs2

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi,

sorry for the late reply, I didn't get to it ASAP when you wrote
and then went on vacation.

On Sun, Jul 29, 2007 at 02:42:19PM +0100, Steven Whitehouse wrote:
> Something like the following perhaps? I wrote this last night & have
> given it a very quick test today and it seems to work. It does mostly
> whats required, but its missing readahead. Thats no problem when this is
> called to deal with quota data, but it is an issue when the rindex is
> involved. We read the rindex on mount and potentially again in the case
> of file system expansion, it consists of a number of small structures
> are you point out which are always read sequentially and the total
> number of which is proportional to the filesystem size (potentially
> quite large).
> 
> So although I've left it out for now, I'd like to be able to slot in
> some kind of readahead at some stage,

Yes, your patch looks like what I had in mind.  I didn't notice any
calls was for more than a page so I didn't think of the readahead issue.

You should get your readahead by using read_cache_pages() although
it's not optimally efficient as it's not using ->readpages.

> 
> Steve.
> 
> diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
> index 34f7bcd..d2fc44f 100644
> --- a/fs/gfs2/inode.c
> +++ b/fs/gfs2/inode.c
> @@ -31,7 +31,6 @@
>  #include "log.h"
>  #include "meta_io.h"
>  #include "ops_address.h"
> -#include "ops_file.h"
>  #include "ops_inode.h"
>  #include "quota.h"
>  #include "rgrp.h"
> diff --git a/fs/gfs2/ops_address.c b/fs/gfs2/ops_address.c
> index 42a5f58..824e094 100644
> --- a/fs/gfs2/ops_address.c
> +++ b/fs/gfs2/ops_address.c
> @@ -19,6 +19,7 @@
>  #include <linux/writeback.h>
>  #include <linux/gfs2_ondisk.h>
>  #include <linux/lm_interface.h>
> +#include <linux/swap.h>
>  
>  #include "gfs2.h"
>  #include "incore.h"
> @@ -31,7 +32,6 @@
>  #include "quota.h"
>  #include "trans.h"
>  #include "rgrp.h"
> -#include "ops_file.h"
>  #include "super.h"
>  #include "util.h"
>  #include "glops.h"
> @@ -230,62 +230,111 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
>  
>  
>  /**
> - * gfs2_readpage - readpage with locking
> - * @file: The file to read a page for. N.B. This may be NULL if we are
> - * reading an internal file.
> + * __gfs2_readpage - readpage
> + * @file: The file to read a page for
>   * @page: The page to read
>   *
> - * Returns: errno
> + * This is the core of gfs2's readpage. Its used by the internal file
> + * reading code as in that case we already hold the glock. Also its
> + * called by gfs2_readpage() once the required lock has been granted.
> + *
>   */
>  
> -static int gfs2_readpage(struct file *file, struct page *page)
> +static int __gfs2_readpage(void *file, struct page *page)
>  {
>  	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
>  	struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
> -	struct gfs2_file *gf = NULL;
> -	struct gfs2_holder gh;
>  	int error;
> -	int do_unlock = 0;
>  
> -	if (likely(file != &gfs2_internal_file_sentinel)) {
> -		if (file) {
> -			gf = file->private_data;
> -			if (test_bit(GFF_EXLOCK, &gf->f_flags))
> -				/* gfs2_sharewrite_fault has grabbed the ip->i_gl already */
> -				goto skip_lock;
> -		}
> -		gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
> -		do_unlock = 1;
> -		error = gfs2_glock_nq_atime(&gh);
> -		if (unlikely(error))
> -			goto out_unlock;
> -	}
> -
> -skip_lock:
>  	if (gfs2_is_stuffed(ip)) {
>  		error = stuffed_readpage(ip, page);
>  		unlock_page(page);
> -	} else
> +	} else {
>  		error = mpage_readpage(page, gfs2_get_block);
> +	}
>  
>  	if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
> -		error = -EIO;
> +		return -EIO;
>  
> -	if (do_unlock) {
> -		gfs2_glock_dq_m(1, &gh);
> -		gfs2_holder_uninit(&gh);
> -	}
> -out:
>  	return error;
> -out_unlock:
> -	unlock_page(page);
> +}
> +
> +/**
> + * gfs2_readpage - read a page of a file
> + * @file: The file to read
> + * @page: The page of the file
> + *
> + * This deals with the locking required. If the GFF_EXLOCK flags is set
> + * then we already hold the glock (due to page fault) and thus we call
> + * __gfs2_readpage() directly. Otherwise we use a trylock in order to
> + * avoid the page lock / glock ordering problems returning AOP_TRUNCATED_PAGE
> + * in the event that we are unable to get the lock.
> + */
> +
> +static int gfs2_readpage(struct file *file, struct page *page)
> +{
> +	struct gfs2_inode *ip = GFS2_I(page->mapping->host);
> +	struct gfs2_file *gf = file ? file->private_data : NULL;
> +	struct gfs2_holder gh;
> +	int error;
> +
> +	if (test_bit(GFF_EXLOCK, &gf->f_flags))
> +		return __gfs2_readpage(file, page);
> +
> +	gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|LM_FLAG_TRY_1CB, &gh);
> +	error = gfs2_glock_nq_atime(&gh);
> +	if (unlikely(error))
> +		goto out;
> +	error = __gfs2_readpage(file, page);
> +	gfs2_glock_dq(&gh);
> +out:
> +	gfs2_holder_uninit(&gh);
>  	if (error == GLR_TRYFAILED) {
> -		error = AOP_TRUNCATED_PAGE;
>  		yield();
> +		return AOP_TRUNCATED_PAGE;
>  	}
> -	if (do_unlock)
> -		gfs2_holder_uninit(&gh);
> -	goto out;
> +	return error;
> +}
> +
> +/**
> + * gfs2_internal_read - read an internal file
> + * @ip: The gfs2 inode
> + * @ra_state: The readahead state (or NULL for no readahead)
> + * @buf: The buffer to fill
> + * @pos: The file position
> + * @size: The amount to read
> + *
> + */
> +
> +int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
> +                       char *buf, loff_t *pos, unsigned size)
> +{
> +	struct address_space *mapping = ip->i_inode.i_mapping;
> +	unsigned long index = *pos / PAGE_CACHE_SIZE;
> +	unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
> +	unsigned copied = 0;
> +	unsigned amt;
> +	struct page *page;
> +	void *p;
> +
> +	do {
> +		amt = size - copied;
> +		if (offset + size > PAGE_CACHE_SIZE)
> +			amt = PAGE_CACHE_SIZE - offset;
> +		page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
> +		if (IS_ERR(page))
> +			return PTR_ERR(page);
> +		p = kmap_atomic(page, KM_USER0);
> +		memcpy(buf + copied, p + offset, amt);
> +		kunmap_atomic(p, KM_USER0);
> +		mark_page_accessed(page);
> +		page_cache_release(page);
> +		copied += amt;
> +		index++;
> +		offset = 0;
> +	} while(copied < size);
> +	(*pos) += size;
> +	return size;
>  }
>  
>  /**
> @@ -313,21 +362,19 @@ static int gfs2_readpages(struct file *file, struct address_space *mapping,
>  	int ret = 0;
>  	int do_unlock = 0;
>  
> -	if (likely(file != &gfs2_internal_file_sentinel)) {
> -		if (file) {
> -			struct gfs2_file *gf = file->private_data;
> -			if (test_bit(GFF_EXLOCK, &gf->f_flags))
> -				goto skip_lock;
> -		}
> -		gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
> -				 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
> -		do_unlock = 1;
> -		ret = gfs2_glock_nq_atime(&gh);
> -		if (ret == GLR_TRYFAILED)
> -			goto out_noerror;
> -		if (unlikely(ret))
> -			goto out_unlock;
> +	if (file) {
> +		struct gfs2_file *gf = file->private_data;
> +		if (test_bit(GFF_EXLOCK, &gf->f_flags))
> +			goto skip_lock;
>  	}
> +	gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
> +			 LM_FLAG_TRY_1CB|GL_ATIME, &gh);
> +	do_unlock = 1;
> +	ret = gfs2_glock_nq_atime(&gh);
> +	if (ret == GLR_TRYFAILED)
> +		goto out_noerror;
> +	if (unlikely(ret))
> +		goto out_unlock;
>  skip_lock:
>  	if (!gfs2_is_stuffed(ip))
>  		ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
> diff --git a/fs/gfs2/ops_address.h b/fs/gfs2/ops_address.h
> index fa1b5b3..e8fe83f 100644
> --- a/fs/gfs2/ops_address.h
> +++ b/fs/gfs2/ops_address.h
> @@ -18,5 +18,8 @@ extern const struct address_space_operations gfs2_file_aops;
>  extern int gfs2_get_block(struct inode *inode, sector_t lblock,
>  			  struct buffer_head *bh_result, int create);
>  extern int gfs2_releasepage(struct page *page, gfp_t gfp_mask);
> +extern int gfs2_internal_read(struct gfs2_inode *ip,
> +			      struct file_ra_state *ra_state,
> +			      char *buf, loff_t *pos, unsigned size);
>  
>  #endif /* __OPS_ADDRESS_DOT_H__ */
> diff --git a/fs/gfs2/ops_file.c b/fs/gfs2/ops_file.c
> index dec5ce9..2d946df 100644
> --- a/fs/gfs2/ops_file.c
> +++ b/fs/gfs2/ops_file.c
> @@ -33,7 +33,6 @@
>  #include "lm.h"
>  #include "log.h"
>  #include "meta_io.h"
> -#include "ops_file.h"
>  #include "ops_vm.h"
>  #include "quota.h"
>  #include "rgrp.h"
> @@ -41,50 +40,6 @@
>  #include "util.h"
>  #include "eaops.h"
>  
> -/*
> - * Most fields left uninitialised to catch anybody who tries to
> - * use them. f_flags set to prevent file_accessed() from touching
> - * any other part of this. Its use is purely as a flag so that we
> - * know (in readpage()) whether or not do to locking.
> - */
> -struct file gfs2_internal_file_sentinel = {
> -	.f_flags = O_NOATIME|O_RDONLY,
> -};
> -
> -static int gfs2_read_actor(read_descriptor_t *desc, struct page *page,
> -			   unsigned long offset, unsigned long size)
> -{
> -	char *kaddr;
> -	unsigned long count = desc->count;
> -
> -	if (size > count)
> -		size = count;
> -
> -	kaddr = kmap(page);
> -	memcpy(desc->arg.data, kaddr + offset, size);
> -	kunmap(page);
> -
> -	desc->count = count - size;
> -	desc->written += size;
> -	desc->arg.buf += size;
> -	return size;
> -}
> -
> -int gfs2_internal_read(struct gfs2_inode *ip, struct file_ra_state *ra_state,
> -		       char *buf, loff_t *pos, unsigned size)
> -{
> -	struct inode *inode = &ip->i_inode;
> -	read_descriptor_t desc;
> -	desc.written = 0;
> -	desc.arg.data = buf;
> -	desc.count = size;
> -	desc.error = 0;
> -	do_generic_mapping_read(inode->i_mapping, ra_state,
> -				&gfs2_internal_file_sentinel, pos, &desc,
> -				gfs2_read_actor);
> -	return desc.written ? desc.written : desc.error;
> -}
> -
>  /**
>   * gfs2_llseek - seek to a location in a file
>   * @file: the file
> diff --git a/fs/gfs2/ops_file.h b/fs/gfs2/ops_file.h
> deleted file mode 100644
> index 7e5d8ec..0000000
> --- a/fs/gfs2/ops_file.h
> +++ /dev/null
> @@ -1,24 +0,0 @@
> -/*
> - * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
> - * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
> - *
> - * This copyrighted material is made available to anyone wishing to use,
> - * modify, copy, or redistribute it subject to the terms and conditions
> - * of the GNU General Public License version 2.
> - */
> -
> -#ifndef __OPS_FILE_DOT_H__
> -#define __OPS_FILE_DOT_H__
> -
> -#include <linux/fs.h>
> -struct gfs2_inode;
> -
> -extern struct file gfs2_internal_file_sentinel;
> -extern int gfs2_internal_read(struct gfs2_inode *ip,
> -			      struct file_ra_state *ra_state,
> -			      char *buf, loff_t *pos, unsigned size);
> -extern void gfs2_set_inode_flags(struct inode *inode);
> -extern const struct file_operations gfs2_file_fops;
> -extern const struct file_operations gfs2_dir_fops;
> -
> -#endif /* __OPS_FILE_DOT_H__ */
> diff --git a/fs/gfs2/ops_inode.h b/fs/gfs2/ops_inode.h
> index 34f0caa..edb519c 100644
> --- a/fs/gfs2/ops_inode.h
> +++ b/fs/gfs2/ops_inode.h
> @@ -16,5 +16,9 @@ extern const struct inode_operations gfs2_file_iops;
>  extern const struct inode_operations gfs2_dir_iops;
>  extern const struct inode_operations gfs2_symlink_iops;
>  extern const struct inode_operations gfs2_dev_iops;
> +extern const struct file_operations gfs2_file_fops;
> +extern const struct file_operations gfs2_dir_fops;
> +
> +extern void gfs2_set_inode_flags(struct inode *inode);
>  
>  #endif /* __OPS_INODE_DOT_H__ */
> diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
> index 6e546ee..b56e194 100644
> --- a/fs/gfs2/quota.c
> +++ b/fs/gfs2/quota.c
> @@ -59,7 +59,6 @@
>  #include "super.h"
>  #include "trans.h"
>  #include "inode.h"
> -#include "ops_file.h"
>  #include "ops_address.h"
>  #include "util.h"
>  
> @@ -780,11 +779,9 @@ static int do_glock(struct gfs2_quota_data *qd, int force_refresh,
>  	struct gfs2_holder i_gh;
>  	struct gfs2_quota_host q;
>  	char buf[sizeof(struct gfs2_quota)];
> -	struct file_ra_state ra_state;
>  	int error;
>  	struct gfs2_quota_lvb *qlvb;
>  
> -	file_ra_state_init(&ra_state, sdp->sd_quota_inode->i_mapping);
>  restart:
>  	error = gfs2_glock_nq_init(qd->qd_gl, LM_ST_SHARED, 0, q_gh);
>  	if (error)
> @@ -807,8 +804,8 @@ restart:
>  
>  		memset(buf, 0, sizeof(struct gfs2_quota));
>  		pos = qd2offset(qd);
> -		error = gfs2_internal_read(ip, &ra_state, buf,
> -					   &pos, sizeof(struct gfs2_quota));
> +		error = gfs2_internal_read(ip, NULL, buf, &pos,
> +					   sizeof(struct gfs2_quota));
>  		if (error < 0)
>  			goto fail_gunlock;
>  
> diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
> index b93ac45..b37472d 100644
> --- a/fs/gfs2/rgrp.c
> +++ b/fs/gfs2/rgrp.c
> @@ -25,10 +25,10 @@
>  #include "rgrp.h"
>  #include "super.h"
>  #include "trans.h"
> -#include "ops_file.h"
>  #include "util.h"
>  #include "log.h"
>  #include "inode.h"
> +#include "ops_address.h"
>  
>  #define BFITNOENT ((u32)~0)
>  #define NO_BLOCK ((u64)~0)
> 
---end quoted text---
-
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux