Re: Thoughts about cache consistency and directories in particular.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Apr 21, 2009 at 05:43:49PM -0400, bfields wrote:
> There's also the pre-/post- op attributes to take care of.  I'm thinking
> of applying something like the following.

Having heard no objections I'll go ahead and apply this, though I hope
we'll come up with a better solution for turning on the ext4 i_version
support in the future.  Also:

> Actually, I did this and then realized: I'm using IS_I_VERSION(inode) to
> turn on use of the i_version as the change attribute on both files and
> directories.  But it actually only makes a difference for files.
> 
> So I guess I should just be using i_version as the change attribute
> unconditionally for directories?  (Will that work on any filesystem?)

I'm still curious about this.

--b.

> commit 6bc1eaa4eb1bb8af1878be354962e6cedcaece60
> Author: J. Bruce Fields <bfields@xxxxxxxxxxxxxx>
> Date:   Thu Apr 16 17:33:25 2009 -0400
> 
>     nfsd: support ext4 i_version
>     
>     ext4 supports a real NFSv4 change attribute, which is bumped whenever
>     the ctime would be updated, including times when two updates arrive
>     within a jiffy of each other.  (Note that although ext4 has space for
>     nanosecond-precision ctime, the real resolution is lower: it actually
>     uses jiffies as the time-source.)  This ensures clients will invalidate
>     their caches when they need to.
>     
>     There is some fear that keeping the i_version up-to-date could have
>     performance drawbacks, so for now it's turned on only by a mount option.
>     We hope to do something better eventually.
>     
>     Signed-off-by: J. Bruce Fields <bfields@xxxxxxxxxxxxxx>
>     Cc: Theodore Tso <tytso@xxxxxxx>
> 
> diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
> index 17d0dd9..01d4ec1 100644
> --- a/fs/nfsd/nfs3xdr.c
> +++ b/fs/nfsd/nfs3xdr.c
> @@ -272,6 +272,7 @@ void fill_post_wcc(struct svc_fh *fhp)
>  
>  	err = vfs_getattr(fhp->fh_export->ex_path.mnt, fhp->fh_dentry,
>  			&fhp->fh_post_attr);
> +	fhp->fh_post_change = fhp->fh_dentry->d_inode->i_version;
>  	if (err)
>  		fhp->fh_post_saved = 0;
>  	else
> diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
> index 4a71fcd..12d36a7 100644
> --- a/fs/nfsd/nfs4xdr.c
> +++ b/fs/nfsd/nfs4xdr.c
> @@ -1490,13 +1490,41 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
>  	memcpy(p, ptr, nbytes);					\
>  	p += XDR_QUADLEN(nbytes);				\
>  }} while (0)
> -#define WRITECINFO(c)		do {				\
> -	*p++ = htonl(c.atomic);					\
> -	*p++ = htonl(c.before_ctime_sec);				\
> -	*p++ = htonl(c.before_ctime_nsec);				\
> -	*p++ = htonl(c.after_ctime_sec);				\
> -	*p++ = htonl(c.after_ctime_nsec);				\
> -} while (0)
> +
> +static void write32(__be32 **p, u32 n)
> +{
> +	*(*p)++ = n;
> +}
> +
> +static void write64(__be32 **p, u64 n)
> +{
> +	write32(p, (u32)(n >> 32));
> +	write32(p, (u32)n);
> +}
> +
> +static void write_change(__be32 **p, struct kstat *stat, struct inode *inode)
> +{
> +	if (IS_I_VERSION(inode)) {
> +		write64(p, inode->i_version);
> +	} else {
> +		write32(p, stat->ctime.tv_sec);
> +		write32(p, stat->ctime.tv_nsec);
> +	}
> +}
> +
> +static void write_cinfo(__be32 **p, struct nfsd4_change_info *c)
> +{
> +	write32(p, c->atomic);
> +	if (c->change_supported) {
> +		write64(p, c->before_change);
> +		write64(p, c->after_change);
> +	} else {
> +		write32(p, c->before_ctime_sec);
> +		write32(p, c->before_ctime_nsec);
> +		write32(p, c->after_ctime_sec);
> +		write32(p, c->after_ctime_nsec);
> +	}
> +}
>  
>  #define RESERVE_SPACE(nbytes)	do {				\
>  	p = resp->p;						\
> @@ -1849,16 +1877,9 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
>  			WRITE32(NFS4_FH_PERSISTENT|NFS4_FH_VOL_RENAME);
>  	}
>  	if (bmval0 & FATTR4_WORD0_CHANGE) {
> -		/*
> -		 * Note: This _must_ be consistent with the scheme for writing
> -		 * change_info, so any changes made here must be reflected there
> -		 * as well.  (See xdr4.h:set_change_info() and the WRITECINFO()
> -		 * macro above.)
> -		 */
>  		if ((buflen -= 8) < 0)
>  			goto out_resource;
> -		WRITE32(stat.ctime.tv_sec);
> -		WRITE32(stat.ctime.tv_nsec);
> +		write_change(&p, &stat, dentry->d_inode);
>  	}
>  	if (bmval0 & FATTR4_WORD0_SIZE) {
>  		if ((buflen -= 8) < 0)
> @@ -2364,7 +2385,7 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
>  
>  	if (!nfserr) {
>  		RESERVE_SPACE(32);
> -		WRITECINFO(create->cr_cinfo);
> +		write_cinfo(&p, &create->cr_cinfo);
>  		WRITE32(2);
>  		WRITE32(create->cr_bmval[0]);
>  		WRITE32(create->cr_bmval[1]);
> @@ -2475,7 +2496,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
>  
>  	if (!nfserr) {
>  		RESERVE_SPACE(20);
> -		WRITECINFO(link->li_cinfo);
> +		write_cinfo(&p, &link->li_cinfo);
>  		ADJUST_ARGS();
>  	}
>  	return nfserr;
> @@ -2493,7 +2514,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
>  
>  	nfsd4_encode_stateid(resp, &open->op_stateid);
>  	RESERVE_SPACE(40);
> -	WRITECINFO(open->op_cinfo);
> +	write_cinfo(&p, &open->op_cinfo);
>  	WRITE32(open->op_rflags);
>  	WRITE32(2);
>  	WRITE32(open->op_bmval[0]);
> @@ -2771,7 +2792,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
>  
>  	if (!nfserr) {
>  		RESERVE_SPACE(20);
> -		WRITECINFO(remove->rm_cinfo);
> +		write_cinfo(&p, &remove->rm_cinfo);
>  		ADJUST_ARGS();
>  	}
>  	return nfserr;
> @@ -2784,8 +2805,8 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
>  
>  	if (!nfserr) {
>  		RESERVE_SPACE(40);
> -		WRITECINFO(rename->rn_sinfo);
> -		WRITECINFO(rename->rn_tinfo);
> +		write_cinfo(&p, &rename->rn_sinfo);
> +		write_cinfo(&p, &rename->rn_tinfo);
>  		ADJUST_ARGS();
>  	}
>  	return nfserr;
> diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h
> index afa1901..8f641c9 100644
> --- a/include/linux/nfsd/nfsfh.h
> +++ b/include/linux/nfsd/nfsfh.h
> @@ -151,9 +151,15 @@ typedef struct svc_fh {
>  	__u64			fh_pre_size;	/* size before operation */
>  	struct timespec		fh_pre_mtime;	/* mtime before oper */
>  	struct timespec		fh_pre_ctime;	/* ctime before oper */
> +	/*
> +	 * pre-op nfsv4 change attr: note must check IS_I_VERSION(inode)
> +	 *  to find out if it is valid.
> +	 */
> +	u64			fh_pre_change;
>  
>  	/* Post-op attributes saved in fh_unlock */
>  	struct kstat		fh_post_attr;	/* full attrs after operation */
> +	u64			fh_post_change; /* nfsv4 change; see above */
>  #endif /* CONFIG_NFSD_V3 */
>  
>  } svc_fh;
> @@ -298,6 +304,7 @@ fill_pre_wcc(struct svc_fh *fhp)
>  		fhp->fh_pre_mtime = inode->i_mtime;
>  		fhp->fh_pre_ctime = inode->i_ctime;
>  		fhp->fh_pre_size  = inode->i_size;
> +		fhp->fh_pre_change = inode->i_version;
>  		fhp->fh_pre_saved = 1;
>  	}
>  }
> diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h
> index f80d601..d0f050f 100644
> --- a/include/linux/nfsd/xdr4.h
> +++ b/include/linux/nfsd/xdr4.h
> @@ -64,10 +64,13 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
>  
>  struct nfsd4_change_info {
>  	u32		atomic;
> +	bool		change_supported;
>  	u32		before_ctime_sec;
>  	u32		before_ctime_nsec;
> +	u64		before_change;
>  	u32		after_ctime_sec;
>  	u32		after_ctime_nsec;
> +	u64		after_change;
>  };
>  
>  struct nfsd4_access {
> @@ -503,10 +506,16 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
>  {
>  	BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
>  	cinfo->atomic = 1;
> -	cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
> -	cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
> -	cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
> -	cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
> +	cinfo->change_supported = IS_I_VERSION(fhp->fh_dentry->d_inode);
> +	if (cinfo->change_supported) {
> +		cinfo->before_change = fhp->fh_pre_change;
> +		cinfo->after_change = fhp->fh_post_change;
> +	} else {
> +		cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
> +		cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
> +		cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
> +		cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
> +	}
>  }
>  
>  int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *);
--
To unsubscribe from this list: send the line "unsubscribe linux-nfs" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux