Again NACK... If you want to add support for new functionality to the NFS standard then do it through the IETF process, not the Linux kernel. Trond -- Trond Myklebust Linux NFS client maintainer NetApp Trond.Myklebust@xxxxxxxxxx www.netapp.com On Jun 23, 2012, at 6:47 AM, Namjae Jeon wrote: > From: Namjae Jeon <namjae.jeon@xxxxxxxxxxx> > > This patch disable readahead for a file on NFS Server when > posix_fadvise(fd..., POSIX_FADV_RANDOM) is called on NFS > Client. > > Readahead on the file can be enabled again with > posix_fadvise(fd..., POSIX_FADV_NORMAL) > > This patch is provides performance boost in a scenario like > traversing a directory with large number of files(e.g. ~5000 files), > showing thumbnail view for an image directory, file attributes of > various music files, preview dialog for video files etc. > > To simulate the scenario we performed a traverse test on a directory > containing 5000 files. When OPEN/READDIR/STAT/READ(4k) > is called on all 5000 files, total time is reduced by 19.6 %. > > Without this patch: > #> ./traverse_time huge_test_dir 4096 > Traversed Path : huge_test_dir/ read_size = 4096 > Total Time Taken : 22326 msec <-------- > IMPORTANT TIMINGS > OPEN DIR TIME (15915 usec) > READ DIR TIME (8092117 usec) > STAT TIME (118791 usec) > OPEN TIME (1361520 usec) > CLOSE TIME (27134 usec) > READ TIME(12647454 usec) > ADVISE TIME(9066 usec) > MEMSET TIME(7 usec) > READ DATA : 20480000 > READ SPEED: 1.54MB/sec > > With this patch: > #> ./traverse_time huge_test_dir 4096 > Traversed Path : huge_test_dir/ read_size = 4096 > Total Time Taken : 17949 msec <-------- > IMPORTANT TIMINGS > OPEN DIR TIME (15868 usec) > READ DIR TIME (7982147 usec) > STAT TIME (118780 usec) > OPEN TIME (1369376 usec) > CLOSE TIME (28216 usec) > READ TIME(8372115 usec) > ADVISE TIME(8923 usec) > MEMSET TIME(7 usec) > READ DATA : 20480000 > READ SPEED(only read): 2.33MB/sec > > Signed-off-by: Namjae Jeon <namjae.jeon@xxxxxxxxxxx> > Signed-off-by: Vivek Trivedi <t.vivek@xxxxxxxxxxx> > Signed-off-by: Amit Sahrawat <a.sahrawat@xxxxxxxxxxx> > --- > fs/nfs/file.c | 3 +++ > fs/nfs/nfs2xdr.c | 5 +++-- > fs/nfs/nfs3xdr.c | 5 +++-- > fs/nfs/nfs4xdr.c | 5 +++-- > fs/nfsd/nfs3proc.c | 2 +- > fs/nfsd/nfs3xdr.c | 1 + > fs/nfsd/nfs4xdr.c | 5 +++-- > fs/nfsd/nfsproc.c | 2 +- > fs/nfsd/nfsxdr.c | 1 + > fs/nfsd/vfs.c | 24 +++++++++++++++++++++--- > fs/nfsd/vfs.h | 6 ++++-- > fs/nfsd/xdr.h | 1 + > fs/nfsd/xdr3.h | 1 + > fs/nfsd/xdr4.h | 1 + > 14 files changed, 47 insertions(+), 15 deletions(-) > > diff --git a/fs/nfs/file.c b/fs/nfs/file.c > index a6708e6b..89fdc43 100644 > --- a/fs/nfs/file.c > +++ b/fs/nfs/file.c > @@ -192,6 +192,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, > struct dentry * dentry = iocb->ki_filp->f_path.dentry; > struct inode * inode = dentry->d_inode; > ssize_t result; > + struct nfs_open_context *ctx = nfs_file_open_context(iocb->ki_filp); > > if (iocb->ki_filp->f_flags & O_DIRECT) > return nfs_file_direct_read(iocb, iov, nr_segs, pos); > @@ -200,6 +201,8 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov, > dentry->d_parent->d_name.name, dentry->d_name.name, > (unsigned long) iov_length(iov, nr_segs), (unsigned long) pos); > > + ctx->mode = iocb->ki_filp->f_mode; > + > result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); > if (!result) { > result = generic_file_aio_read(iocb, iov, nr_segs, pos); > diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c > index d04f0df..76b3e48 100644 > --- a/fs/nfs/nfs2xdr.c > +++ b/fs/nfs/nfs2xdr.c > @@ -44,7 +44,7 @@ > #define NFS_removeargs_sz (NFS_fhandle_sz+NFS_filename_sz) > #define NFS_sattrargs_sz (NFS_fhandle_sz+NFS_sattr_sz) > #define NFS_readlinkargs_sz (NFS_fhandle_sz) > -#define NFS_readargs_sz (NFS_fhandle_sz+3) > +#define NFS_readargs_sz (NFS_fhandle_sz+4) > #define NFS_writeargs_sz (NFS_fhandle_sz+4) > #define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz) > #define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz) > @@ -612,10 +612,11 @@ static void encode_readargs(struct xdr_stream *xdr, > > encode_fhandle(xdr, args->fh); > > - p = xdr_reserve_space(xdr, 4 + 4 + 4); > + p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4); > *p++ = cpu_to_be32(offset); > *p++ = cpu_to_be32(count); > *p = cpu_to_be32(count); > + *++p = cpu_to_be32(args->context->mode); > } > > static void nfs2_xdr_enc_readargs(struct rpc_rqst *req, > diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c > index 6cbe894..d0876cb 100644 > --- a/fs/nfs/nfs3xdr.c > +++ b/fs/nfs/nfs3xdr.c > @@ -49,7 +49,7 @@ > #define NFS3_lookupargs_sz (NFS3_fh_sz+NFS3_filename_sz) > #define NFS3_accessargs_sz (NFS3_fh_sz+1) > #define NFS3_readlinkargs_sz (NFS3_fh_sz) > -#define NFS3_readargs_sz (NFS3_fh_sz+3) > +#define NFS3_readargs_sz (NFS3_fh_sz+4) > #define NFS3_writeargs_sz (NFS3_fh_sz+5) > #define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) > #define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz) > @@ -951,9 +951,10 @@ static void encode_read3args(struct xdr_stream *xdr, > > encode_nfs_fh3(xdr, args->fh); > > - p = xdr_reserve_space(xdr, 8 + 4); > + p = xdr_reserve_space(xdr, 8 + 4 + 4); > p = xdr_encode_hyper(p, args->offset); > *p = cpu_to_be32(args->count); > + *++p = cpu_to_be32(args->context->mode); > } > > static void nfs3_xdr_enc_read3args(struct rpc_rqst *req, > diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c > index 610ebcc..fbdd8ce 100644 > --- a/fs/nfs/nfs4xdr.c > +++ b/fs/nfs/nfs4xdr.c > @@ -188,7 +188,7 @@ static int nfs4_stat_to_errno(int); > #define decode_setattr_maxsz (op_decode_hdr_maxsz + \ > nfs4_fattr_bitmap_maxsz) > #define encode_read_maxsz (op_encode_hdr_maxsz + \ > - encode_stateid_maxsz + 3) > + encode_stateid_maxsz + 4) > #define decode_read_maxsz (op_decode_hdr_maxsz + 2) > #define encode_readdir_maxsz (op_encode_hdr_maxsz + \ > 2 + encode_verifier_maxsz + 5) > @@ -1532,9 +1532,10 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args, > encode_open_stateid(xdr, args->context, args->lock_context, > FMODE_READ, hdr->minorversion); > > - p = reserve_space(xdr, 12); > + p = reserve_space(xdr, 16); > p = xdr_encode_hyper(p, args->offset); > *p = cpu_to_be32(args->count); > + *++p = cpu_to_be32(args->context->mode); > } > > static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) > diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c > index 9095f3c..d599629 100644 > --- a/fs/nfsd/nfs3proc.c > +++ b/fs/nfsd/nfs3proc.c > @@ -171,7 +171,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, > nfserr = nfsd_read(rqstp, &resp->fh, > argp->offset, > rqstp->rq_vec, argp->vlen, > - &resp->count); > + &resp->count, argp->f_mode); > if (nfserr == 0) { > struct inode *inode = resp->fh.fh_dentry->d_inode; > > diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c > index 43f46cd..d940bc9 100644 > --- a/fs/nfsd/nfs3xdr.c > +++ b/fs/nfsd/nfs3xdr.c > @@ -345,6 +345,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, > v++; > } > args->vlen = v; > + args->f_mode = ntohl(*p++); > return xdr_argsize_check(rqstp, p); > } > > diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c > index 4949667..34cb726 100644 > --- a/fs/nfsd/nfs4xdr.c > +++ b/fs/nfsd/nfs4xdr.c > @@ -875,9 +875,10 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) > status = nfsd4_decode_stateid(argp, &read->rd_stateid); > if (status) > return status; > - READ_BUF(12); > + READ_BUF(16); > READ64(read->rd_offset); > READ32(read->rd_length); > + READ32(read->rd_f_mode); > > DECODE_TAIL; > } > @@ -2958,7 +2959,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, > > nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp, > read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen, > - &maxcount); > + &maxcount, read->rd_f_mode); > > if (nfserr) > return nfserr; > diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c > index e15dc45..8903975 100644 > --- a/fs/nfsd/nfsproc.c > +++ b/fs/nfsd/nfsproc.c > @@ -147,7 +147,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, > nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh), > argp->offset, > rqstp->rq_vec, argp->vlen, > - &resp->count); > + &resp->count, argp->f_mode); > > if (nfserr) return nfserr; > return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt, > diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c > index 65ec595..3d76274 100644 > --- a/fs/nfsd/nfsxdr.c > +++ b/fs/nfsd/nfsxdr.c > @@ -269,6 +269,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, > v++; > } > args->vlen = v; > + args->f_mode = ntohl(*p++); > return xdr_argsize_check(rqstp, p); > } > > diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c > index c8bd9c3..8fc82d7 100644 > --- a/fs/nfsd/vfs.c > +++ b/fs/nfsd/vfs.c > @@ -1067,7 +1067,8 @@ out_nfserr: > * N.B. After this call fhp needs an fh_put > */ > __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, > - loff_t offset, struct kvec *vec, int vlen, unsigned long *count) > + loff_t offset, struct kvec *vec, int vlen, unsigned long *count, > + unsigned long f_mode) > { > struct file *file; > struct inode *inode; > @@ -1078,6 +1079,12 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, > if (err) > return err; > > + if (f_mode & FMODE_RANDOM) { > + spin_lock(&file->f_lock); > + file->f_mode |= FMODE_RANDOM; > + spin_unlock(&file->f_lock); > + } > + > inode = file->f_path.dentry->d_inode; > > /* Get readahead parameters */ > @@ -1106,7 +1113,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, > __be32 > nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, > loff_t offset, struct kvec *vec, int vlen, > - unsigned long *count) > + unsigned long *count, unsigned long f_mode) > { > __be32 err; > > @@ -1115,9 +1122,20 @@ nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, > NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE); > if (err) > goto out; > + > + if (f_mode & FMODE_RANDOM) { > + spin_lock(&file->f_lock); > + file->f_mode |= FMODE_RANDOM; > + spin_unlock(&file->f_lock); > + } else { > + spin_lock(&file->f_lock); > + file->f_mode &= ~FMODE_RANDOM; > + spin_unlock(&file->f_lock); > + } > + > err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count); > } else /* Note file may still be NULL in NFSv4 special stateid case: */ > - err = nfsd_read(rqstp, fhp, offset, vec, vlen, count); > + err = nfsd_read(rqstp, fhp, offset, vec, vlen, count, f_mode); > out: > return err; > } > diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h > index ec0611b..c181cdb 100644 > --- a/fs/nfsd/vfs.h > +++ b/fs/nfsd/vfs.h > @@ -72,9 +72,11 @@ __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t, > int, struct file **); > void nfsd_close(struct file *); > __be32 nfsd_read(struct svc_rqst *, struct svc_fh *, > - loff_t, struct kvec *, int, unsigned long *); > + loff_t, struct kvec *, int, unsigned long *, > + unsigned long); > __be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *, > - loff_t, struct kvec *, int, unsigned long *); > + loff_t, struct kvec *, int, unsigned long *, > + unsigned long); > __be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, > loff_t, struct kvec *,int, unsigned long *, int *); > __be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, > diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h > index 53b1863..520d5e6 100644 > --- a/fs/nfsd/xdr.h > +++ b/fs/nfsd/xdr.h > @@ -27,6 +27,7 @@ struct nfsd_readargs { > __u32 offset; > __u32 count; > int vlen; > + __u32 f_mode; > }; > > struct nfsd_writeargs { > diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h > index 7df980e..15fdff4 100644 > --- a/fs/nfsd/xdr3.h > +++ b/fs/nfsd/xdr3.h > @@ -32,6 +32,7 @@ struct nfsd3_readargs { > __u64 offset; > __u32 count; > int vlen; > + __u32 f_mode; > }; > > struct nfsd3_writeargs { > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > index acd127d..49d6ce3 100644 > --- a/fs/nfsd/xdr4.h > +++ b/fs/nfsd/xdr4.h > @@ -270,6 +270,7 @@ struct nfsd4_read { > u32 rd_length; /* request */ > int rd_vlen; > struct file *rd_filp; > + u32 rd_f_mode; /* request */ > > struct svc_rqst *rd_rqstp; /* response */ > struct svc_fh * rd_fhp; /* response */ > -- > 1.7.9.5 > -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html