On Tue, 2016-05-24 at 22:09 -0700, Tom Haynes wrote: > Have a simple flex file server where the mds (NFSv4.1 or NFSv4.2) > is also the ds (NFSv3). I.e., the metadata and the data file are > the exact same file. > > This will allow testing of the flex file client. > > Simply add the "pnfs" export option to your export > in /etc/exports and mount from a client that supports > flex files. > > Signed-off-by: Tom Haynes <loghyr@xxxxxxxxxxxxxxx> > --- > fs/nfsd/Makefile | 1 + > fs/nfsd/flexfilelayout.c | 148 ++++++++++++++++++++++++++++++++++++++++++++ > fs/nfsd/flexfilelayoutxdr.c | 116 ++++++++++++++++++++++++++++++++++ > fs/nfsd/flexfilelayoutxdr.h | 50 +++++++++++++++ > fs/nfsd/nfs4layouts.c | 10 +++ > fs/nfsd/pnfs.h | 3 + > 6 files changed, 328 insertions(+) > create mode 100644 fs/nfsd/flexfilelayout.c > create mode 100644 fs/nfsd/flexfilelayoutxdr.c > create mode 100644 fs/nfsd/flexfilelayoutxdr.h > > diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile > index 3ae5f3c..5f5d3a7 100644 > --- a/fs/nfsd/Makefile > +++ b/fs/nfsd/Makefile > @@ -20,3 +20,4 @@ nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ > nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o > nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o > nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o > +nfsd-$(CONFIG_NFSD_FLEXFILELAYOUT) += flexfilelayout.o flexfilelayoutxdr.o > diff --git a/fs/nfsd/flexfilelayout.c b/fs/nfsd/flexfilelayout.c > new file mode 100644 > index 0000000..d28b8a0 > --- /dev/null > +++ b/fs/nfsd/flexfilelayout.c > @@ -0,0 +1,148 @@ > +/* > + * Copyright (c) 2016 Tom Haynes <loghyr@xxxxxxxxxxxxxxx> > + * > + * The following implements a super-simple flex-file server > + * where the NFSv4.1 mds is also the ds. And the storage is > + * the same. I.e., writing to the mds via a NFSv4.1 WRITE > + * goes to the same location as the NFSv3 WRITE. 
> + */ > +#include > +#include > +#include > +#include > + > +#include > + > +#include > + > +#include "flexfilelayoutxdr.h" > +#include "pnfs.h" > + > +#define NFSDDBG_FACILITY NFSDDBG_PNFS > + > +static __be32 > +nfsd4_ff_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, > + struct nfsd4_layoutget *args) > +{ > + struct nfsd4_layout_seg *seg = &args->lg_seg; > + u32 block_size = (1 << inode->i_blkbits); > + u32 device_generation = 0; > + int error; > + > + struct pnfs_ff_layout *fl; > + > + if (seg->offset & (block_size - 1)) { > + dprintk("pnfsd: I/O misaligned\n"); > + goto out_layoutunavailable; > + } > + > + /* > + * The super simple flex file server has 1 mirror, 1 data server, > + * and 1 file handle. So instead of 4 allocs, do 1 for now. > + * Zero it out for the stateid - don't want junk in there! > + */ > + error = -ENOMEM; > + fl = kzalloc(sizeof(*fl), GFP_KERNEL); > + if (!fl) > + goto out_error; > + args->lg_content = fl; > + > + /* > + * Avoid layout commit, try to force the I/O to the DS, > + * and for fun, cause all IOMODE_RW layout segments to > + * effectively be WRITE only. 
> + */ > + fl->flags = FF_FLAGS_NO_LAYOUTCOMMIT | FF_FLAGS_NO_IO_THRU_MDS | > + FF_FLAGS_NO_READ_IO; > + > + fl->uid = inode->i_uid; > + fl->gid = inode->i_gid; > + > + error = nfsd4_set_deviceid(&fl->deviceid, fhp, device_generation); > + if (error) > + goto out_error; > + > + fl->fh.size = fhp->fh_handle.fh_size; > + memcpy(fl->fh.data, &fhp->fh_handle.fh_base, fl->fh.size); > + > + /* Give whole file layout segments */ > + seg->offset = 0; > + seg->length = NFS4_MAX_UINT64; > + > + dprintk("GET: 0x%llx:0x%llx %d\n", seg->offset, seg->length, > + seg->iomode); > + return 0; > + > +out_error: > + kfree(fl); > + seg->length = 0; > + return nfserrno(error); > +out_layoutunavailable: > + seg->length = 0; > + return nfserr_layoutunavailable; > +} > + > +#ifdef CONFIG_NFSD_FLEXFILELAYOUT > +static __be32 > +nfsd4_ff_proc_getdeviceinfo(struct super_block *sb, > + struct svc_rqst *rqstp, > + struct nfs4_client *clp, > + struct nfsd4_getdeviceinfo *gdp) > +{ > + struct pnfs_ff_device_addr *da; > + > + u16 port; > + char addr[INET6_ADDRSTRLEN]; > + > + if (sb->s_bdev != sb->s_bdev->bd_contains) > + return nfserr_inval; > + > + da = kzalloc(sizeof(struct pnfs_ff_device_addr), GFP_KERNEL); > + if (!da) > + return nfserrno(-ENOMEM); > + > + gdp->gd_device = da; > + > + da->version = 3; > + da->minor_version = 0; > + > + /* FIXME: Get from export? 
*/ > + da->rsize = 4096; > + da->wsize = 4096; > + > + rpc_ntop((struct sockaddr *)&rqstp->rq_daddr, > + addr, INET6_ADDRSTRLEN); > + if (rqstp->rq_daddr.ss_family == AF_INET) { > + struct sockaddr_in *sin; > + > + sin = (struct sockaddr_in *)&rqstp->rq_daddr; > + port = ntohs(sin->sin_port); > + snprintf(da->netaddr.netid, FF_NETID_LEN + 1, "tcp"); > + da->netaddr.netid_len = 3; > + } else { > + struct sockaddr_in6 *sin6; > + > + sin6 = (struct sockaddr_in6 *)&rqstp->rq_daddr; > + port = ntohs(sin6->sin6_port); > + snprintf(da->netaddr.netid, FF_NETID_LEN + 1, "tcp6"); > + da->netaddr.netid_len = 4; > + } > + > + da->netaddr.addr_len = > + snprintf(da->netaddr.addr, FF_ADDR_LEN + 1, > + "%s.%hhu.%hhu", addr, port >> 8, port & 0xff); > + > + da->tightly_coupled = false; > + > + return 0; > +} > + > +const struct nfsd4_layout_ops ff_layout_ops = { > + .notify_types = > + NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, > + .proc_getdeviceinfo = nfsd4_ff_proc_getdeviceinfo, > + .encode_getdeviceinfo = nfsd4_ff_encode_getdeviceinfo, > + .proc_layoutget = nfsd4_ff_proc_layoutget, > + .encode_layoutget = nfsd4_ff_encode_layoutget, > +}; > +#endif /* CONFIG_NFSD_FLEXFILELAYOUT */ > diff --git a/fs/nfsd/flexfilelayoutxdr.c b/fs/nfsd/flexfilelayoutxdr.c > new file mode 100644 > index 0000000..9d15ee0 > --- /dev/null > +++ b/fs/nfsd/flexfilelayoutxdr.c > @@ -0,0 +1,116 @@ > +/* > + * Copyright (c) 2016 Tom Haynes <loghyr@xxxxxxxxxxxxxxx> > + */ > +#include > +#include > +#include > + > +#include "nfsd.h" > +#include "flexfilelayoutxdr.h" > + > +#define NFSDDBG_FACILITY NFSDDBG_PNFS > + > +struct ff_idmap { > + char buf[11]; > + int len; > +}; > + > +__be32 > +nfsd4_ff_encode_layoutget(struct xdr_stream *xdr, > + struct nfsd4_layoutget *lgp) > +{ > + struct pnfs_ff_layout *fl = lgp->lg_content; > + int len, mirror_len, ds_len, fh_len; > + __be32 *p; > + > + /* > + * Unlike nfsd4_encode_user, we know these will > + * always be stringified. 
> + */ > + struct ff_idmap uid; > + struct ff_idmap gid; > + > + fh_len = 4 + fl->fh.size; > + > + uid.len = sprintf(uid.buf, "%u", from_kuid(&init_user_ns, fl->uid)); > + gid.len = sprintf(gid.buf, "%u", from_kgid(&init_user_ns, fl->gid)); > + > + /* 8 + len for recording the length, name, and padding */ > + ds_len = 20 + sizeof(stateid_opaque_t) + 4 + fh_len + > + 8 + uid.len + 8 + gid.len; > + > + mirror_len = 4 + ds_len; > + > + /* The layout segment */ > + len = 20 + mirror_len; > + > + p = xdr_reserve_space(xdr, sizeof(__be32) + len); > + if (!p) > + return nfserr_toosmall; > + > + *p++ = cpu_to_be32(len); > + p = xdr_encode_hyper(p, 1); /* stripe unit of 1 */ > + > + *p++ = cpu_to_be32(1); /* single mirror */ > + *p++ = cpu_to_be32(1); /* single data server */ > + > + p = xdr_encode_opaque_fixed(p, &fl->deviceid, > + sizeof(struct nfsd4_deviceid)); > + > + *p++ = cpu_to_be32(1); /* efficiency */ > + > + *p++ = cpu_to_be32(fl->stateid.si_generation); > + p = xdr_encode_opaque_fixed(p, &fl->stateid.si_opaque, > + sizeof(stateid_opaque_t)); > + > + *p++ = cpu_to_be32(1); /* single file handle */ > + p = xdr_encode_opaque(p, fl->fh.data, fl->fh.size); > + > + p = xdr_encode_opaque(p, uid.buf, uid.len); > + p = xdr_encode_opaque(p, gid.buf, gid.len); > + > + *p++ = cpu_to_be32(fl->flags); > + *p++ = cpu_to_be32(0); /* No stats collect hint */ > + > + return 0; > +} > + > +__be32 > +nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr, > + struct nfsd4_getdeviceinfo *gdp) > +{ > + struct pnfs_ff_device_addr *da = gdp->gd_device; > + int len; > + int ver_len; > + int addr_len; > + __be32 *p; > + > + /* len + padding for two strings */ > + addr_len = 16 + da->netaddr.netid_len + da->netaddr.addr_len; > + ver_len = 20; > + > + len = 4 + ver_len + 4 + addr_len; > + > + p = xdr_reserve_space(xdr, len + sizeof(__be32)); > + if (!p) > + return nfserr_resource; > + > + /* > + * Fill in the overall length and number of volumes at the beginning > + * of the layout. 
> + */ > + *p++ = cpu_to_be32(len); > + *p++ = cpu_to_be32(1); /* 1 netaddr */ > + p = xdr_encode_opaque(p, da->netaddr.netid, da->netaddr.netid_len); > + p = xdr_encode_opaque(p, da->netaddr.addr, da->netaddr.addr_len); > + > + *p++ = cpu_to_be32(1); /* 1 versions */ > + > + *p++ = cpu_to_be32(da->version); > + *p++ = cpu_to_be32(da->minor_version); > + *p++ = cpu_to_be32(da->rsize); > + *p++ = cpu_to_be32(da->wsize); > + *p++ = cpu_to_be32(da->tightly_coupled); > + > + return 0; > +} > diff --git a/fs/nfsd/flexfilelayoutxdr.h b/fs/nfsd/flexfilelayoutxdr.h > new file mode 100644 > index 0000000..40e6d1b > --- /dev/null > +++ b/fs/nfsd/flexfilelayoutxdr.h > @@ -0,0 +1,50 @@ > +/* > + * Copyright (c) 2016 Tom Haynes <loghyr@xxxxxxxxxxxxxxx> > + */ > +#ifndef _NFSD_FLEXFILELAYOUTXDR_H > +#define _NFSD_FLEXFILELAYOUTXDR_H 1 > + > +#include > +#include "xdr4.h" > + > +#define FF_FLAGS_NO_LAYOUTCOMMIT 1 > +#define FF_FLAGS_NO_IO_THRU_MDS 2 > +#define FF_FLAGS_NO_READ_IO 4 > + > +struct iomap; > +struct xdr_stream; > + > +#define FF_NETID_LEN (4) > +#define FF_ADDR_LEN (INET6_ADDRSTRLEN + 1) > +struct pnfs_ff_netaddr { > + char netid[FF_NETID_LEN + 1]; > + char addr[FF_ADDR_LEN + 1]; > + u32 netid_len; > + u32 addr_len; > +}; > + > +struct pnfs_ff_device_addr { > + struct pnfs_ff_netaddr netaddr; > + u32 version; > + u32 minor_version; > + u32 rsize; > + u32 wsize; > + bool tightly_coupled; > +}; > + > +struct pnfs_ff_layout { > + u32 flags; > + u32 stats_collect_hint; > + kuid_t uid; > + kgid_t gid; > + struct nfsd4_deviceid deviceid; > + stateid_t stateid; > + struct nfs_fh fh; > +}; > + > +__be32 nfsd4_ff_encode_getdeviceinfo(struct xdr_stream *xdr, > + struct nfsd4_getdeviceinfo *gdp); > +__be32 nfsd4_ff_encode_layoutget(struct xdr_stream *xdr, > + struct nfsd4_layoutget *lgp); > + > +#endif /* _NFSD_FLEXFILELAYOUTXDR_H */ > diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c > index 825c7bc..7cbd56a 100644 > --- a/fs/nfsd/nfs4layouts.c > +++ 
b/fs/nfsd/nfs4layouts.c > @@ -27,6 +27,9 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops; > static const struct lock_manager_operations nfsd4_layouts_lm_ops; > > const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = { > +#ifdef CONFIG_NFSD_FLEXFILELAYOUT > + [LAYOUT_FLEX_FILES] = &ff_layout_ops, > +#endif > #ifdef CONFIG_NFSD_BLOCKLAYOUT > [LAYOUT_BLOCK_VOLUME] = &bl_layout_ops, > #endif > @@ -122,7 +125,9 @@ nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp, > > void nfsd4_setup_layout_type(struct svc_export *exp) > { > +#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT) > struct super_block *sb = exp->ex_path.mnt->mnt_sb; > +#endif > > if (!(exp->ex_flags & NFSEXP_PNFS)) > return; > @@ -145,6 +150,11 @@ void nfsd4_setup_layout_type(struct svc_export *exp) > sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops) > exp->ex_layout_type = LAYOUT_SCSI; > #endif > +#ifdef CONFIG_NFSD_FLEXFILELAYOUT > + // FIXME: How do we "export" this and how does it mingle with > + // the above types? > + exp->ex_layout_type = LAYOUT_FLEX_FILES; > +#endif > } > Maybe it's time to start thinking about how to support multiple layout types per export? It doesn't look like it would be that hard. I think we could convert ex_layout_type into a bitmap that shows which types are supported. The harder work looks to be on the client: when it gets back multiple layout types, it would need some heuristic to choose among them, and the client code would have to be fixed to handle that case properly. 
> static void > diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h > index e855677..0c2a716 100644 > --- a/fs/nfsd/pnfs.h > +++ b/fs/nfsd/pnfs.h > @@ -45,6 +45,9 @@ extern const struct nfsd4_layout_ops bl_layout_ops; > #ifdef CONFIG_NFSD_SCSILAYOUT > extern const struct nfsd4_layout_ops scsi_layout_ops; > #endif > +#ifdef CONFIG_NFSD_FLEXFILELAYOUT > +extern const struct nfsd4_layout_ops ff_layout_ops; > +#endif > > __be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp, > struct nfsd4_compound_state *cstate, stateid_t *stateid, -- Jeff Layton <jlayton@xxxxxxxxxxxxxxx> -- To unsubscribe from this list: send the line "unsubscribe linux-nfs" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html