On Mon, Aug 12, 2019 at 12:19 PM Olga Kornievskaia <olga.kornievskaia@xxxxxxxxx> wrote: > > On Thu, Aug 8, 2019 at 4:18 PM Olga Kornievskaia > <olga.kornievskaia@xxxxxxxxx> wrote: > > > > Introducing the COPY_NOTIFY operation. > > > > Create a new unique stateid that will keep track of the copy > > state and the upcoming READs that will use that stateid. Keep > > it in the list associated with parent stateid. When putting > > a reference on a stateid, check if there are associated copy > > notify stateids, if so, account for it and remove them on > > last reference. > > > > Laundromat thread will traverse globally stored copy notify > > stateid and notice if any haven't been referenced in the > > lease period, if so, it'll remove them. > > > > Return single netaddr to advertise to the copy. > > > > Signed-off-by: Andy Adamson <andros@xxxxxxxxxx> > > Signed-off-by: Olga Kornievskaia <kolga@xxxxxxxxxx> > > --- > > fs/nfsd/nfs4proc.c | 47 ++++++++++++++++++-- > > fs/nfsd/nfs4state.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++------ > > fs/nfsd/state.h | 30 ++++++++++++- > > fs/nfsd/xdr4.h | 2 +- > > 4 files changed, 182 insertions(+), 19 deletions(-) > > > > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c > > index 3a2805d..47f6b52 100644 > > --- a/fs/nfsd/nfs4proc.c > > +++ b/fs/nfsd/nfs4proc.c > > @@ -37,6 +37,7 @@ > > #include <linux/falloc.h> > > #include <linux/slab.h> > > #include <linux/kthread.h> > > +#include <linux/sunrpc/addr.h> > > > > #include "idmap.h" > > #include "cache.h" > > @@ -1229,7 +1230,7 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst) > > > > static void cleanup_async_copy(struct nfsd4_copy *copy) > > { > > - nfs4_free_cp_state(copy); > > + nfs4_free_copy_state(copy); > > fput(copy->file_dst); > > fput(copy->file_src); > > spin_lock(©->cp_clp->async_lock); > > @@ -1283,7 +1284,7 @@ static int nfsd4_do_async_copy(void *data) > > async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL); > > if (!async_copy) > > goto out; > > - if (!nfs4_init_cp_state(nn, copy)) { > > + if (!nfs4_init_copy_state(nn, copy)) { > > kfree(async_copy); > > goto out; > > } > > @@ -1350,7 +1351,47 @@ struct nfsd4_copy * > > nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, > > union nfsd4_op_u *u) > > { > > - return nfserr_notsupp; > > + struct nfsd4_copy_notify *cn = &u->copy_notify; > > + __be32 status; > > + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); > > + struct nfs4_stid *stid; > > + struct nfs4_cpntf_state *cps; > > + struct nfs4_client *clp = cstate->clp; > > + > > + status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh, > > + &cn->cpn_src_stateid, RD_STATE, NULL, > > + NULL, &stid); > > + if (status) > > + return status; > > + > > + cn->cpn_sec = nn->nfsd4_lease; > > + cn->cpn_nsec = 0; > > + > > + status = nfserrno(-ENOMEM); > > + cps = nfs4_alloc_init_cpntf_state(nn, stid); > > + if (!cps) > > + goto out_err; > > + memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t)); > > + > > + /* For now, only return one server address in cpn_src, the > > + * address used by the client to connect to this server. > > + */ > > + cn->cpn_src.nl4_type = NL4_NETADDR; > > + status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr, > > + &cn->cpn_src.u.nl4_addr); > > + WARN_ON_ONCE(status); > > + if (status) { > > + free_cpntf_state(nn, cps); > > + goto out; > > + } > > + spin_lock(&clp->cpntf_lock); > > + list_add(&cps->cpntf, &clp->copy_notifies); > > + spin_unlock(&clp->cpntf_lock); > > +out: > > + return status; > > +out_err: > > + nfs4_put_stid(stid); > > + goto out; > > } > > > > static __be32 > > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c > > index 78926c6..bd962f1 100644 > > --- a/fs/nfsd/nfs4state.c > > +++ b/fs/nfsd/nfs4state.c > > @@ -720,6 +720,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla > > /* Will be incremented before return to client: */ > > refcount_set(&stid->sc_count, 1); > > spin_lock_init(&stid->sc_lock); > > + INIT_LIST_HEAD(&stid->sc_cp_list); > > > > /* > > * It shouldn't be a problem to reuse an opaque stateid value. > > @@ -739,33 +740,89 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla > > /* > > * Create a unique stateid_t to represent each COPY. > > */ > > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy) > > +static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid, > > + unsigned char sc_type) > > { > > int new_id; > > > > + stid->stid.si_opaque.so_clid.cl_boot = nn->boot_time; > > + stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; > > + stid->sc_type = sc_type; > > + > > idr_preload(GFP_KERNEL); > > spin_lock(&nn->s2s_cp_lock); > > - new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT); > > + new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT); > > + stid->stid.si_opaque.so_id = new_id; > > spin_unlock(&nn->s2s_cp_lock); > > idr_preload_end(); > > if (new_id < 0) > > return 0; > > - copy->cp_stateid.si_opaque.so_id = new_id; > > - copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time; > > - copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id; > > return 1; > > } > > > > -void nfs4_free_cp_state(struct nfsd4_copy *copy) > > +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy) > > { > > - struct nfsd_net *nn; > > + return nfs4_init_cp_state(nn, ©->cp_stateid, NFS4_COPY_STID); > > +} > > > > - nn = net_generic(copy->cp_clp->net, nfsd_net_id); > > +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, > > + struct nfs4_stid *p_stid) > > +{ > > + struct nfs4_cpntf_state *cps; > > + > > + cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL); > > + if (!cps) > > + return NULL; > > + cps->cp_p_stid = p_stid; > > + cps->cpntf_time = get_seconds(); > > + cps->net = nn; > > + if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID)) > > + goto out_free; > > + spin_lock(&p_stid->sc_lock); > > + list_add(&cps->cp_list, &p_stid->sc_cp_list); > > + p_stid->sc_cp_list_size++; > > + spin_unlock(&p_stid->sc_lock); > > + return cps; > > +out_free: > > + kfree(cps); > > + return NULL; > > +} > > +void _free_copy_cpntf_stateid(struct nfsd_net *nn, stateid_t *stid) > > +{ > > spin_lock(&nn->s2s_cp_lock); > > - idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id); > > + idr_remove(&nn->s2s_cp_stateids, stid->si_opaque.so_id); > > spin_unlock(&nn->s2s_cp_lock); > > } > > > > +void nfs4_free_copy_state(struct nfsd4_copy *copy) > > +{ > > + struct nfsd_net *nn; > > + > > + nn = net_generic(copy->cp_clp->net, nfsd_net_id); > > + _free_copy_cpntf_stateid(nn, ©->cp_stateid.stid); > > +} > > + > > +static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid) > > +{ > > + struct nfs4_cpntf_state *cps; > > + struct nfsd_net *nn; > > + > > + nn = net_generic(net, nfsd_net_id); > > + spin_lock(&stid->sc_lock); > > + while (!list_empty(&stid->sc_cp_list)) { > > + cps = list_first_entry(&stid->sc_cp_list, > > + struct nfs4_cpntf_state, cp_list); > > + stid->sc_cp_list_size--; > > + list_del(&cps->cp_list); > > + _free_copy_cpntf_stateid(nn, &cps->cp_stateid.stid); > > + spin_lock(&stid->sc_client->cpntf_lock); > > + list_del(&cps->cpntf); > > + spin_unlock(&stid->sc_client->cpntf_lock); > > + kfree(cps); > > + } > > + spin_unlock(&stid->sc_lock); > > +} > > + > > static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp) > > { > > struct nfs4_stid *stid; > > @@ -905,15 +962,25 @@ static void block_delegations(struct knfsd_fh *fh) > > { > > struct nfs4_file *fp = s->sc_file; > > struct nfs4_client *clp = s->sc_client; > > + size_t size = 0; > > + > > + spin_lock(&s->sc_lock); > > + size = s->sc_cp_list_size; > > + spin_unlock(&s->sc_lock); > > > > might_lock(&clp->cl_lock); > > > > if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) { > > - wake_up_all(&close_wq); > > - return; > > + if (!refcount_sub_and_test_checked(s->sc_cp_list_size, > > + &s->sc_count)) { > > + refcount_add_checked(s->sc_cp_list_size, &s->sc_count); > > + wake_up_all(&close_wq); > > + return; > > + } > > } > > idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id); > > spin_unlock(&clp->cl_lock); > > + nfs4_free_cpntf_statelist(clp->net, s); > > s->sc_free(s); > > if (fp) > > put_nfs4_file(fp); > > While this passes my testing, in theory this allows for the race that > we get the copy notify size but then offload_cancel arrive and change > the value. Then refcount_sub_and test_check would have an incorrect > value (can subtract larger than an actual reference count). I have no > solution for that as there is no refcount_sub_and_lock() that will > allow to decrement by a multiple under a lock. Thoughts? I tried not to use the client's cl_lock but instead use a specific lock to protect the copy notifies stateid on the stateid list. But since stateid's reference counter (sc_count) is protected by it, I think by getting rid of the special lock and using cl_lock will solve the problem of coordinating access between the sc_count and the copy_notify stateid list. Are the any problems with using such a big lock? > > > @@ -1881,6 +1948,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name) > > #endif > > INIT_LIST_HEAD(&clp->async_copies); > > spin_lock_init(&clp->async_lock); > > + INIT_LIST_HEAD(&clp->copy_notifies); > > + spin_lock_init(&clp->cpntf_lock); > > spin_lock_init(&clp->cl_lock); > > rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); > > return clp; > > @@ -2909,7 +2978,8 @@ static bool client_has_state(struct nfs4_client *clp) > > #endif > > || !list_empty(&clp->cl_delegations) > > || !list_empty(&clp->cl_sessions) > > - || !list_empty(&clp->async_copies); > > + || !list_empty(&clp->async_copies) > > + || !list_empty(&clp->copy_notifies); > > } > > > > static __be32 copy_impl_id(struct nfs4_client *clp, > > @@ -5184,9 +5254,10 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) > > struct nfs4_delegation *dp; > > struct nfs4_ol_stateid *stp; > > struct nfsd4_blocked_lock *nbl; > > - struct list_head *pos, *next, reaplist; > > + struct list_head *pos, *next, *ppos, *pnext, reaplist, cpntflist; > > time_t cutoff = get_seconds() - nn->nfsd4_lease; > > time_t t, new_timeo = nn->nfsd4_lease; > > + struct nfs4_cpntf_state *cps; > > > > dprintk("NFSD: laundromat service - starting\n"); > > > > @@ -5197,9 +5268,19 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) > > dprintk("NFSD: end of grace period\n"); > > nfsd4_end_grace(nn); > > INIT_LIST_HEAD(&reaplist); > > + INIT_LIST_HEAD(&cpntflist); > > + > > spin_lock(&nn->client_lock); > > list_for_each_safe(pos, next, &nn->client_lru) { > > clp = list_entry(pos, struct nfs4_client, cl_lru); > > + spin_lock(&clp->cpntf_lock); > > + list_for_each_safe(ppos, pnext, &clp->copy_notifies) { > > + cps = list_entry(ppos, struct nfs4_cpntf_state, cpntf); > > + if (!time_after((unsigned long)cps->cpntf_time, > > + (unsigned long)cutoff)) > > + list_move(&cps->cpntf, &cpntflist); > > + } > > + spin_unlock(&clp->cpntf_lock); > > if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { > > t = clp->cl_time - cutoff; > > new_timeo = min(new_timeo, t); > > @@ -5213,6 +5294,11 @@ static bool clients_still_reclaiming(struct nfsd_net *nn) > > list_add(&clp->cl_lru, &reaplist); > > } > > spin_unlock(&nn->client_lock); > > + list_for_each_safe(pos, next, &cpntflist) { > > + cps = list_entry(pos, struct nfs4_cpntf_state, cpntf); > > + list_del_init(&cps->cpntf); > > + free_cpntf_state(cps->net, cps); > > + } > > list_for_each_safe(pos, next, &reaplist) { > > clp = list_entry(pos, struct nfs4_client, cl_lru); > > dprintk("NFSD: purging unused client (clientid %08x)\n", > > @@ -5576,6 +5662,16 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid) > > > > return 0; > > } > > +void free_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps) > > +{ > > + spin_lock(&cps->cp_p_stid->sc_lock); > > + list_del(&cps->cp_list); > > + cps->cp_p_stid->sc_cp_list_size--; > > + spin_unlock(&cps->cp_p_stid->sc_lock); > > + _free_copy_cpntf_stateid(nn, &cps->cp_stateid.stid); > > + nfs4_put_stid(cps->cp_p_stid); > > + kfree(cps); > > +} > > > > /* > > * Checks for stateid operations > > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h > > index 25c7a45..16be2f4 100644 > > --- a/fs/nfsd/state.h > > +++ b/fs/nfsd/state.h > > @@ -56,6 +56,13 @@ > > stateid_opaque_t si_opaque; > > } stateid_t; > > > > +typedef struct { > > + stateid_t stid; > > +#define NFS4_COPY_STID 1 > > +#define NFS4_COPYNOTIFY_STID 2 > > + unsigned char sc_type; > > +} copy_stateid_t; > > + > > #define STATEID_FMT "(%08x/%08x/%08x/%08x)" > > #define STATEID_VAL(s) \ > > (s)->si_opaque.so_clid.cl_boot, \ > > @@ -96,6 +103,8 @@ struct nfs4_stid { > > #define NFS4_REVOKED_DELEG_STID 16 > > #define NFS4_CLOSED_DELEG_STID 32 > > #define NFS4_LAYOUT_STID 64 > > + struct list_head sc_cp_list; > > + size_t sc_cp_list_size; > > unsigned char sc_type; > > stateid_t sc_stateid; > > spinlock_t sc_lock; > > @@ -104,6 +113,18 @@ struct nfs4_stid { > > void (*sc_free)(struct nfs4_stid *); > > }; > > > > +/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the > > + * parent OPEN/LOCK/DELEG stateid. > > + */ > > +struct nfs4_cpntf_state { > > + copy_stateid_t cp_stateid; > > + struct list_head cp_list; /* per parent nfs4_stid */ > > + struct nfs4_stid *cp_p_stid; /* pointer to parent */ > > + struct list_head cpntf; /* list of copy_notifies */ > > + time_t cpntf_time; /* last time stateid used */ > > + struct nfsd_net *net; > > +}; > > + > > /* > > * Represents a delegation stateid. The nfs4_client holds references to these > > * and they are put when it is being destroyed or when the delegation is > > @@ -367,6 +388,8 @@ struct nfs4_client { > > struct net *net; > > struct list_head async_copies; /* list of async copies */ > > spinlock_t async_lock; /* lock for async copies */ > > + struct list_head copy_notifies; /* list of copy notify stids */ > > + spinlock_t cpntf_lock; /* lock for copy_notifies */ > > }; > > > > /* struct nfs4_client_reset > > @@ -623,8 +646,10 @@ __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate, > > struct nfs4_stid **s, struct nfsd_net *nn); > > struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab, > > void (*sc_free)(struct nfs4_stid *)); > > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy); > > -void nfs4_free_cp_state(struct nfsd4_copy *copy); > > +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy); > > +void nfs4_free_copy_state(struct nfsd4_copy *copy); > > +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn, > > + struct nfs4_stid *p_stid); > > void nfs4_unhash_stid(struct nfs4_stid *s); > > void nfs4_put_stid(struct nfs4_stid *s); > > void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid); > > @@ -654,6 +679,7 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name > > extern void nfs4_put_copy(struct nfsd4_copy *copy); > > extern struct nfsd4_copy * > > find_async_copy(struct nfs4_client *clp, stateid_t *staetid); > > +extern void free_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps); > > static inline void get_nfs4_file(struct nfs4_file *fi) > > { > > refcount_inc(&fi->fi_ref); > > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h > > index c497032..c6c8b43 100644 > > --- a/fs/nfsd/xdr4.h > > +++ b/fs/nfsd/xdr4.h > > @@ -543,7 +543,7 @@ struct nfsd4_copy { > > struct file *file_src; > > struct file *file_dst; > > > > - stateid_t cp_stateid; > > + copy_stateid_t cp_stateid; > > > > struct list_head copies; > > struct task_struct *copy_task; > > -- > > 1.8.3.1 > >