Re: [PATCH v5 5/9] NFSD add COPY_NOTIFY operation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Aug 12, 2019 at 12:19 PM Olga Kornievskaia
<olga.kornievskaia@xxxxxxxxx> wrote:
>
> On Thu, Aug 8, 2019 at 4:18 PM Olga Kornievskaia
> <olga.kornievskaia@xxxxxxxxx> wrote:
> >
> > Introducing the COPY_NOTIFY operation.
> >
> > Create a new unique stateid that will keep track of the copy
> > state and the upcoming READs that will use that stateid. Keep
> > it in the list associated with parent stateid. When putting
> > a reference on a stateid, check if there are associated copy
> > notify stateids, if so, account for it and remove them on
> > last reference.
> >
> > Laundromat thread will traverse globally stored copy notify
> > stateid and notice if any haven't been referenced in the
> > lease period, if so, it'll remove them.
> >
> > Return single netaddr to advertise to the copy.
> >
> > Signed-off-by: Andy Adamson <andros@xxxxxxxxxx>
> > Signed-off-by: Olga Kornievskaia <kolga@xxxxxxxxxx>
> > ---
> >  fs/nfsd/nfs4proc.c  |  47 ++++++++++++++++++--
> >  fs/nfsd/nfs4state.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++------
> >  fs/nfsd/state.h     |  30 ++++++++++++-
> >  fs/nfsd/xdr4.h      |   2 +-
> >  4 files changed, 182 insertions(+), 19 deletions(-)
> >
> > diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> > index 3a2805d..47f6b52 100644
> > --- a/fs/nfsd/nfs4proc.c
> > +++ b/fs/nfsd/nfs4proc.c
> > @@ -37,6 +37,7 @@
> >  #include <linux/falloc.h>
> >  #include <linux/slab.h>
> >  #include <linux/kthread.h>
> > +#include <linux/sunrpc/addr.h>
> >
> >  #include "idmap.h"
> >  #include "cache.h"
> > @@ -1229,7 +1230,7 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
> >
> >  static void cleanup_async_copy(struct nfsd4_copy *copy)
> >  {
> > -       nfs4_free_cp_state(copy);
> > +       nfs4_free_copy_state(copy);
> >         fput(copy->file_dst);
> >         fput(copy->file_src);
> >         spin_lock(&copy->cp_clp->async_lock);
> > @@ -1283,7 +1284,7 @@ static int nfsd4_do_async_copy(void *data)
> >                 async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
> >                 if (!async_copy)
> >                         goto out;
> > -               if (!nfs4_init_cp_state(nn, copy)) {
> > +               if (!nfs4_init_copy_state(nn, copy)) {
> >                         kfree(async_copy);
> >                         goto out;
> >                 }
> > @@ -1350,7 +1351,47 @@ struct nfsd4_copy *
> >  nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
> >                   union nfsd4_op_u *u)
> >  {
> > -       return nfserr_notsupp;
> > +       struct nfsd4_copy_notify *cn = &u->copy_notify;
> > +       __be32 status;
> > +       struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> > +       struct nfs4_stid *stid;
> > +       struct nfs4_cpntf_state *cps;
> > +       struct nfs4_client *clp = cstate->clp;
> > +
> > +       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> > +                                       &cn->cpn_src_stateid, RD_STATE, NULL,
> > +                                       NULL, &stid);
> > +       if (status)
> > +               return status;
> > +
> > +       cn->cpn_sec = nn->nfsd4_lease;
> > +       cn->cpn_nsec = 0;
> > +
> > +       status = nfserrno(-ENOMEM);
> > +       cps = nfs4_alloc_init_cpntf_state(nn, stid);
> > +       if (!cps)
> > +               goto out_err;
> > +       memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t));
> > +
> > +       /* For now, only return one server address in cpn_src, the
> > +        * address used by the client to connect to this server.
> > +        */
> > +       cn->cpn_src.nl4_type = NL4_NETADDR;
> > +       status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
> > +                                &cn->cpn_src.u.nl4_addr);
> > +       WARN_ON_ONCE(status);
> > +       if (status) {
> > +               free_cpntf_state(nn, cps);
> > +               goto out;
> > +       }
> > +       spin_lock(&clp->cpntf_lock);
> > +       list_add(&cps->cpntf, &clp->copy_notifies);
> > +       spin_unlock(&clp->cpntf_lock);
> > +out:
> > +       return status;
> > +out_err:
> > +       nfs4_put_stid(stid);
> > +       goto out;
> >  }
> >
> >  static __be32
> > diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> > index 78926c6..bd962f1 100644
> > --- a/fs/nfsd/nfs4state.c
> > +++ b/fs/nfsd/nfs4state.c
> > @@ -720,6 +720,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
> >         /* Will be incremented before return to client: */
> >         refcount_set(&stid->sc_count, 1);
> >         spin_lock_init(&stid->sc_lock);
> > +       INIT_LIST_HEAD(&stid->sc_cp_list);
> >
> >         /*
> >          * It shouldn't be a problem to reuse an opaque stateid value.
> > @@ -739,33 +740,89 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
> >  /*
> >   * Create a unique stateid_t to represent each COPY.
> >   */
> > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> > +static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
> > +                             unsigned char sc_type)
> >  {
> >         int new_id;
> >
> > +       stid->stid.si_opaque.so_clid.cl_boot = nn->boot_time;
> > +       stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> > +       stid->sc_type = sc_type;
> > +
> >         idr_preload(GFP_KERNEL);
> >         spin_lock(&nn->s2s_cp_lock);
> > -       new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
> > +       new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
> > +       stid->stid.si_opaque.so_id = new_id;
> >         spin_unlock(&nn->s2s_cp_lock);
> >         idr_preload_end();
> >         if (new_id < 0)
> >                 return 0;
> > -       copy->cp_stateid.si_opaque.so_id = new_id;
> > -       copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> > -       copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> >         return 1;
> >  }
> >
> > -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> > +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> >  {
> > -       struct nfsd_net *nn;
> > +       return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID);
> > +}
> >
> > -       nn = net_generic(copy->cp_clp->net, nfsd_net_id);
> > +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
> > +                                                    struct nfs4_stid *p_stid)
> > +{
> > +       struct nfs4_cpntf_state *cps;
> > +
> > +       cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
> > +       if (!cps)
> > +               return NULL;
> > +       cps->cp_p_stid = p_stid;
> > +       cps->cpntf_time = get_seconds();
> > +       cps->net = nn;
> > +       if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
> > +               goto out_free;
> > +       spin_lock(&p_stid->sc_lock);
> > +       list_add(&cps->cp_list, &p_stid->sc_cp_list);
> > +       p_stid->sc_cp_list_size++;
> > +       spin_unlock(&p_stid->sc_lock);
> > +       return cps;
> > +out_free:
> > +       kfree(cps);
> > +       return NULL;
> > +}
> > +void _free_copy_cpntf_stateid(struct nfsd_net *nn, stateid_t *stid)
> > +{
> >         spin_lock(&nn->s2s_cp_lock);
> > -       idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
> > +       idr_remove(&nn->s2s_cp_stateids, stid->si_opaque.so_id);
> >         spin_unlock(&nn->s2s_cp_lock);
> >  }
> >
> > +void nfs4_free_copy_state(struct nfsd4_copy *copy)
> > +{
> > +       struct nfsd_net *nn;
> > +
> > +       nn = net_generic(copy->cp_clp->net, nfsd_net_id);
> > +       _free_copy_cpntf_stateid(nn, &copy->cp_stateid.stid);
> > +}
> > +
> > +static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid)
> > +{
> > +       struct nfs4_cpntf_state *cps;
> > +       struct nfsd_net *nn;
> > +
> > +       nn = net_generic(net, nfsd_net_id);
> > +       spin_lock(&stid->sc_lock);
> > +       while (!list_empty(&stid->sc_cp_list)) {
> > +               cps = list_first_entry(&stid->sc_cp_list,
> > +                                      struct nfs4_cpntf_state, cp_list);
> > +               stid->sc_cp_list_size--;
> > +               list_del(&cps->cp_list);
> > +               _free_copy_cpntf_stateid(nn, &cps->cp_stateid.stid);
> > +               spin_lock(&stid->sc_client->cpntf_lock);
> > +               list_del(&cps->cpntf);
> > +               spin_unlock(&stid->sc_client->cpntf_lock);
> > +               kfree(cps);
> > +       }
> > +       spin_unlock(&stid->sc_lock);
> > +}
> > +
> >  static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
> >  {
> >         struct nfs4_stid *stid;
> > @@ -905,15 +962,25 @@ static void block_delegations(struct knfsd_fh *fh)
> >  {
> >         struct nfs4_file *fp = s->sc_file;
> >         struct nfs4_client *clp = s->sc_client;
> > +       size_t size = 0;
> > +
> > +       spin_lock(&s->sc_lock);
> > +       size = s->sc_cp_list_size;
> > +       spin_unlock(&s->sc_lock);
> >
> >         might_lock(&clp->cl_lock);
> >
> >         if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
> > -               wake_up_all(&close_wq);
> > -               return;
> > +               if (!refcount_sub_and_test_checked(s->sc_cp_list_size,
> > +                               &s->sc_count)) {
> > +                       refcount_add_checked(s->sc_cp_list_size, &s->sc_count);
> > +                       wake_up_all(&close_wq);
> > +                       return;
> > +               }
> >         }
> >         idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
> >         spin_unlock(&clp->cl_lock);
> > +       nfs4_free_cpntf_statelist(clp->net, s);
> >         s->sc_free(s);
> >         if (fp)
> >                 put_nfs4_file(fp);
>
> While this passes my testing, in theory this allows for the race that
> we get the copy notify size but then offload_cancel arrive and change
> the value. Then refcount_sub_and test_check would have an incorrect
> value (can subtract larger than an actual reference count). I have no
> solution for that as there is no refcount_sub_and_lock() that will
> allow to decrement by a multiple under a lock. Thoughts?

I tried not to use the client's cl_lock but instead use a specific
lock to protect the copy notifies stateid on the stateid list. But
since stateid's reference counter (sc_count) is protected by it, I
think by getting rid of the special lock and using cl_lock will solve
the problem of coordinating access between the sc_count and the
copy_notify stateid list. Are the any problems with using such a big
lock?


>
> > @@ -1881,6 +1948,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
> >  #endif
> >         INIT_LIST_HEAD(&clp->async_copies);
> >         spin_lock_init(&clp->async_lock);
> > +       INIT_LIST_HEAD(&clp->copy_notifies);
> > +       spin_lock_init(&clp->cpntf_lock);
> >         spin_lock_init(&clp->cl_lock);
> >         rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
> >         return clp;
> > @@ -2909,7 +2978,8 @@ static bool client_has_state(struct nfs4_client *clp)
> >  #endif
> >                 || !list_empty(&clp->cl_delegations)
> >                 || !list_empty(&clp->cl_sessions)
> > -               || !list_empty(&clp->async_copies);
> > +               || !list_empty(&clp->async_copies)
> > +               || !list_empty(&clp->copy_notifies);
> >  }
> >
> >  static __be32 copy_impl_id(struct nfs4_client *clp,
> > @@ -5184,9 +5254,10 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
> >         struct nfs4_delegation *dp;
> >         struct nfs4_ol_stateid *stp;
> >         struct nfsd4_blocked_lock *nbl;
> > -       struct list_head *pos, *next, reaplist;
> > +       struct list_head *pos, *next, *ppos, *pnext, reaplist, cpntflist;
> >         time_t cutoff = get_seconds() - nn->nfsd4_lease;
> >         time_t t, new_timeo = nn->nfsd4_lease;
> > +       struct nfs4_cpntf_state *cps;
> >
> >         dprintk("NFSD: laundromat service - starting\n");
> >
> > @@ -5197,9 +5268,19 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
> >         dprintk("NFSD: end of grace period\n");
> >         nfsd4_end_grace(nn);
> >         INIT_LIST_HEAD(&reaplist);
> > +       INIT_LIST_HEAD(&cpntflist);
> > +
> >         spin_lock(&nn->client_lock);
> >         list_for_each_safe(pos, next, &nn->client_lru) {
> >                 clp = list_entry(pos, struct nfs4_client, cl_lru);
> > +               spin_lock(&clp->cpntf_lock);
> > +               list_for_each_safe(ppos, pnext, &clp->copy_notifies) {
> > +                       cps = list_entry(ppos, struct nfs4_cpntf_state, cpntf);
> > +                       if (!time_after((unsigned long)cps->cpntf_time,
> > +                                       (unsigned long)cutoff))
> > +                               list_move(&cps->cpntf, &cpntflist);
> > +               }
> > +               spin_unlock(&clp->cpntf_lock);
> >                 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
> >                         t = clp->cl_time - cutoff;
> >                         new_timeo = min(new_timeo, t);
> > @@ -5213,6 +5294,11 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
> >                 list_add(&clp->cl_lru, &reaplist);
> >         }
> >         spin_unlock(&nn->client_lock);
> > +       list_for_each_safe(pos, next, &cpntflist) {
> > +               cps = list_entry(pos, struct nfs4_cpntf_state, cpntf);
> > +               list_del_init(&cps->cpntf);
> > +               free_cpntf_state(cps->net, cps);
> > +       }
> >         list_for_each_safe(pos, next, &reaplist) {
> >                 clp = list_entry(pos, struct nfs4_client, cl_lru);
> >                 dprintk("NFSD: purging unused client (clientid %08x)\n",
> > @@ -5576,6 +5662,16 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
> >
> >         return 0;
> >  }
> > +void free_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
> > +{
> > +       spin_lock(&cps->cp_p_stid->sc_lock);
> > +       list_del(&cps->cp_list);
> > +       cps->cp_p_stid->sc_cp_list_size--;
> > +       spin_unlock(&cps->cp_p_stid->sc_lock);
> > +       _free_copy_cpntf_stateid(nn, &cps->cp_stateid.stid);
> > +       nfs4_put_stid(cps->cp_p_stid);
> > +       kfree(cps);
> > +}
> >
> >  /*
> >   * Checks for stateid operations
> > diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> > index 25c7a45..16be2f4 100644
> > --- a/fs/nfsd/state.h
> > +++ b/fs/nfsd/state.h
> > @@ -56,6 +56,13 @@
> >         stateid_opaque_t        si_opaque;
> >  } stateid_t;
> >
> > +typedef struct {
> > +       stateid_t               stid;
> > +#define NFS4_COPY_STID 1
> > +#define NFS4_COPYNOTIFY_STID 2
> > +       unsigned char           sc_type;
> > +} copy_stateid_t;
> > +
> >  #define STATEID_FMT    "(%08x/%08x/%08x/%08x)"
> >  #define STATEID_VAL(s) \
> >         (s)->si_opaque.so_clid.cl_boot, \
> > @@ -96,6 +103,8 @@ struct nfs4_stid {
> >  #define NFS4_REVOKED_DELEG_STID 16
> >  #define NFS4_CLOSED_DELEG_STID 32
> >  #define NFS4_LAYOUT_STID 64
> > +       struct list_head        sc_cp_list;
> > +       size_t                  sc_cp_list_size;
> >         unsigned char           sc_type;
> >         stateid_t               sc_stateid;
> >         spinlock_t              sc_lock;
> > @@ -104,6 +113,18 @@ struct nfs4_stid {
> >         void                    (*sc_free)(struct nfs4_stid *);
> >  };
> >
> > +/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the
> > + * parent OPEN/LOCK/DELEG stateid.
> > + */
> > +struct nfs4_cpntf_state {
> > +       copy_stateid_t          cp_stateid;
> > +       struct list_head        cp_list;        /* per parent nfs4_stid */
> > +       struct nfs4_stid        *cp_p_stid;     /* pointer to parent */
> > +       struct list_head        cpntf;          /* list of copy_notifies */
> > +       time_t                  cpntf_time;     /* last time stateid used */
> > +       struct nfsd_net         *net;
> > +};
> > +
> >  /*
> >   * Represents a delegation stateid. The nfs4_client holds references to these
> >   * and they are put when it is being destroyed or when the delegation is
> > @@ -367,6 +388,8 @@ struct nfs4_client {
> >         struct net              *net;
> >         struct list_head        async_copies;   /* list of async copies */
> >         spinlock_t              async_lock;     /* lock for async copies */
> > +       struct list_head        copy_notifies;  /* list of copy notify stids */
> > +       spinlock_t              cpntf_lock;     /* lock for copy_notifies */
> >  };
> >
> >  /* struct nfs4_client_reset
> > @@ -623,8 +646,10 @@ __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
> >                      struct nfs4_stid **s, struct nfsd_net *nn);
> >  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
> >                                   void (*sc_free)(struct nfs4_stid *));
> > -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
> > -void nfs4_free_cp_state(struct nfsd4_copy *copy);
> > +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
> > +void nfs4_free_copy_state(struct nfsd4_copy *copy);
> > +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
> > +                       struct nfs4_stid *p_stid);
> >  void nfs4_unhash_stid(struct nfs4_stid *s);
> >  void nfs4_put_stid(struct nfs4_stid *s);
> >  void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
> > @@ -654,6 +679,7 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name
> >  extern void nfs4_put_copy(struct nfsd4_copy *copy);
> >  extern struct nfsd4_copy *
> >  find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
> > +extern void free_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
> >  static inline void get_nfs4_file(struct nfs4_file *fi)
> >  {
> >         refcount_inc(&fi->fi_ref);
> > diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> > index c497032..c6c8b43 100644
> > --- a/fs/nfsd/xdr4.h
> > +++ b/fs/nfsd/xdr4.h
> > @@ -543,7 +543,7 @@ struct nfsd4_copy {
> >         struct file             *file_src;
> >         struct file             *file_dst;
> >
> > -       stateid_t               cp_stateid;
> > +       copy_stateid_t          cp_stateid;
> >
> >         struct list_head        copies;
> >         struct task_struct      *copy_task;
> > --
> > 1.8.3.1
> >



[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux