Re: [PATCH v5 5/9] NFSD add COPY_NOTIFY operation

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, Aug 8, 2019 at 4:18 PM Olga Kornievskaia
<olga.kornievskaia@xxxxxxxxx> wrote:
>
> Introducing the COPY_NOTIFY operation.
>
> Create a new unique stateid that will keep track of the copy
> state and the upcoming READs that will use that stateid. Keep
> it in the list associated with parent stateid. When putting
> a reference on a stateid, check if there are associated copy
> notify stateids, if so, account for it and remove them on
> last reference.
>
> Laundromat thread will traverse globally stored copy notify
> stateid and notice if any haven't been referenced in the
> lease period, if so, it'll remove them.
>
> Return single netaddr to advertise to the copy.
>
> Signed-off-by: Andy Adamson <andros@xxxxxxxxxx>
> Signed-off-by: Olga Kornievskaia <kolga@xxxxxxxxxx>
> ---
>  fs/nfsd/nfs4proc.c  |  47 ++++++++++++++++++--
>  fs/nfsd/nfs4state.c | 122 ++++++++++++++++++++++++++++++++++++++++++++++------
>  fs/nfsd/state.h     |  30 ++++++++++++-
>  fs/nfsd/xdr4.h      |   2 +-
>  4 files changed, 182 insertions(+), 19 deletions(-)
>
> diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
> index 3a2805d..47f6b52 100644
> --- a/fs/nfsd/nfs4proc.c
> +++ b/fs/nfsd/nfs4proc.c
> @@ -37,6 +37,7 @@
>  #include <linux/falloc.h>
>  #include <linux/slab.h>
>  #include <linux/kthread.h>
> +#include <linux/sunrpc/addr.h>
>
>  #include "idmap.h"
>  #include "cache.h"
> @@ -1229,7 +1230,7 @@ static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
>
>  static void cleanup_async_copy(struct nfsd4_copy *copy)
>  {
> -       nfs4_free_cp_state(copy);
> +       nfs4_free_copy_state(copy);
>         fput(copy->file_dst);
>         fput(copy->file_src);
>         spin_lock(&copy->cp_clp->async_lock);
> @@ -1283,7 +1284,7 @@ static int nfsd4_do_async_copy(void *data)
>                 async_copy = kzalloc(sizeof(struct nfsd4_copy), GFP_KERNEL);
>                 if (!async_copy)
>                         goto out;
> -               if (!nfs4_init_cp_state(nn, copy)) {
> +               if (!nfs4_init_copy_state(nn, copy)) {
>                         kfree(async_copy);
>                         goto out;
>                 }
> @@ -1350,7 +1351,47 @@ struct nfsd4_copy *
>  nfsd4_copy_notify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>                   union nfsd4_op_u *u)
>  {
> -       return nfserr_notsupp;
> +       struct nfsd4_copy_notify *cn = &u->copy_notify;
> +       __be32 status;
> +       struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id);
> +       struct nfs4_stid *stid;
> +       struct nfs4_cpntf_state *cps;
> +       struct nfs4_client *clp = cstate->clp;
> +
> +       status = nfs4_preprocess_stateid_op(rqstp, cstate, &cstate->current_fh,
> +                                       &cn->cpn_src_stateid, RD_STATE, NULL,
> +                                       NULL, &stid);
> +       if (status)
> +               return status;
> +
> +       cn->cpn_sec = nn->nfsd4_lease;
> +       cn->cpn_nsec = 0;
> +
> +       status = nfserrno(-ENOMEM);
> +       cps = nfs4_alloc_init_cpntf_state(nn, stid);
> +       if (!cps)
> +               goto out_err;
> +       memcpy(&cn->cpn_cnr_stateid, &cps->cp_stateid.stid, sizeof(stateid_t));
> +
> +       /* For now, only return one server address in cpn_src, the
> +        * address used by the client to connect to this server.
> +        */
> +       cn->cpn_src.nl4_type = NL4_NETADDR;
> +       status = nfsd4_set_netaddr((struct sockaddr *)&rqstp->rq_daddr,
> +                                &cn->cpn_src.u.nl4_addr);
> +       WARN_ON_ONCE(status);
> +       if (status) {
> +               free_cpntf_state(nn, cps);
> +               goto out;
> +       }
> +       spin_lock(&clp->cpntf_lock);
> +       list_add(&cps->cpntf, &clp->copy_notifies);
> +       spin_unlock(&clp->cpntf_lock);
> +out:
> +       return status;
> +out_err:
> +       nfs4_put_stid(stid);
> +       goto out;
>  }
>
>  static __be32
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index 78926c6..bd962f1 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -720,6 +720,7 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>         /* Will be incremented before return to client: */
>         refcount_set(&stid->sc_count, 1);
>         spin_lock_init(&stid->sc_lock);
> +       INIT_LIST_HEAD(&stid->sc_cp_list);
>
>         /*
>          * It shouldn't be a problem to reuse an opaque stateid value.
> @@ -739,33 +740,89 @@ struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *sla
>  /*
>   * Create a unique stateid_t to represent each COPY.
>   */
> -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
> +static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
> +                             unsigned char sc_type)
>  {
>         int new_id;
>
> +       stid->stid.si_opaque.so_clid.cl_boot = nn->boot_time;
> +       stid->stid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
> +       stid->sc_type = sc_type;
> +
>         idr_preload(GFP_KERNEL);
>         spin_lock(&nn->s2s_cp_lock);
> -       new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, copy, 0, 0, GFP_NOWAIT);
> +       new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
> +       stid->stid.si_opaque.so_id = new_id;
>         spin_unlock(&nn->s2s_cp_lock);
>         idr_preload_end();
>         if (new_id < 0)
>                 return 0;
> -       copy->cp_stateid.si_opaque.so_id = new_id;
> -       copy->cp_stateid.si_opaque.so_clid.cl_boot = nn->boot_time;
> -       copy->cp_stateid.si_opaque.so_clid.cl_id = nn->s2s_cp_cl_id;
>         return 1;
>  }
>
> -void nfs4_free_cp_state(struct nfsd4_copy *copy)
> +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy)
>  {
> -       struct nfsd_net *nn;
> +       return nfs4_init_cp_state(nn, &copy->cp_stateid, NFS4_COPY_STID);
> +}
>
> -       nn = net_generic(copy->cp_clp->net, nfsd_net_id);
> +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
> +                                                    struct nfs4_stid *p_stid)
> +{
> +       struct nfs4_cpntf_state *cps;
> +
> +       cps = kzalloc(sizeof(struct nfs4_cpntf_state), GFP_KERNEL);
> +       if (!cps)
> +               return NULL;
> +       cps->cp_p_stid = p_stid;
> +       cps->cpntf_time = get_seconds();
> +       cps->net = nn;
> +       if (!nfs4_init_cp_state(nn, &cps->cp_stateid, NFS4_COPYNOTIFY_STID))
> +               goto out_free;
> +       spin_lock(&p_stid->sc_lock);
> +       list_add(&cps->cp_list, &p_stid->sc_cp_list);
> +       p_stid->sc_cp_list_size++;
> +       spin_unlock(&p_stid->sc_lock);
> +       return cps;
> +out_free:
> +       kfree(cps);
> +       return NULL;
> +}
> +void _free_copy_cpntf_stateid(struct nfsd_net *nn, stateid_t *stid)
> +{
>         spin_lock(&nn->s2s_cp_lock);
> -       idr_remove(&nn->s2s_cp_stateids, copy->cp_stateid.si_opaque.so_id);
> +       idr_remove(&nn->s2s_cp_stateids, stid->si_opaque.so_id);
>         spin_unlock(&nn->s2s_cp_lock);
>  }
>
> +void nfs4_free_copy_state(struct nfsd4_copy *copy)
> +{
> +       struct nfsd_net *nn;
> +
> +       nn = net_generic(copy->cp_clp->net, nfsd_net_id);
> +       _free_copy_cpntf_stateid(nn, &copy->cp_stateid.stid);
> +}
> +
> +static void nfs4_free_cpntf_statelist(struct net *net, struct nfs4_stid *stid)
> +{
> +       struct nfs4_cpntf_state *cps;
> +       struct nfsd_net *nn;
> +
> +       nn = net_generic(net, nfsd_net_id);
> +       spin_lock(&stid->sc_lock);
> +       while (!list_empty(&stid->sc_cp_list)) {
> +               cps = list_first_entry(&stid->sc_cp_list,
> +                                      struct nfs4_cpntf_state, cp_list);
> +               stid->sc_cp_list_size--;
> +               list_del(&cps->cp_list);
> +               _free_copy_cpntf_stateid(nn, &cps->cp_stateid.stid);
> +               spin_lock(&stid->sc_client->cpntf_lock);
> +               list_del(&cps->cpntf);
> +               spin_unlock(&stid->sc_client->cpntf_lock);
> +               kfree(cps);
> +       }
> +       spin_unlock(&stid->sc_lock);
> +}
> +
>  static struct nfs4_ol_stateid * nfs4_alloc_open_stateid(struct nfs4_client *clp)
>  {
>         struct nfs4_stid *stid;
> @@ -905,15 +962,25 @@ static void block_delegations(struct knfsd_fh *fh)
>  {
>         struct nfs4_file *fp = s->sc_file;
>         struct nfs4_client *clp = s->sc_client;
> +       size_t size = 0;
> +
> +       spin_lock(&s->sc_lock);
> +       size = s->sc_cp_list_size;
> +       spin_unlock(&s->sc_lock);
>
>         might_lock(&clp->cl_lock);
>
>         if (!refcount_dec_and_lock(&s->sc_count, &clp->cl_lock)) {
> -               wake_up_all(&close_wq);
> -               return;
> +               if (!refcount_sub_and_test_checked(s->sc_cp_list_size,
> +                               &s->sc_count)) {
> +                       refcount_add_checked(s->sc_cp_list_size, &s->sc_count);
> +                       wake_up_all(&close_wq);
> +                       return;
> +               }
>         }
>         idr_remove(&clp->cl_stateids, s->sc_stateid.si_opaque.so_id);
>         spin_unlock(&clp->cl_lock);
> +       nfs4_free_cpntf_statelist(clp->net, s);
>         s->sc_free(s);
>         if (fp)
>                 put_nfs4_file(fp);

While this passes my testing, in theory this allows for the race that
we get the copy notify size but then offload_cancel arrive and change
the value. Then refcount_sub_and test_check would have an incorrect
value (can subtract larger than an actual reference count). I have no
solution for that as there is no refcount_sub_and_lock() that will
allow to decrement by a multiple under a lock. Thoughts?

> @@ -1881,6 +1948,8 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
>  #endif
>         INIT_LIST_HEAD(&clp->async_copies);
>         spin_lock_init(&clp->async_lock);
> +       INIT_LIST_HEAD(&clp->copy_notifies);
> +       spin_lock_init(&clp->cpntf_lock);
>         spin_lock_init(&clp->cl_lock);
>         rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
>         return clp;
> @@ -2909,7 +2978,8 @@ static bool client_has_state(struct nfs4_client *clp)
>  #endif
>                 || !list_empty(&clp->cl_delegations)
>                 || !list_empty(&clp->cl_sessions)
> -               || !list_empty(&clp->async_copies);
> +               || !list_empty(&clp->async_copies)
> +               || !list_empty(&clp->copy_notifies);
>  }
>
>  static __be32 copy_impl_id(struct nfs4_client *clp,
> @@ -5184,9 +5254,10 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
>         struct nfs4_delegation *dp;
>         struct nfs4_ol_stateid *stp;
>         struct nfsd4_blocked_lock *nbl;
> -       struct list_head *pos, *next, reaplist;
> +       struct list_head *pos, *next, *ppos, *pnext, reaplist, cpntflist;
>         time_t cutoff = get_seconds() - nn->nfsd4_lease;
>         time_t t, new_timeo = nn->nfsd4_lease;
> +       struct nfs4_cpntf_state *cps;
>
>         dprintk("NFSD: laundromat service - starting\n");
>
> @@ -5197,9 +5268,19 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
>         dprintk("NFSD: end of grace period\n");
>         nfsd4_end_grace(nn);
>         INIT_LIST_HEAD(&reaplist);
> +       INIT_LIST_HEAD(&cpntflist);
> +
>         spin_lock(&nn->client_lock);
>         list_for_each_safe(pos, next, &nn->client_lru) {
>                 clp = list_entry(pos, struct nfs4_client, cl_lru);
> +               spin_lock(&clp->cpntf_lock);
> +               list_for_each_safe(ppos, pnext, &clp->copy_notifies) {
> +                       cps = list_entry(ppos, struct nfs4_cpntf_state, cpntf);
> +                       if (!time_after((unsigned long)cps->cpntf_time,
> +                                       (unsigned long)cutoff))
> +                               list_move(&cps->cpntf, &cpntflist);
> +               }
> +               spin_unlock(&clp->cpntf_lock);
>                 if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
>                         t = clp->cl_time - cutoff;
>                         new_timeo = min(new_timeo, t);
> @@ -5213,6 +5294,11 @@ static bool clients_still_reclaiming(struct nfsd_net *nn)
>                 list_add(&clp->cl_lru, &reaplist);
>         }
>         spin_unlock(&nn->client_lock);
> +       list_for_each_safe(pos, next, &cpntflist) {
> +               cps = list_entry(pos, struct nfs4_cpntf_state, cpntf);
> +               list_del_init(&cps->cpntf);
> +               free_cpntf_state(cps->net, cps);
> +       }
>         list_for_each_safe(pos, next, &reaplist) {
>                 clp = list_entry(pos, struct nfs4_client, cl_lru);
>                 dprintk("NFSD: purging unused client (clientid %08x)\n",
> @@ -5576,6 +5662,16 @@ static __be32 nfsd4_validate_stateid(struct nfs4_client *cl, stateid_t *stateid)
>
>         return 0;
>  }
> +void free_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps)
> +{
> +       spin_lock(&cps->cp_p_stid->sc_lock);
> +       list_del(&cps->cp_list);
> +       cps->cp_p_stid->sc_cp_list_size--;
> +       spin_unlock(&cps->cp_p_stid->sc_lock);
> +       _free_copy_cpntf_stateid(nn, &cps->cp_stateid.stid);
> +       nfs4_put_stid(cps->cp_p_stid);
> +       kfree(cps);
> +}
>
>  /*
>   * Checks for stateid operations
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 25c7a45..16be2f4 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -56,6 +56,13 @@
>         stateid_opaque_t        si_opaque;
>  } stateid_t;
>
> +typedef struct {
> +       stateid_t               stid;
> +#define NFS4_COPY_STID 1
> +#define NFS4_COPYNOTIFY_STID 2
> +       unsigned char           sc_type;
> +} copy_stateid_t;
> +
>  #define STATEID_FMT    "(%08x/%08x/%08x/%08x)"
>  #define STATEID_VAL(s) \
>         (s)->si_opaque.so_clid.cl_boot, \
> @@ -96,6 +103,8 @@ struct nfs4_stid {
>  #define NFS4_REVOKED_DELEG_STID 16
>  #define NFS4_CLOSED_DELEG_STID 32
>  #define NFS4_LAYOUT_STID 64
> +       struct list_head        sc_cp_list;
> +       size_t                  sc_cp_list_size;
>         unsigned char           sc_type;
>         stateid_t               sc_stateid;
>         spinlock_t              sc_lock;
> @@ -104,6 +113,18 @@ struct nfs4_stid {
>         void                    (*sc_free)(struct nfs4_stid *);
>  };
>
> +/* Keep a list of stateids issued by the COPY_NOTIFY, associate it with the
> + * parent OPEN/LOCK/DELEG stateid.
> + */
> +struct nfs4_cpntf_state {
> +       copy_stateid_t          cp_stateid;
> +       struct list_head        cp_list;        /* per parent nfs4_stid */
> +       struct nfs4_stid        *cp_p_stid;     /* pointer to parent */
> +       struct list_head        cpntf;          /* list of copy_notifies */
> +       time_t                  cpntf_time;     /* last time stateid used */
> +       struct nfsd_net         *net;
> +};
> +
>  /*
>   * Represents a delegation stateid. The nfs4_client holds references to these
>   * and they are put when it is being destroyed or when the delegation is
> @@ -367,6 +388,8 @@ struct nfs4_client {
>         struct net              *net;
>         struct list_head        async_copies;   /* list of async copies */
>         spinlock_t              async_lock;     /* lock for async copies */
> +       struct list_head        copy_notifies;  /* list of copy notify stids */
> +       spinlock_t              cpntf_lock;     /* lock for copy_notifies */
>  };
>
>  /* struct nfs4_client_reset
> @@ -623,8 +646,10 @@ __be32 nfsd4_lookup_stateid(struct nfsd4_compound_state *cstate,
>                      struct nfs4_stid **s, struct nfsd_net *nn);
>  struct nfs4_stid *nfs4_alloc_stid(struct nfs4_client *cl, struct kmem_cache *slab,
>                                   void (*sc_free)(struct nfs4_stid *));
> -int nfs4_init_cp_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
> -void nfs4_free_cp_state(struct nfsd4_copy *copy);
> +int nfs4_init_copy_state(struct nfsd_net *nn, struct nfsd4_copy *copy);
> +void nfs4_free_copy_state(struct nfsd4_copy *copy);
> +struct nfs4_cpntf_state *nfs4_alloc_init_cpntf_state(struct nfsd_net *nn,
> +                       struct nfs4_stid *p_stid);
>  void nfs4_unhash_stid(struct nfs4_stid *s);
>  void nfs4_put_stid(struct nfs4_stid *s);
>  void nfs4_inc_and_copy_stateid(stateid_t *dst, struct nfs4_stid *stid);
> @@ -654,6 +679,7 @@ extern struct nfs4_client_reclaim *nfs4_client_to_reclaim(struct xdr_netobj name
>  extern void nfs4_put_copy(struct nfsd4_copy *copy);
>  extern struct nfsd4_copy *
>  find_async_copy(struct nfs4_client *clp, stateid_t *staetid);
> +extern void free_cpntf_state(struct nfsd_net *nn, struct nfs4_cpntf_state *cps);
>  static inline void get_nfs4_file(struct nfs4_file *fi)
>  {
>         refcount_inc(&fi->fi_ref);
> diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
> index c497032..c6c8b43 100644
> --- a/fs/nfsd/xdr4.h
> +++ b/fs/nfsd/xdr4.h
> @@ -543,7 +543,7 @@ struct nfsd4_copy {
>         struct file             *file_src;
>         struct file             *file_dst;
>
> -       stateid_t               cp_stateid;
> +       copy_stateid_t          cp_stateid;
>
>         struct list_head        copies;
>         struct task_struct      *copy_task;
> --
> 1.8.3.1
>



[Index of Archives]     [Linux Filesystem Development]     [Linux USB Development]     [Linux Media Development]     [Video for Linux]     [Linux NILFS]     [Linux Audio Users]     [Yosemite Info]     [Linux SCSI]

  Powered by Linux