Re: [PATCH 11/20] rbd: introduce copyup state machine

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Jun 25, 2019 at 10:42 AM Ilya Dryomov <idryomov@xxxxxxxxx> wrote:
>
> Both write and copyup paths will get more complex with object map.
> Factor copyup code out into a separate state machine.
>
> While at it, take advantage of obj_req->osd_reqs list and issue empty
> and current snapc OSD requests together, one after another.
>
> Signed-off-by: Ilya Dryomov <idryomov@xxxxxxxxx>
> ---
>  drivers/block/rbd.c | 187 +++++++++++++++++++++++++++++---------------
>  1 file changed, 123 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/block/rbd.c b/drivers/block/rbd.c
> index 2bafdee61dbd..34bd45d336e6 100644
> --- a/drivers/block/rbd.c
> +++ b/drivers/block/rbd.c
> @@ -226,6 +226,7 @@ enum obj_operation_type {
>
>  #define RBD_OBJ_FLAG_DELETION                  (1U << 0)
>  #define RBD_OBJ_FLAG_COPYUP_ENABLED            (1U << 1)
> +#define RBD_OBJ_FLAG_COPYUP_ZEROS              (1U << 2)
>
>  enum rbd_obj_read_state {
>         RBD_OBJ_READ_START = 1,
> @@ -261,9 +262,15 @@ enum rbd_obj_read_state {
>  enum rbd_obj_write_state {
>         RBD_OBJ_WRITE_START = 1,
>         RBD_OBJ_WRITE_OBJECT,
> -       RBD_OBJ_WRITE_READ_FROM_PARENT,
> -       RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC,
> -       RBD_OBJ_WRITE_COPYUP_OPS,
> +       __RBD_OBJ_WRITE_COPYUP,
> +       RBD_OBJ_WRITE_COPYUP,
> +};

Nit: should the state diagram above this enum be updated to match this change?

> +enum rbd_obj_copyup_state {
> +       RBD_OBJ_COPYUP_START = 1,
> +       RBD_OBJ_COPYUP_READ_PARENT,
> +       __RBD_OBJ_COPYUP_WRITE_OBJECT,
> +       RBD_OBJ_COPYUP_WRITE_OBJECT,
>  };
>
>  struct rbd_obj_request {
> @@ -286,12 +293,15 @@ struct rbd_obj_request {
>                         u32                     bvec_idx;
>                 };
>         };
> +
> +       enum rbd_obj_copyup_state copyup_state;
>         struct bio_vec          *copyup_bvecs;
>         u32                     copyup_bvec_count;
>
>         struct list_head        osd_reqs;       /* w/ r_unsafe_item */
>
>         struct mutex            state_mutex;
> +       struct pending_result   pending;
>         struct kref             kref;
>  };
>
> @@ -2568,8 +2578,8 @@ static bool is_zero_bvecs(struct bio_vec *bvecs, u32 bytes)
>
>  #define MODS_ONLY      U32_MAX
>
> -static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req,
> -                                           u32 bytes)
> +static int rbd_obj_copyup_empty_snapc(struct rbd_obj_request *obj_req,
> +                                     u32 bytes)
>  {
>         struct ceph_osd_request *osd_req;
>         int ret;
> @@ -2595,7 +2605,8 @@ static int rbd_obj_issue_copyup_empty_snapc(struct rbd_obj_request *obj_req,
>         return 0;
>  }
>
> -static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
> +static int rbd_obj_copyup_current_snapc(struct rbd_obj_request *obj_req,
> +                                       u32 bytes)
>  {
>         struct ceph_osd_request *osd_req;
>         int num_ops = count_write_ops(obj_req);
> @@ -2628,33 +2639,6 @@ static int rbd_obj_issue_copyup_ops(struct rbd_obj_request *obj_req, u32 bytes)
>         return 0;
>  }
>
> -static int rbd_obj_issue_copyup(struct rbd_obj_request *obj_req, u32 bytes)
> -{
> -       /*
> -        * Only send non-zero copyup data to save some I/O and network
> -        * bandwidth -- zero copyup data is equivalent to the object not
> -        * existing.
> -        */
> -       if (is_zero_bvecs(obj_req->copyup_bvecs, bytes)) {
> -               dout("%s obj_req %p detected zeroes\n", __func__, obj_req);
> -               bytes = 0;
> -       }
> -
> -       if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
> -               /*
> -                * Send a copyup request with an empty snapshot context to
> -                * deep-copyup the object through all existing snapshots.
> -                * A second request with the current snapshot context will be
> -                * sent for the actual modification.
> -                */
> -               obj_req->write_state = RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC;
> -               return rbd_obj_issue_copyup_empty_snapc(obj_req, bytes);
> -       }
> -
> -       obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
> -       return rbd_obj_issue_copyup_ops(obj_req, bytes);
> -}
> -
>  static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
>  {
>         u32 i;
> @@ -2688,7 +2672,7 @@ static int setup_copyup_bvecs(struct rbd_obj_request *obj_req, u64 obj_overlap)
>   * target object up to the overlap point (if any) from the parent,
>   * so we can use it for a copyup.
>   */
> -static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req)
> +static int rbd_obj_copyup_read_parent(struct rbd_obj_request *obj_req)
>  {
>         struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
>         int ret;
> @@ -2703,22 +2687,111 @@ static int rbd_obj_handle_write_guard(struct rbd_obj_request *obj_req)
>                  * request -- pass MODS_ONLY since the copyup isn't needed
>                  * anymore.
>                  */
> -               obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
> -               return rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY);
> +               return rbd_obj_copyup_current_snapc(obj_req, MODS_ONLY);
>         }
>
>         ret = setup_copyup_bvecs(obj_req, rbd_obj_img_extents_bytes(obj_req));
>         if (ret)
>                 return ret;
>
> -       obj_req->write_state = RBD_OBJ_WRITE_READ_FROM_PARENT;
>         return rbd_obj_read_from_parent(obj_req);
>  }
>
> +static void rbd_obj_copyup_write_object(struct rbd_obj_request *obj_req)
> +{
> +       u32 bytes = rbd_obj_img_extents_bytes(obj_req);
> +       int ret;
> +
> +       rbd_assert(!obj_req->pending.result && !obj_req->pending.num_pending);
> +
> +       /*
> +        * Only send non-zero copyup data to save some I/O and network
> +        * bandwidth -- zero copyup data is equivalent to the object not
> +        * existing.
> +        */
> +       if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ZEROS)
> +               bytes = 0;
> +
> +       if (obj_req->img_request->snapc->num_snaps && bytes > 0) {
> +               /*
> +                * Send a copyup request with an empty snapshot context to
> +                * deep-copyup the object through all existing snapshots.
> +                * A second request with the current snapshot context will be
> +                * sent for the actual modification.
> +                */
> +               ret = rbd_obj_copyup_empty_snapc(obj_req, bytes);
> +               if (ret) {
> +                       obj_req->pending.result = ret;
> +                       return;
> +               }
> +
> +               obj_req->pending.num_pending++;
> +               bytes = MODS_ONLY;
> +       }
> +
> +       ret = rbd_obj_copyup_current_snapc(obj_req, bytes);
> +       if (ret) {
> +               obj_req->pending.result = ret;
> +               return;
> +       }
> +
> +       obj_req->pending.num_pending++;
> +}
> +
> +static bool rbd_obj_advance_copyup(struct rbd_obj_request *obj_req, int *result)
> +{
> +       int ret;
> +
> +again:
> +       switch (obj_req->copyup_state) {
> +       case RBD_OBJ_COPYUP_START:
> +               rbd_assert(!*result);
> +
> +               ret = rbd_obj_copyup_read_parent(obj_req);
> +               if (ret) {
> +                       *result = ret;
> +                       return true;
> +               }
> +               if (obj_req->num_img_extents)
> +                       obj_req->copyup_state = RBD_OBJ_COPYUP_READ_PARENT;
> +               else
> +                       obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
> +               return false;
> +       case RBD_OBJ_COPYUP_READ_PARENT:
> +               if (*result)
> +                       return true;
> +
> +               if (is_zero_bvecs(obj_req->copyup_bvecs,
> +                                 rbd_obj_img_extents_bytes(obj_req))) {
> +                       dout("%s %p detected zeros\n", __func__, obj_req);
> +                       obj_req->flags |= RBD_OBJ_FLAG_COPYUP_ZEROS;
> +               }
> +
> +               rbd_obj_copyup_write_object(obj_req);
> +               if (!obj_req->pending.num_pending) {
> +                       *result = obj_req->pending.result;
> +                       obj_req->copyup_state = RBD_OBJ_COPYUP_WRITE_OBJECT;
> +                       goto again;
> +               }
> +               obj_req->copyup_state = __RBD_OBJ_COPYUP_WRITE_OBJECT;
> +               return false;
> +       case __RBD_OBJ_COPYUP_WRITE_OBJECT:
> +               if (!pending_result_dec(&obj_req->pending, result))
> +                       return false;
> +               /* fall through */
> +       case RBD_OBJ_COPYUP_WRITE_OBJECT:
> +               return true;
> +       default:
> +               BUG();
> +       }
> +}
> +
>  static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
>  {
> +       struct rbd_device *rbd_dev = obj_req->img_request->rbd_dev;
>         int ret;
>
> +again:
>         switch (obj_req->write_state) {
>         case RBD_OBJ_WRITE_START:
>                 rbd_assert(!*result);
> @@ -2733,12 +2806,10 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
>         case RBD_OBJ_WRITE_OBJECT:
>                 if (*result == -ENOENT) {
>                         if (obj_req->flags & RBD_OBJ_FLAG_COPYUP_ENABLED) {
> -                               ret = rbd_obj_handle_write_guard(obj_req);
> -                               if (ret) {
> -                                       *result = ret;
> -                                       return true;
> -                               }
> -                               return false;
> +                               *result = 0;
> +                               obj_req->copyup_state = RBD_OBJ_COPYUP_START;
> +                               obj_req->write_state = __RBD_OBJ_WRITE_COPYUP;
> +                               goto again;
>                         }
>                         /*
>                          * On a non-existent object:
> @@ -2747,31 +2818,19 @@ static bool rbd_obj_advance_write(struct rbd_obj_request *obj_req, int *result)
>                         if (obj_req->flags & RBD_OBJ_FLAG_DELETION)
>                                 *result = 0;
>                 }
> -               /* fall through */
> -       case RBD_OBJ_WRITE_COPYUP_OPS:
> -               return true;
> -       case RBD_OBJ_WRITE_READ_FROM_PARENT:
>                 if (*result)
>                         return true;
>
> -               ret = rbd_obj_issue_copyup(obj_req,
> -                                          rbd_obj_img_extents_bytes(obj_req));
> -               if (ret) {
> -                       *result = ret;
> -                       return true;
> -               }
> -               return false;
> -       case RBD_OBJ_WRITE_COPYUP_EMPTY_SNAPC:
> +               obj_req->write_state = RBD_OBJ_WRITE_COPYUP;
> +               goto again;
> +       case __RBD_OBJ_WRITE_COPYUP:
> +               if (!rbd_obj_advance_copyup(obj_req, result))
> +                       return false;
> +               /* fall through */
> +       case RBD_OBJ_WRITE_COPYUP:
>                 if (*result)
> -                       return true;
> -
> -               obj_req->write_state = RBD_OBJ_WRITE_COPYUP_OPS;
> -               ret = rbd_obj_issue_copyup_ops(obj_req, MODS_ONLY);
> -               if (ret) {
> -                       *result = ret;
> -                       return true;
> -               }
> -               return false;
> +                       rbd_warn(rbd_dev, "copyup failed: %d", *result);
> +               return true;
>         default:
>                 BUG();
>         }
> --
> 2.19.2
>


-- 
Jason



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux