Re: [PATCH 01/11] libceph: support prefix and suffix in bio_iter

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Dec 3, 2018 at 1:51 PM Dongsheng Yang
<dongsheng.yang@xxxxxxxxxxxx> wrote:
>
> When we want to add some prefix to the bio data, such as journal header,
> we don't want to copy it from bio into a new larger memory. Instead, we need
> a prefix page and suffix page to store the header and footer in bio_iter.
>
> Signed-off-by: Dongsheng Yang <dongsheng.yang@xxxxxxxxxxxx>
> ---
>  include/linux/ceph/messenger.h |  9 ++++
>  net/ceph/messenger.c           | 96 ++++++++++++++++++++++++++++++++----------
>  2 files changed, 83 insertions(+), 22 deletions(-)
>
> diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
> index 800a212..0da6a4b 100644
> --- a/include/linux/ceph/messenger.h
> +++ b/include/linux/ceph/messenger.h
> @@ -87,6 +87,15 @@ enum ceph_msg_data_type {
>  struct ceph_bio_iter {
>         struct bio *bio;
>         struct bvec_iter iter;
> +       size_t bio_len;
> +
> +       struct page *prefix_page;
> +       unsigned int prefix_offset;
> +       unsigned int prefix_len;
> +
> +       struct page *suffix_page;
> +       unsigned int suffix_offset;
> +       unsigned int suffix_len;
>  };
>
>  #define __ceph_bio_iter_advance_step(it, n, STEP) do {                       \
> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
> index 57fcc6b..64c70c5 100644
> --- a/net/ceph/messenger.c
> +++ b/net/ceph/messenger.c
> @@ -828,23 +828,46 @@ static void ceph_msg_data_bio_cursor_init(struct ceph_msg_data_cursor *cursor,
>
>         cursor->resid = min_t(size_t, length, data->bio_length);
>         *it = data->bio_pos;
> -       if (cursor->resid < it->iter.bi_size)
> -               it->iter.bi_size = cursor->resid;
>
> -       BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
> -       cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
> +       if (cursor->resid < it->iter.bi_size + it->prefix_len + it->suffix_len)
> +               it->iter.bi_size = cursor->resid - it->prefix_len - it->suffix_len;
> +
> +       it->bio_len = cursor->resid - it->prefix_len - it->suffix_len;
> +       if (it->suffix_len) {
> +               cursor->last_piece = cursor->resid == it->suffix_len;
> +       } else if (it->bio_len) {
> +               cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
> +       } else if (it->prefix_len) {
> +               cursor->last_piece = cursor->resid == it->prefix_len;
> +       } else {
> +               BUG();
> +       }
>  }
>
>  static struct page *ceph_msg_data_bio_next(struct ceph_msg_data_cursor *cursor,
>                                                 size_t *page_offset,
>                                                 size_t *length)
>  {
> -       struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio,
> -                                          cursor->bio_iter.iter);
> +       struct ceph_bio_iter *it = &cursor->bio_iter;
>
> -       *page_offset = bv.bv_offset;
> -       *length = bv.bv_len;
> -       return bv.bv_page;
> +       if (it->prefix_len) {
> +               *page_offset = it->prefix_offset;
> +               *length = it->prefix_len;
> +               return it->prefix_page;
> +       } else if (it->bio_len) {
> +               struct bio_vec bv = bio_iter_iovec(cursor->bio_iter.bio,
> +                                                  cursor->bio_iter.iter);
> +
> +               *page_offset = bv.bv_offset;
> +               *length = bv.bv_len;
> +               return bv.bv_page;
> +       } else {
> +               BUG_ON(!it->suffix_len);
> +
> +               *page_offset = it->suffix_offset;
> +               *length = it->suffix_len;
> +               return it->suffix_page;
> +       }
>  }
>
>  static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
> @@ -852,29 +875,58 @@ static bool ceph_msg_data_bio_advance(struct ceph_msg_data_cursor *cursor,
>  {
>         struct ceph_bio_iter *it = &cursor->bio_iter;
>
> +       if (!bytes)
> +               return false;
> +
>         BUG_ON(bytes > cursor->resid);
> -       BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
>         cursor->resid -= bytes;
> -       bio_advance_iter(it->bio, &it->iter, bytes);
> +       if (it->prefix_len) {
> +               BUG_ON(bytes > it->prefix_len);
> +               it->prefix_offset += bytes;
> +               it->prefix_len -= bytes;
> +               if (it->prefix_len)
> +                       return false;
> +       } else if (it->bio_len) {
> +               BUG_ON(bytes > bio_iter_len(it->bio, it->iter));
> +               bio_advance_iter(it->bio, &it->iter, bytes);
> +               it->bio_len -= bytes;
> +               if (!cursor->resid) {
> +                       BUG_ON(!cursor->last_piece);
> +                       return false;   /* no more data */
> +               }
> +               if (it->iter.bi_size && it->iter.bi_bvec_done)
> +                       return false;   /* more bytes to process in this segment */
> +
> +               if (it->bio_len && !it->iter.bi_size) {
> +                       it->bio = it->bio->bi_next;
> +                       it->iter = it->bio->bi_iter;
> +                       if (cursor->resid < it->iter.bi_size)
> +                               it->iter.bi_size = cursor->resid;
> +               }
> +       } else {
> +               BUG_ON(!it->suffix_len);
> +               it->suffix_offset += bytes;
> +               it->suffix_len -= bytes;
> +               if (it->suffix_len)
> +                       return false;
> +       }
>
>         if (!cursor->resid) {
>                 BUG_ON(!cursor->last_piece);
>                 return false;   /* no more data */
>         }
>
> -       if (!bytes || (it->iter.bi_size && it->iter.bi_bvec_done))
> -               return false;   /* more bytes to process in this segment */
> -
> -       if (!it->iter.bi_size) {
> -               it->bio = it->bio->bi_next;
> -               it->iter = it->bio->bi_iter;
> -               if (cursor->resid < it->iter.bi_size)
> -                       it->iter.bi_size = cursor->resid;
> +       BUG_ON(cursor->last_piece);
> +       if (it->suffix_len) {
> +               cursor->last_piece = cursor->resid == it->suffix_len;
> +       } else if (it->bio_len) {
> +               cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
> +       } else if (it->prefix_len) {
> +               cursor->last_piece = cursor->resid == it->prefix_len;
> +       } else {
> +               BUG();
>         }
>
> -       BUG_ON(cursor->last_piece);
> -       BUG_ON(cursor->resid < bio_iter_len(it->bio, it->iter));
> -       cursor->last_piece = cursor->resid == bio_iter_len(it->bio, it->iter);
>         return true;
>  }
>  #endif /* CONFIG_BLOCK */

Have you looked at message data items (ceph_msg_data) for this?
A message can have multiple data items, each of different type (BIO,
PAGES, etc) and with its own iterator.  The messenger will exhaust them
in order, so you can create a message with three data items: prefix
(type PAGES), data (type BIO), suffix (type PAGES).

If the data item framework isn't flexible enough, we should work on
extending it.  Special casing the bio iterator is the wrong thing to
do.

Thanks,

                Ilya



[Index of Archives]     [CEPH Users]     [Ceph Large]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux