Re: [PATCH v5 3/3] libceph: just wait for more data to be available on the socket

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2024-01-23 at 21:12 +0800, xiubli@xxxxxxxxxx wrote:
> From: Xiubo Li <xiubli@xxxxxxxxxx>
> 
> A short read may occur while reading the message footer from the
> socket.  Later, when the socket is ready for another read, the
> messenger shoudl invoke all read_partial* handlers, except the
> read_partial_sparse_msg_data().  The contract between the messenger
> and these handlers is that the handlers should bail if the area
> of the message is responsible for is already processed.  So,
> in this case, it's expected that read_sparse_msg_data() would bail,
> allowing the messenger to invoke read_partial() for the footer and
> pick up where it left off.
> 
> However read_partial_sparse_msg_data() violates that contract and
> ends up calling into the state machine in the OSD client. The
> sparse-read state machine just assumes that it's a new op and
> interprets some piece of the footer as the sparse-read extents/data
> and then returns bogus extents/data length, etc.
> 
> This will just reuse the 'total_resid' to determine whether should
> the read_partial_sparse_msg_data() bail out or not. Because once
> it reaches to zero that means all the extents and data have been
> successfully received in last read, else it could break out when
> partially reading any of the extents and data. And then the
> osd_sparse_read() could continue where it left off.
> 

Thanks for the detailed description. That really helps!

> URL: https://tracker.ceph.com/issues/63586
> Signed-off-by: Xiubo Li <xiubli@xxxxxxxxxx>
> ---
>  include/linux/ceph/messenger.h |  2 +-
>  net/ceph/messenger_v1.c        | 25 +++++++++++++------------
>  net/ceph/messenger_v2.c        |  4 ++--
>  net/ceph/osd_client.c          |  9 +++------
>  4 files changed, 19 insertions(+), 21 deletions(-)
> 
> diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
> index 2eaaabbe98cb..1717cc57cdac 100644
> --- a/include/linux/ceph/messenger.h
> +++ b/include/linux/ceph/messenger.h
> @@ -283,7 +283,7 @@ struct ceph_msg {
>  	struct kref kref;
>  	bool more_to_follow;
>  	bool needs_out_seq;
> -	bool sparse_read;
> +	u64 sparse_read_total;
>  	int front_alloc_len;
>  
>  	struct ceph_msgpool *pool;
> diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
> index 4cb60bacf5f5..4c76c8390de1 100644
> --- a/net/ceph/messenger_v1.c
> +++ b/net/ceph/messenger_v1.c
> @@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
>  static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
>  {
>  	/* Initialize data cursor if it's not a sparse read */
> -	if (!msg->sparse_read)
> -		ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
> +	u64 len = msg->sparse_read_total ? : data_len;
> +
> +	ceph_msg_data_cursor_init(&msg->cursor, msg, len);
>  }
>  
>  /*
> @@ -1036,7 +1037,7 @@ static int read_partial_sparse_msg_data(struct ceph_connection *con)
>  	if (do_datacrc)
>  		crc = con->in_data_crc;
>  
> -	do {
> +	while (cursor->total_resid) {
>  		if (con->v1.in_sr_kvec.iov_base)
>  			ret = read_partial_message_chunk(con,
>  							 &con->v1.in_sr_kvec,
> @@ -1044,23 +1045,23 @@ static int read_partial_sparse_msg_data(struct ceph_connection *con)
>  							 &crc);
>  		else if (cursor->sr_resid > 0)
>  			ret = read_partial_sparse_msg_extent(con, &crc);
> -
> -		if (ret <= 0) {
> -			if (do_datacrc)
> -				con->in_data_crc = crc;
> -			return ret;
> -		}
> +		if (ret <= 0)
> +			break;
>  
>  		memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
>  		ret = con->ops->sparse_read(con, cursor,
>  				(char **)&con->v1.in_sr_kvec.iov_base);
> +		if (ret <= 0) {
> +			ret = ret ? : 1; /* must return > 0 to indicate success */

nit: this syntax is a gcc-ism (AIUI) and is not preferred. It'd be
better spell it out in this case (particularly since it's only 4 extra
chars:

			ret = ret ? ret : 1;

> +			break;
> +		}
>  		con->v1.in_sr_len = ret;
> -	} while (ret > 0);
> +	}
>  
>  	if (do_datacrc)
>  		con->in_data_crc = crc;
>  
> -	return ret < 0 ? ret : 1;  /* must return > 0 to indicate success */
> +	return ret;
>  }
>  
>  static int read_partial_msg_data(struct ceph_connection *con)
> @@ -1253,7 +1254,7 @@ static int read_partial_message(struct ceph_connection *con)
>  		if (!m->num_data_items)
>  			return -EIO;
>  
> -		if (m->sparse_read)
> +		if (m->sparse_read_total)
>  			ret = read_partial_sparse_msg_data(con);
>  		else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
>  			ret = read_partial_msg_data_bounce(con);
> diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
> index f8ec60e1aba3..a0ca5414b333 100644
> --- a/net/ceph/messenger_v2.c
> +++ b/net/ceph/messenger_v2.c
> @@ -1128,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con)
>  	struct sg_table enc_sgt = {};
>  	struct sg_table sgt = {};
>  	struct page **pages = NULL;
> -	bool sparse = con->in_msg->sparse_read;
> +	bool sparse = !!con->in_msg->sparse_read_total;
>  	int dpos = 0;
>  	int tail_len;
>  	int ret;
> @@ -2060,7 +2060,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
>  	}
>  
>  	if (data_len(msg)) {
> -		if (msg->sparse_read)
> +		if (msg->sparse_read_total)
>  			con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
>  		else
>  			con->v2.in_state = IN_S_PREPARE_READ_DATA;
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 6beab9be51e2..1a5b1e1e24ca 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
>  	}
>  
>  	m = ceph_msg_get(req->r_reply);
> -	m->sparse_read = (bool)srlen;
> +	m->sparse_read_total = srlen;
>  
>  	dout("get_reply tid %lld %p\n", tid, m);
>  
> @@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con,
>  	}
>  
>  	if (o->o_sparse_op_idx < 0) {
> -		u64 srlen = sparse_data_requested(req);
> -
> -		dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
> -		     __func__, o->o_osd, srlen);
> -		ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
> +		dout("%s: [%d] starting new sparse read req\n",
> +		     __func__, o->o_osd);
>  	} else {
>  		u64 end;
>  

The patch itself looks fine though.

Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxx>





[Index of Archives]     [CEPH Users]     [Ceph Large]     [Ceph Dev]     [Information on CEPH]     [Linux BTRFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux