Re: [PATCH V2 2/2] ublk_drv: add support for UBLK_IO_NEED_GET_DATA

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2022/7/27 10:05, Ming Lei wrote:
> On Tue, Jul 26, 2022 at 07:44:05PM +0800, ZiyangZhang wrote:
>> UBLK_IO_NEED_GET_DATA is one ublk IO command. It is designed for a user
>> application who wants to allocate IO buffer and set IO buffer address
>> only after it receives an IO request from ublksrv. This is a reasonable
>> scenario because these users may use a RPC framework as one IO backend
>> to handle IO requests passed from ublksrv. And a RPC framework may
>> allocate its own buffer(or memory pool).
>>
>> This new feature (UBLK_F_NEED_GET_DATA) is optional for ublk users.
>> Related userspace code has been added in ublksrv[1] as one pull request.
>>
>> Test cases for this feature are added in ublksrv and all the tests pass.
>> The performance result shows that this new feature does bring additional
>> latency because one IO is issued back to ublk_drv once again to copy data
>> from bio vectors to user-provided data buffer. UBLK_IO_NEED_GET_DATA is
>> suitable for bigger block size such as 512B or 1MB.
>>
>> [1] https://github.com/ming1/ubdsrv
>>
>> Signed-off-by: ZiyangZhang <ZiyangZhang@xxxxxxxxxxxxxxxxx>
>> ---
>>  drivers/block/ublk_drv.c | 88 ++++++++++++++++++++++++++++++++++++----
>>  1 file changed, 79 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
>> index 255b2de46a24..c2d2cd5ab25c 100644
>> --- a/drivers/block/ublk_drv.c
>> +++ b/drivers/block/ublk_drv.c
>> @@ -47,7 +47,9 @@
>>  #define UBLK_MINORS		(1U << MINORBITS)
>>  
>>  /* All UBLK_F_* have to be included into UBLK_F_ALL */
>> -#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_URING_CMD_COMP_IN_TASK)
>> +#define UBLK_F_ALL (UBLK_F_SUPPORT_ZERO_COPY \
>> +		| UBLK_F_URING_CMD_COMP_IN_TASK \
>> +		| UBLK_F_NEED_GET_DATA)
>>  
>>  struct ublk_rq_data {
>>  	struct callback_head work;
>> @@ -86,6 +88,15 @@ struct ublk_uring_cmd_pdu {
>>   */
>>  #define UBLK_IO_FLAG_ABORTED 0x04
>>  
>> +/*
>> + * UBLK_IO_FLAG_NEED_GET_DATA is set because IO command requires
>> + * get data buffer address from ublksrv.
>> + *
>> + * Then, bio data could be copied into this data buffer for a WRITE request
>> + * after the IO command is issued again and UBLK_IO_FLAG_NEED_GET_DATA is unset.
>> + */
>> +#define UBLK_IO_FLAG_NEED_GET_DATA 0x08
>> +
>>  struct ublk_io {
>>  	/* userspace buffer address from io cmd */
>>  	__u64	addr;
>> @@ -163,11 +174,19 @@ static struct miscdevice ublk_misc;
>>  static inline bool ublk_can_use_task_work(const struct ublk_queue *ubq)
>>  {
>>  	if (IS_BUILTIN(CONFIG_BLK_DEV_UBLK) &&
>> +			!(ubq->flags & UBLK_F_NEED_GET_DATA) &&
>>  			!(ubq->flags & UBLK_F_URING_CMD_COMP_IN_TASK))
>>  		return true;
>>  	return false;
> 
> NEED_GET_DATA isn't related with using task work.
> 
> That said if use task work return true, you use task work to handle
> GET_DATA, otherwise use io_uring_cmd_complete_in_task() to handle
> it, then either we use task work or use io_uring_cmd_complete_in_task,
> not mix the two.
> 
> BTW, in my test, it is observed reliably that task work can get better iops.

Ok, I will check whether UBLK_F_NEED_GET_DATA works with task_work_add().

I find that checking flags of ublk_io must stay in current ubq process.
Otherwise there may be potential data race while aborting IOs.
So whatever I use NEED_GET_DATA or not, task work is necessary. 

Using task_work_add() or io_uring_cmd_complete_in_task() depends on:
 (1) the module is built-in or not
 (2) the user requires the feature: UBLK_F_URING_CMD_COMP_IN_TASK

In the future, in our case, we may choose
UBLK_F_URING_CMD_COMP_IN_TASK | UBLK_F_NEED_GET_DATA as an option.

> 
>>  }
>>  
>> +static inline bool ublk_need_get_data(const struct ublk_queue *ubq)
>> +{
>> +	if (ubq->flags & UBLK_F_NEED_GET_DATA)
>> +		return true;
>> +	return false;
>> +}
>> +
>>  static struct ublk_device *ublk_get_device(struct ublk_device *ub)
>>  {
>>  	if (kobject_get_unless_zero(&ub->cdev_dev.kobj))
>> @@ -509,6 +528,21 @@ static void __ublk_fail_req(struct ublk_io *io, struct request *req)
>>  	}
>>  }
>>  
>> +static void ubq_complete_io_cmd(struct ublk_io *io, int res)
>> +{
>> +	/* mark this cmd owned by ublksrv */
>> +	io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
>> +
>> +	/*
>> +	 * clear ACTIVE since we are done with this sqe/cmd slot
>> +	 * We can only accept io cmd in case of being not active.
>> +	 */
>> +	io->flags &= ~UBLK_IO_FLAG_ACTIVE;
>> +
>> +	/* tell ublksrv one io request is coming */
>> +	io_uring_cmd_done(io->cmd, res, 0);
>> +}
>> +
>>  #define UBLK_REQUEUE_DELAY_MS	3
>>  
>>  static inline void __ublk_rq_task_work(struct request *req)
>> @@ -531,6 +565,20 @@ static inline void __ublk_rq_task_work(struct request *req)
>>  		return;
>>  	}
>>  
>> +	if (ublk_need_get_data(ubq) &&
>> +			(req_op(req) == REQ_OP_WRITE ||
>> +			req_op(req) == REQ_OP_FLUSH) &&
>> +			!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA)) {
>> +
>> +		io->flags |= UBLK_IO_FLAG_NEED_GET_DATA;
>> +
>> +		pr_devel("%s: ublk_need_get_data. op %d, qid %d tag %d io_flags %x\n",
>> +				__func__, io->cmd->cmd_op, ubq->q_id, req->tag, io->flags);
>> +
>> +		ubq_complete_io_cmd(io, UBLK_IO_RES_NEED_GET_DATA);
>> +		return;
>> +	}
>> +
>>  	mapped_bytes = ublk_map_io(ubq, req, io);
>>  
>>  	/* partially mapped, update io descriptor */
>> @@ -553,17 +601,13 @@ static inline void __ublk_rq_task_work(struct request *req)
>>  			mapped_bytes >> 9;
>>  	}
>>  
>> -	/* mark this cmd owned by ublksrv */
>> -	io->flags |= UBLK_IO_FLAG_OWNED_BY_SRV;
>> -
>>  	/*
>> -	 * clear ACTIVE since we are done with this sqe/cmd slot
>> -	 * We can only accept io cmd in case of being not active.
>> +	 * Anyway, we have handled UBLK_IO_NEED_GET_DATA for WRITE/FLUSH requests,
>> +	 * or we did nothing for other types requests.
>>  	 */
>> -	io->flags &= ~UBLK_IO_FLAG_ACTIVE;
>> +	io->flags &= ~UBLK_IO_FLAG_NEED_GET_DATA;
>>  
>> -	/* tell ublksrv one io request is coming */
>> -	io_uring_cmd_done(io->cmd, UBLK_IO_RES_OK, 0);
>> +	ubq_complete_io_cmd(io, UBLK_IO_RES_OK);
>>  }
>>  
>>  static void ublk_rq_task_work_cb(struct io_uring_cmd *cmd)
>> @@ -846,6 +890,16 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
>>  	mutex_unlock(&ub->mutex);
>>  }
>>  
>> +static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
>> +		int tag, struct io_uring_cmd *cmd)
>> +{
>> +	struct ublk_uring_cmd_pdu *pdu = ublk_get_uring_cmd_pdu(cmd);
>> +	struct request *req = blk_mq_tag_to_rq(ub->tag_set.tags[q_id], tag);
>> +
>> +	pdu->req = req;
>> +	io_uring_cmd_complete_in_task(cmd, ublk_rq_task_work_cb);
>> +}
> 
> Here you can choose to use task work or io_uring_cmd_complete_in_task() by
> checking ublk_can_use_task_work(), just like what ublk_queue_rq() does.

OK, I will add a check on ublk_can_use_task_work().

> 
>> +
>>  static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
>>  {
>>  	struct ublksrv_io_cmd *ub_cmd = (struct ublksrv_io_cmd *)cmd->cmd;
>> @@ -884,6 +938,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
>>  		goto out;
>>  	}
>>  
>> +	/*
>> +	 * ensure that the user issues UBLK_IO_NEED_GET_DATA
>> +	 * iff the driver have set the UBLK_IO_FLAG_NEED_GET_DATA.
>> +	 */
>> +	if ((!!(io->flags & UBLK_IO_FLAG_NEED_GET_DATA))
>> +			^ (cmd_op == UBLK_IO_NEED_GET_DATA))
>> +		goto out;
>> +
>>  	switch (cmd_op) {
>>  	case UBLK_IO_FETCH_REQ:
>>  		/* UBLK_IO_FETCH_REQ is only allowed before queue is setup */
>> @@ -917,6 +979,14 @@ static int ublk_ch_uring_cmd(struct io_uring_cmd *cmd, unsigned int issue_flags)
>>  		io->cmd = cmd;
>>  		ublk_commit_completion(ub, ub_cmd);
>>  		break;
>> +	case UBLK_IO_NEED_GET_DATA:
>> +		if (!(io->flags & UBLK_IO_FLAG_OWNED_BY_SRV))
>> +			goto out;
>> +		io->addr = ub_cmd->addr;
>> +		io->cmd = cmd;
>> +		io->flags |= UBLK_IO_FLAG_ACTIVE;
>> +		ublk_handle_need_get_data(ub, ub_cmd->q_id, ub_cmd->tag, cmd);
> 
> You still need to clear UBLK_IO_FLAG_OWNED_BY_SRV here.

If ublk_drv gets one UBLK_IO_NEED_GET_DATA command, it should immediately call
io_uring_cmd_complete_in_task() and handling NEED_GET_DATA logic in the task work.

Since UBLK_IO_FLAG_OWNED_BY_SRV means the "slot" is owned by nbd driver or ublk driver,
I don't think UBLK_IO_FLAG_OWNED_BY_SRV should be cleared.

BTW, UBLK_IO_FLAG_OWNED_BY_SRV is set in the task work finally.
> 
> In future, UBLK_IO_FLAG_OWNED_BY_SRV can be removed actually.

Agree. UBLK_IO_FLAG_OWNED_BY_SRV should be integrated with UBLK_IO_FLAG_ACTIVE.

Now I am trying to implement crash recovery mechanism and I concern about memory order
while operating these IO flags. IMO, too many IO flags looks dangerous
and I made mistakes on flags while developing UBLK_NEED_GET_DATA. :(

Thanks,
Zhang



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux