Re: [PATCH 8/9] block/mq-deadline: Add I/O priority support

Hannes Reinecke <hare@xxxxxxx> · Thu, 27 May 2021 09:07:55 +0200

On 5/27/21 3:01 AM, Bart Van Assche wrote:
> Maintain one FIFO list per I/O priority: RT, BE and IDLE. Only dispatch
> requests for a lower priority after all higher priority requests have
> finished. Maintain statistics for each priority level. Split the debugfs
> attributes per priority level as follows:
> 
> $ ls /sys/kernel/debug/block/.../sched/
> async_depth  dispatch2        read_next_rq      write2_fifo_list
> batching     read0_fifo_list  starved           write_next_rq
> dispatch0    read1_fifo_list  write0_fifo_list
> dispatch1    read2_fifo_list  write1_fifo_list
> 
> Cc: Damien Le Moal <damien.lemoal@xxxxxxx>
> Cc: Christoph Hellwig <hch@xxxxxx>
> Cc: Ming Lei <ming.lei@xxxxxxxxxx>
> Signed-off-by: Bart Van Assche <bvanassche@xxxxxxx>
> ---
>  block/mq-deadline.c | 293 +++++++++++++++++++++++++++++++++-----------
>  1 file changed, 223 insertions(+), 70 deletions(-)
> 
Interesting question, though, wrt to request merging and realtime
priority: what takes priority?
Is the realtime priority more important than request merging?

And also the ioprio document states that there are 8 levels of class
data, determining how much time each class should spend on disk access.
Have these been taken into consideration?

> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index 81f487d77e09..5a703e1228fa 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -44,16 +44,27 @@ enum dd_data_dir {
>  
>  enum { DD_DIR_COUNT = 2 };
>  
> +enum dd_prio {
> +	DD_RT_PRIO	= 0,
> +	DD_BE_PRIO	= 1,
> +	DD_IDLE_PRIO	= 2,
> +	DD_PRIO_MAX	= 2,
> +} __packed;
> +
> +enum { DD_PRIO_COUNT = 3 };
> +
>  struct deadline_data {
>  	/*
>  	 * run time data
>  	 */
>  
>  	/*
> -	 * requests (deadline_rq s) are present on both sort_list and fifo_list
> +	 * Requests are present on both sort_list[] and fifo_list[][]. The
> +	 * first index of fifo_list[][] is the I/O priority class (DD_*_PRIO).
> +	 * The second index is the data direction (rq_data_dir(rq)).
>  	 */
>  	struct rb_root sort_list[DD_DIR_COUNT];
> -	struct list_head fifo_list[DD_DIR_COUNT];
> +	struct list_head fifo_list[DD_PRIO_COUNT][DD_DIR_COUNT];
>  
>  	/*
>  	 * next in sort order. read, write or both are NULL
> @@ -62,6 +73,12 @@ struct deadline_data {
>  	unsigned int batching;		/* number of sequential requests made */
>  	unsigned int starved;		/* times reads have starved writes */
>  
> +	/* statistics */
> +	atomic_t inserted[DD_PRIO_COUNT];
> +	atomic_t dispatched[DD_PRIO_COUNT];
> +	atomic_t merged[DD_PRIO_COUNT];
> +	atomic_t completed[DD_PRIO_COUNT];
> +
>  	/*
>  	 * settings that change how the i/o scheduler behaves
>  	 */
> @@ -73,7 +90,15 @@ struct deadline_data {
>  
>  	spinlock_t lock;
>  	spinlock_t zone_lock;
> -	struct list_head dispatch;
> +	struct list_head dispatch[DD_PRIO_COUNT];
> +};
> +
> +/* Maps an I/O priority class to a deadline scheduler priority. */
> +static const enum dd_prio ioprio_class_to_prio[] = {
> +	[IOPRIO_CLASS_NONE]	= DD_BE_PRIO,
> +	[IOPRIO_CLASS_RT]	= DD_RT_PRIO,
> +	[IOPRIO_CLASS_BE]	= DD_BE_PRIO,
> +	[IOPRIO_CLASS_IDLE]	= DD_IDLE_PRIO,
>  };
>  
>  static inline struct rb_root *
> @@ -149,12 +174,28 @@ static void dd_request_merged(struct request_queue *q, struct request *req,
>  	}
>  }
>  
> +/* Returns the I/O class that has been assigned by dd_insert_request(). */
> +static u8 dd_rq_ioclass(struct request *rq)
> +{
> +	return (uintptr_t)rq->elv.priv[1];
> +}
> +
>  /*
>   * Callback function that is invoked after @next has been merged into @req.
>   */
>  static void dd_merged_requests(struct request_queue *q, struct request *req,
>  			       struct request *next)
>  {
> +	struct deadline_data *dd = q->elevator->elevator_data;
> +	const u8 ioprio_class = dd_rq_ioclass(next);
> +	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
> +
> +	if (next->elv.priv[0]) {
> +		atomic_inc(&dd->merged[prio]);
> +	} else {
> +		WARN_ON_ONCE(true);
> +	}
> +
>  	/*
>  	 * if next expires before rq, assign its expire time to rq
>  	 * and move into next position (next will be deleted) in fifo
> @@ -191,14 +232,22 @@ deadline_move_request(struct deadline_data *dd, struct request *rq)
>  	deadline_remove_request(rq->q, rq);
>  }
>  
> +/* Number of requests queued for a given priority level. */
> +static u64 dd_queued(struct deadline_data *dd, enum dd_prio prio)
> +{
> +	return atomic_read(&dd->inserted[prio]) -
> +		atomic_read(&dd->completed[prio]);
> +}
> +
>  /*
>   * deadline_check_fifo returns 0 if there are no expired requests on the fifo,
>   * 1 otherwise. Requires !list_empty(&dd->fifo_list[data_dir])
>   */
>  static inline int deadline_check_fifo(struct deadline_data *dd,
> +				      enum dd_prio prio,
>  				      enum dd_data_dir data_dir)
>  {
> -	struct request *rq = rq_entry_fifo(dd->fifo_list[data_dir].next);
> +	struct request *rq = rq_entry_fifo(dd->fifo_list[prio][data_dir].next);
>  
>  	/*
>  	 * rq is expired!

I am _so_ not a fan of arrays in C.
Can't you make this an accessor and use
dd->fifo_list[prio * 2 + data_dir] ?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		        Kernel Storage Architect
hare@xxxxxxx			               +49 911 74053 688
SUSE Software Solutions Germany GmbH, 90409 Nürnberg
GF: F. Imendörffer, HRB 36809 (AG Nürnberg)