[PATCH RFC] block: trace: add block alignment information

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Daniel Gomez <da.gomez@xxxxxxxxxxx>

Report block alignment in terms of LBA and size during block tracing for
block_rq. Calculate alignment only for reads/writes where the length is
greater than 0. Otherwise, report 0 to indicate that no alignment was
calculated.

Suggested-by: Dave Chinner <dchinner@xxxxxxxxxx>
Signed-off-by: Daniel Gomez <da.gomez@xxxxxxxxxxx>
---
This patch introduces LBA and size alignment information for
the block_rq tracepoints (block_rq_{insert, issue, merge} and
block_{io_start, io_done}).

The idea of reporting alignment in a tracepoint was first suggested in
this thread [1] by Dave Chinner. Additionally, an eBPF-based equivalent
tracing tool [2] was developed and used during LBS development, as
mentioned in the patch series [3] and in [1].

With this addition, users can check block alignment directly through the
block layer tracepoints without needing any additional tools.

In case we have a use case, this can be extended to other tracepoints,
such as complete and error.

Another potential enhancement could be the integration of this
information into blktrace. Would that be a feasible option to consider?

[1] https://lore.kernel.org/all/ZdvXAn1Q%2F+QX5sPQ@xxxxxxxxxxxxxxxxxxx/
[2] blkalgn tool written in eBPF/bcc:
https://github.com/dkruces/bcc/tree/lbs
[3] https://lore.kernel.org/all/20240822135018.1931258-1-kernel@xxxxxxxxxxxxxxxx/
---
 block/blk-mq.c               | 29 +++++++++++++++++++++++++++++
 include/linux/blk-mq.h       | 11 +++++++++++
 include/linux/blkdev.h       |  6 ++++++
 include/trace/events/block.h |  7 +++++--
 4 files changed, 51 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 831c5cf5d874..714452bc236b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4920,6 +4920,35 @@ int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
 }
 EXPORT_SYMBOL_GPL(blk_rq_poll);
 
+/*
+ * Alignment of @req in bytes: its length when that is a power of two and the
+ * request passes blk_rq_lba_aligned() against it; otherwise the last doubling
+ * of the logical block size (below the length) that still passes, or lbs.
+ */
+u32 __blk_rq_lba_algn(struct request *req)
+{
+	u32 lbs = queue_logical_block_size(req->q);
+	u32 shift = ilog2(lbs);
+	u32 lba = req->__sector >> (shift - SECTOR_SHIFT);
+	u32 len = req->__data_len;
+	u32 best = lbs;
+	u32 cand;
+
+	/* Fast path: the whole request is aligned to its own size. */
+	if (is_power_of_2(len) && blk_rq_lba_aligned(len, len, lba, len / lbs))
+		return len;
+
+	/* Grow the candidate from 2 * lbs until len or the LBA is unaligned. */
+	for (cand = lbs << 1U; cand < len; cand <<= 1U) {
+		if (!blk_rq_lba_aligned(len, cand, lba, cand / lbs))
+			break;
+
+		best = cand;
+	}
+
+	return best;
+}
+
 unsigned int blk_mq_rq_cpu(struct request *rq)
 {
 	return rq->mq_ctx->cpu;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8d304b1d16b1..02959fbd5e28 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -740,6 +740,17 @@ void blk_mq_free_request(struct request *rq);
 int blk_rq_poll(struct request *rq, struct io_comp_batch *iob,
 		unsigned int poll_flags);
 
+/* The alignment of the block in terms of LBA and size */
+u32 __blk_rq_lba_algn(struct request *req);
+/* Returns 0 (not calculated) unless @req is a read/write with length > 0. */
+static inline u32 blk_rq_lba_algn(struct request *req)
+{
+	if ((req_op(req) != REQ_OP_WRITE && req_op(req) != REQ_OP_READ) ||
+	    !req->__data_len)
+		return 0;
+	return __blk_rq_lba_algn(req);
+}
+
 bool blk_mq_queue_inflight(struct request_queue *q);
 
 enum {
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index bf1aa951fda2..28557987daa8 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -1433,6 +1433,12 @@ static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
 	return !(addr & alignment) && !(len & alignment);
 }
 
+/* Zero alignments are reported as unaligned instead of dividing by zero. */
+static inline bool blk_rq_lba_aligned(u32 len, u32 algn_len, u32 lba, u32 algn_lba)
+{
+	return algn_len && algn_lba && !(len % algn_len) && !(lba % algn_lba);
+}
+
 /* assumes size > 256 */
 static inline unsigned int blksize_bits(unsigned int size)
 {
diff --git a/include/trace/events/block.h b/include/trace/events/block.h
index 1527d5d45e01..ba3764214dc7 100644
--- a/include/trace/events/block.h
+++ b/include/trace/events/block.h
@@ -202,6 +202,7 @@ DECLARE_EVENT_CLASS(block_rq,
 		__array(  char,		rwbs,	RWBS_LEN	)
 		__array(  char,         comm,   TASK_COMM_LEN   )
 		__dynamic_array( char,	cmd,	1		)
+		__field(  unsigned int,	algn			)
 	),
 
 	TP_fast_assign(
@@ -210,20 +211,22 @@ DECLARE_EVENT_CLASS(block_rq,
 		__entry->nr_sector = blk_rq_trace_nr_sectors(rq);
 		__entry->bytes     = blk_rq_bytes(rq);
 		__entry->ioprio	   = rq->ioprio;
+		__entry->algn      = blk_rq_lba_algn(rq);
 
 		blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
 		__get_str(cmd)[0] = '\0';
 		memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
 	),
 
-	TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u [%s]",
+	TP_printk("%d,%d %s %u (%s) %llu + %u %s,%u,%u |%u| [%s]",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  __entry->rwbs, __entry->bytes, __get_str(cmd),
 		  (unsigned long long)__entry->sector, __entry->nr_sector,
 		  __print_symbolic(IOPRIO_PRIO_CLASS(__entry->ioprio),
 				   IOPRIO_CLASS_STRINGS),
 		  IOPRIO_PRIO_HINT(__entry->ioprio),
-		  IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->comm)
+		  IOPRIO_PRIO_LEVEL(__entry->ioprio), __entry->algn,
+		  __entry->comm)
 );
 
 /**

---
base-commit: 57f962b956f1d116cd64d5c406776c4975de549d
change-id: 20240912-add-blkalgn-block-trace-71e8ab6708f1

Best regards,
-- 
Daniel Gomez <da.gomez@xxxxxxxxxxx>






[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux