The existing blocktrace API has different data type when it comes to tracking the action. Now that we have increased the size of action mask and the action itself update all the APIs prototypes to handle action of size u64. Also with extension we now we can track the priorities, so update the API to hold the priority values for logging. Also, in the previous patch we have added two new block trace actions for REQ_OP_WRITE_ZEROES and REQ_OP_ZONE_RESET. Add those action into the __blk_add_trace() so we can track new actions. Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@xxxxxxx> --- kernel/trace/blktrace.c | 165 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 164 insertions(+), 1 deletion(-) diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index e1c6d79fb4cc..6d2b4adae76e 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -63,9 +63,16 @@ static void blk_unregister_tracepoints(void); /* * Send out a notify message. */ +#ifdef CONFIG_BLKTRACE_EXT +static void trace_note(struct blk_trace *bt, pid_t pid, u64 action, + const void *data, size_t len, + union kernfs_node_id *cgid) + +#else static void trace_note(struct blk_trace *bt, pid_t pid, int action, const void *data, size_t len, union kernfs_node_id *cgid) +#endif /* CONFIG_BLKTRACE_EXT */ { struct blk_io_trace *t; struct ring_buffer_event *event = NULL; @@ -180,8 +187,14 @@ void __trace_note_message(struct blk_trace *bt, struct blkcg *blkcg, } EXPORT_SYMBOL_GPL(__trace_note_message); + +#ifdef CONFIG_BLKTRACE_EXT +static int act_log_check(struct blk_trace *bt, u64 what, sector_t sector, + pid_t pid) +#else static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, pid_t pid) +#endif /* CONFIG_BLKTRACE_EXT */ { if (((bt->act_mask << BLK_TC_SHIFT) & what) == 0) return 1; @@ -196,8 +209,15 @@ static int act_log_check(struct blk_trace *bt, u32 what, sector_t sector, /* * Data direction bit lookup */ + +#ifdef CONFIG_BLKTRACE_EXT +static const u64 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), + BLK_TC_ACT(BLK_TC_WRITE) }; + +#else static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), - BLK_TC_ACT(BLK_TC_WRITE) }; + BLK_TC_ACT(BLK_TC_WRITE) }; +#endif /* CONFIG_BLKTRACE_EXT */ #define BLK_TC_RAHEAD BLK_TC_AHEAD #define BLK_TC_PREFLUSH BLK_TC_FLUSH @@ -210,9 +230,16 @@ static const u32 ddir_act[2] = { BLK_TC_ACT(BLK_TC_READ), * The worker for the various blk_add_trace*() types. Fills out a * blk_io_trace structure and places it in a per-cpu subbuffer. */ + +#ifdef CONFIG_BLKTRACE_EXT +static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, + int op, int op_flags, u64 what, int error, int pdu_len, + void *pdu_data, union kernfs_node_id *cgid, u32 ioprio) +#else static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, int op, int op_flags, u32 what, int error, int pdu_len, void *pdu_data, union kernfs_node_id *cgid) +#endif /* CONFIG_BLKTRACE_EXT */ { struct task_struct *tsk = current; struct ring_buffer_event *event = NULL; @@ -238,6 +265,14 @@ static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, what |= BLK_TC_ACT(BLK_TC_DISCARD); if (op == REQ_OP_FLUSH) what |= BLK_TC_ACT(BLK_TC_FLUSH); + +#ifdef CONFIG_BLKTRACE_EXT + if (unlikely(op == REQ_OP_WRITE_ZEROES)) + what |= BLK_TC_ACT(BLK_TC_WRITE_ZEROES); + if (unlikely(op == REQ_OP_ZONE_RESET)) + what |= BLK_TC_ACT(BLK_TC_ZONE_RESET); +#endif /* CONFIG_BLKTRACE_EXT */ + if (cgid) what |= __BLK_TA_CGROUP; @@ -535,8 +570,14 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, goto err; bt->act_mask = buts->act_mask; + +#ifdef CONFIG_BLKTRACE_EXT + if (!bt->act_mask) + bt->act_mask = (u64) -1ULL; +#else if (!bt->act_mask) bt->act_mask = (u16) -1; +#endif /* CONFIG_BLKTRACE_EXT */ blk_trace_setup_lba(bt, bdev); @@ -802,9 +843,16 @@ blk_trace_request_get_cgid(struct request_queue *q, struct request *rq) * Records an action against a request. Will log the bio offset + size. * **/ +#ifdef CONFIG_BLKTRACE_EXT +static void blk_add_trace_rq(struct request *rq, int error, + unsigned int nr_bytes, u64 what, + union kernfs_node_id *cgid) + +#else static void blk_add_trace_rq(struct request *rq, int error, unsigned int nr_bytes, u32 what, union kernfs_node_id *cgid) +#endif /* CONFIG_BLKTRACE_EXT */ { struct blk_trace *bt = rq->q->blk_trace; @@ -816,8 +864,14 @@ static void blk_add_trace_rq(struct request *rq, int error, else what |= BLK_TC_ACT(BLK_TC_FS); +#ifdef CONFIG_BLKTRACE_EXT + __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), + rq->cmd_flags, what, error, 0, NULL, cgid, req_get_ioprio(rq)); + +#else __blk_add_trace(bt, blk_rq_trace_sector(rq), nr_bytes, req_op(rq), rq->cmd_flags, what, error, 0, NULL, cgid); +#endif /* CONFIG_BLKTRACE_EXT */ } static void blk_add_trace_rq_insert(void *ignore, @@ -860,17 +914,31 @@ static void blk_add_trace_rq_complete(void *ignore, struct request *rq, * Records an action against a bio. Will log the bio offset + size. * **/ + +#ifdef CONFIG_BLKTRACE_EXT +static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, + u64 what, int error) + +#else static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, u32 what, int error) +#endif /* CONFIG_BLKTRACE_EXT */ { struct blk_trace *bt = q->blk_trace; if (likely(!bt)) return; +#ifdef CONFIG_BLKTRACE_EXT + __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, + bio_op(bio), bio->bi_opf, what, error, 0, NULL, + blk_trace_bio_get_cgid(q, bio), bio_prio(bio)); + +#else __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, what, error, 0, NULL, blk_trace_bio_get_cgid(q, bio)); +#endif /* CONFIG_BLKTRACE_EXT */ } static void blk_add_trace_bio_bounce(void *ignore, @@ -917,9 +985,16 @@ static void blk_add_trace_getrq(void *ignore, else { struct blk_trace *bt = q->blk_trace; +#ifdef CONFIG_BLKTRACE_EXT + if (bt) + __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, + NULL, NULL, 0); + +#else if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_GETRQ, 0, 0, NULL, NULL); +#endif /* CONFIG_BLKTRACE_EXT */ } } @@ -933,9 +1008,17 @@ static void blk_add_trace_sleeprq(void *ignore, else { struct blk_trace *bt = q->blk_trace; + +#ifdef CONFIG_BLKTRACE_EXT + if (bt) + __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, + 0, 0, NULL, NULL, 0); + +#else if (bt) __blk_add_trace(bt, 0, 0, rw, 0, BLK_TA_SLEEPRQ, 0, 0, NULL, NULL); +#endif /* CONFIG_BLKTRACE_EXT */ } } @@ -943,8 +1026,14 @@ static void blk_add_trace_plug(void *ignore, struct request_queue *q) { struct blk_trace *bt = q->blk_trace; +#ifdef CONFIG_BLKTRACE_EXT + if (bt) + __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL, + 0); +#else if (bt) __blk_add_trace(bt, 0, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL, NULL); +#endif /* CONFIG_BLKTRACE_EXT */ } static void blk_add_trace_unplug(void *ignore, struct request_queue *q, @@ -954,14 +1043,24 @@ static void blk_add_trace_unplug(void *ignore, struct request_queue *q, if (bt) { __be64 rpdu = cpu_to_be64(depth); + +#ifdef CONFIG_BLKTRACE_EXT + u64 what; +#else u32 what; +#endif /* CONFIG_BLKTRACE_EXT */ if (explicit) what = BLK_TA_UNPLUG_IO; else what = BLK_TA_UNPLUG_TIMER; +#ifdef CONFIG_BLKTRACE_EXT + __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, + NULL, 0); +#else __blk_add_trace(bt, 0, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu, NULL); +#endif /* CONFIG_BLKTRACE_EXT */ } } @@ -974,10 +1073,18 @@ static void blk_add_trace_split(void *ignore, if (bt) { __be64 rpdu = cpu_to_be64(pdu); +#ifdef CONFIG_BLKTRACE_EXT + __blk_add_trace(bt, bio->bi_iter.bi_sector, + bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, + BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu), + &rpdu, blk_trace_bio_get_cgid(q, bio), + bio_prio(bio)); +#else __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, BLK_TA_SPLIT, bio->bi_status, sizeof(rpdu), &rpdu, blk_trace_bio_get_cgid(q, bio)); +#endif /* CONFIG_BLKTRACE_EXT */ } } @@ -1008,9 +1115,16 @@ static void blk_add_trace_bio_remap(void *ignore, r.device_to = cpu_to_be32(bio_dev(bio)); r.sector_from = cpu_to_be64(from); +#ifdef CONFIG_BLKTRACE_EXT + __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, + bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status, + sizeof(r), &r, blk_trace_bio_get_cgid(q, bio), + bio_prio(bio)); +#else __blk_add_trace(bt, bio->bi_iter.bi_sector, bio->bi_iter.bi_size, bio_op(bio), bio->bi_opf, BLK_TA_REMAP, bio->bi_status, sizeof(r), &r, blk_trace_bio_get_cgid(q, bio)); +#endif /* CONFIG_BLKTRACE_EXT */ } /** @@ -1041,9 +1155,17 @@ static void blk_add_trace_rq_remap(void *ignore, r.device_to = cpu_to_be32(disk_devt(rq->rq_disk)); r.sector_from = cpu_to_be64(from); + +#ifdef CONFIG_BLKTRACE_EXT + __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), + rq_data_dir(rq), 0, BLK_TA_REMAP, 0, + sizeof(r), &r, blk_trace_request_get_cgid(q, rq), + req_get_ioprio(rq)); +#else __blk_add_trace(bt, blk_rq_pos(rq), blk_rq_bytes(rq), rq_data_dir(rq), 0, BLK_TA_REMAP, 0, sizeof(r), &r, blk_trace_request_get_cgid(q, rq)); +#endif /* CONFIG_BLKTRACE_EXT */ } /** @@ -1066,9 +1188,16 @@ void blk_add_driver_data(struct request_queue *q, if (likely(!bt)) return; +#ifdef CONFIG_BLKTRACE_EXT + __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, + BLK_TA_DRV_DATA, 0, len, data, + blk_trace_request_get_cgid(q, rq), + req_get_ioprio(rq)); +#else __blk_add_trace(bt, blk_rq_trace_sector(rq), blk_rq_bytes(rq), 0, 0, BLK_TA_DRV_DATA, 0, len, data, blk_trace_request_get_cgid(q, rq)); +#endif /* CONFIG_BLKTRACE_EXT */ } EXPORT_SYMBOL_GPL(blk_add_driver_data); @@ -1139,7 +1268,11 @@ static void blk_unregister_tracepoints(void) static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) { int i = 0; +#ifdef CONFIG_BLKTRACE_EXT + u64 tc = t->action >> BLK_TC_SHIFT; +#else int tc = t->action >> BLK_TC_SHIFT; +#endif /* CONFIG_BLKTRACE_EXT */ if ((t->action & ~__BLK_TN_CGROUP) == BLK_TN_MESSAGE) { rwbs[i++] = 'N'; @@ -1151,6 +1284,17 @@ static void fill_rwbs(char *rwbs, const struct blk_io_trace *t) if (tc & BLK_TC_DISCARD) rwbs[i++] = 'D'; + +#ifdef CONFIG_BLKTRACE_EXT + else if ((tc & BLK_TC_WRITE_ZEROES)) { + /* instead of adding nested if in BLK_TC_WRITE keep the code clean */ + rwbs[i++] = 'W'; + rwbs[i++] = 'Z'; + } else if ((tc & BLK_TC_ZONE_RESET)) { + rwbs[i++] = 'Z'; + rwbs[i++] = 'R'; + } +#endif /* CONFIG_BLKTRACE_EXT */ else if (tc & BLK_TC_WRITE) rwbs[i++] = 'W'; else if (t->bytes) @@ -1193,7 +1337,11 @@ static inline int pdu_real_len(const struct trace_entry *ent, bool has_cg) (has_cg ? sizeof(union kernfs_node_id) : 0); } +#ifdef CONFIG_BLKTRACE_EXT +static inline u64 t_action(const struct trace_entry *ent) +#else static inline u32 t_action(const struct trace_entry *ent) +#endif /* CONFIG_BLKTRACE_EXT */ { return te_blk_io_trace(ent)->action; } @@ -1474,7 +1622,12 @@ static enum print_line_t print_one_line(struct trace_iterator *iter, bool has_cg; t = te_blk_io_trace(iter->ent); + +#ifdef CONFIG_BLKTRACE_EXT + what = (t->action & ((1ULL << BLK_TC_SHIFT) - 1)) & ~__BLK_TA_CGROUP; +#else what = (t->action & ((1 << BLK_TC_SHIFT) - 1)) & ~__BLK_TA_CGROUP; +#endif /* CONFIG_BLKTRACE_EXT */ long_act = !!(tr->trace_flags & TRACE_ITER_VERBOSE); log_action = classic ? &blk_log_action_classic : &blk_log_action; has_cg = t->action & __BLK_TA_CGROUP; @@ -1617,7 +1770,13 @@ static int blk_trace_setup_queue(struct request_queue *q, goto free_bt; bt->dev = bdev->bd_dev; + +#ifdef CONFIG_BLKTRACE_EXT + bt->act_mask = (u64)-1ULL; + +#else bt->act_mask = (u16)-1; +#endif /* CONFIG_BLKTRACE_EXT */ blk_trace_setup_lba(bt, bdev); @@ -1688,6 +1847,10 @@ static const struct { { BLK_TC_DISCARD, "discard" }, { BLK_TC_DRV_DATA, "drv_data" }, { BLK_TC_FUA, "fua" }, +#ifdef CONFIG_BLKTRACE_EXT + { BLK_TC_WRITE_ZEROES, "write-zeroes" }, + { BLK_TC_ZONE_RESET, "zone-reset" }, +#endif /* CONFIG_BLKTRACE_EXT */ }; static int blk_trace_str2mask(const char *str) -- 2.19.1