Re: [blktrace-tools RFC PATCH 5/5] blkparse: add support for trace extension

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 6/2/19 6:58 PM, Chaitanya Kulkarni wrote:

Update blktrace tool to support the trace extensions. The new option
'-E' will allow blktrace to configure trace with newly introduced
IOCTLs for trace extensions. '-Y' and '-y' options are similar to
'-a' and '-A' but can now use the mask for additional operations
which are supported for trace-extension. For tracking the priority we
add '-P' along with the '-X' and '-x' to specify priority value and
mask.

Documentation in addition to the updated usage text?

doc/blkparse.1
doc/blktrace.tex

optimally including new output formats such as for the extended RWBS and prio-class so users know how to read it [my background is https://www.spinics.net/lists/linux-btrace/msg00840.html https://www.spinics.net/lists/linux-btrace/msg00836.html https://www.spinics.net/lists/linux-btrace/msg00825.html]

Update blkparse format with the trace extension support so blkparse
now can now identify and dump the trace extension. The new function
fill_rwbs_ext() formats the trace string with newly added trace
categories. We encapsulate existing blk_trace and newly added
blk_trace_ext structure under struct trace and use the t_bit_xxx()
helpers to read the appropriate value based on whether extensions
are enabled or not. We also add the support for priority tracking
when trace extension is enabled.

Signed-off-by: Chaitanya Kulkarni <chaitanya.kulkarni@xxxxxxx>
---
  Makefile       |    2 +-
  blkparse.c     | 1072 +++++++++++++++++++++++++++++++++++-------------
  blkparse.h     |   78 ++++
  blkparse_fmt.c |  204 ++++++---
  blktrace.h     |    6 +-
  5 files changed, 1017 insertions(+), 345 deletions(-)
  create mode 100644 blkparse.h

diff --git a/Makefile b/Makefile
index 5917814..af90a45 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
  CC	= gcc
  CFLAGS	= -Wall -O2 -g -W
  ALL_CFLAGS = $(CFLAGS) -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
-PROGS	= blkparse blktrace verify_blkparse blkrawverify blkiomon
+PROGS	= blkparse blktrace #verify_blkparse blkrawverify blkiomon

not building verify_blkparse any more looks odd, is it a left over from development / debugging?

  LIBS	= -lpthread
  SCRIPTS	= btrace
  SUBDIRS = btreplay btt iowatcher
diff --git a/blkparse.c b/blkparse.c
index 227cc44..36fe568 100644
--- a/blkparse.c
+++ b/blkparse.c
@@ -201,6 +201,42 @@ static struct option l_opts[] = {
  		.flag = NULL,
  		.val = 'w'
  	},
+	{
+		.name = "act-mask-ext",
+		.has_arg = required_argument,
+		.flag = NULL,
+		.val = 'y'
+	},
+	{
+		.name = "set-mask-ext",
+		.has_arg = required_argument,
+		.flag = NULL,
+		.val = 'Y'
+	},
+	{
+		.name = "use-extensions",
+		.has_arg = no_argument,
+		.flag = NULL,
+		.val = 'E'
+	},
+	{
+		.name = "track-priority",
+		.has_arg = no_argument,
+		.flag = NULL,
+		.val = 'P'
+	},
+	{
+		.name = "prio-mask",
+		.has_arg = required_argument,
+		.flag = NULL,
+		.val = 'x'
+	},
+	{
+		.name = "prio-set-mask",
+		.has_arg = required_argument,
+		.flag = NULL,
+		.val = 'X'
+	},
  	{
  		.name = "verbose",
  		.has_arg = no_argument,

@@ -572,53 +611,82 @@ static struct process_pid_map *add_ppm_hash(pid_t pid, const char *name)
  	return ppm;
  }

-static void handle_notify(struct blk_io_trace *bit)
+static void handle_notify(struct trace *t)
  {
-	void	*payload = (caddr_t) bit + sizeof(*bit);
+	void	*payload;
  	__u32	two32[2];

-	switch (bit->action) {
-	case BLK_TN_PROCESS:
-		add_ppm_hash(bit->pid, payload);
-		break;
-
-	case BLK_TN_TIMESTAMP:
-		if (bit->pdu_len != sizeof(two32))
-			return;
-		memcpy(two32, payload, sizeof(two32));
-		if (!data_is_native) {
-			two32[0] = be32_to_cpu(two32[0]);
-			two32[1] = be32_to_cpu(two32[1]);
-		}
-		start_timestamp = bit->time;
-		abs_start_time.tv_sec  = two32[0];
-		abs_start_time.tv_nsec = two32[1];
-		if (abs_start_time.tv_nsec < 0) {
-			abs_start_time.tv_sec--;
-			abs_start_time.tv_nsec += 1000000000;
+	if (t->use_ext)
+		payload = (caddr_t) t->bit_ext + sizeof(*(t->bit_ext));
+	else
+		payload = (caddr_t) t->bit + sizeof(*(t->bit));
+
+	if (use_ext) {

-
-		break;
-
-	case BLK_TN_MESSAGE:
-		if (bit->pdu_len > 0) {
-			char msg[bit->pdu_len+1];
-
-			memcpy(msg, (char *)payload, bit->pdu_len);
-			msg[bit->pdu_len] = '\0';
-
-			fprintf(ofp,
-				"%3d,%-3d %2d %8s %5d.%09lu %5u %2s %3s %s\n",
-				MAJOR(bit->device), MINOR(bit->device),
-				bit->cpu, "0", (int) SECONDS(bit->time),
-				(unsigned long) NANO_SECONDS(bit->time),
-				0, "m", "N", msg);

I find it a bit hard to review that everything under the else case remains the same code with just an additional indentation. Would it be possible to just add the if(use_ext) with an early return and leave the old non-extended code as is?

+	} else {
+		if (t_bit_act(t) == BLK_TN_PROCESS) {
+			add_ppm_hash(t_bit_pid(t), payload);
+		} else if (t_bit_act(t) == BLK_TN_TIMESTAMP) {
+			if (t_bit_pdu_len(t) != sizeof(two32))
+				return;
+			memcpy(two32, payload, sizeof(two32));
+			if (!data_is_native) {
+				two32[0] = be32_to_cpu(two32[0]);
+				two32[1] = be32_to_cpu(two32[1]);
+			}
+			start_timestamp = t_bit_time(t);
+			abs_start_time.tv_sec  = two32[0];
+			abs_start_time.tv_nsec = two32[1];
+			if (abs_start_time.tv_nsec < 0) {
+				abs_start_time.tv_sec--;
+				abs_start_time.tv_nsec += 1000000000;
+			}
+		} else if (t_bit_act(t) == BLK_TN_MESSAGE) {
+			if (t_bit_pdu_len(t) > 0) {
+				char msg[t_bit_pdu_len(t)+1];
+
+				memcpy(msg, (char *)payload, t_bit_pdu_len(t));
+				msg[t_bit_pdu_len(t)] = '\0';
+
+				fprintf(ofp,
+					"%3d,%-3d %2d %8s %5d.%09lu %5u %2s %3s %s\n",
+					MAJOR(t_bit_dev(t)), MINOR(t_bit_dev(t)),
+					t_bit_cpu(t), "0", (int) SECONDS(t_bit_time(t)),
+					(unsigned long) NANO_SECONDS(t_bit_time(t)),
+					0, "m", "N", msg);
+			}
  		}
-		break;
-
-	default:
-		/* Ignore unknown notify events */
-		;
  	}
  }


-static void dump_trace(struct blk_io_trace *t, struct per_cpu_info *pci,
+static void dump_trace(struct trace *t, struct per_cpu_info *pci,
  		       struct per_dev_info *pdi)
  {
  	if (text_output) {
-		if (t->action == BLK_TN_MESSAGE)
-			handle_notify(t);
-		else if (t->action & BLK_TC_ACT(BLK_TC_PC))
-			dump_trace_pc(t, pdi, pci);
-		else
-			dump_trace_fs(t, pdi, pci);
+		if (use_ext)
+			if (t->bit_ext->action == BLK_TN_MESSAGE_EXT)
+				handle_notify(t);
+			else if (t->bit_ext->action & BLK_TC_ACT_EXT(BLK_TC_PC))
+				dump_trace_pc(t, pdi, pci);
+			else
+				dump_trace_fs(t, pdi, pci);
+		else {
+			if (t->bit->action == BLK_TN_MESSAGE)
+				handle_notify(t);
+			else if (t->bit->action & BLK_TC_ACT(BLK_TC_PC))
+				dump_trace_pc(t, pdi, pci);
+			else
+				dump_trace_fs(t, pdi, pci);
+		}

dito

  	}

  	if (!pdi->events)
-		pdi->first_reported_time = t->time;
+		pdi->first_reported_time = t_bit_time(t);

  	pdi->events++;

-	if (bin_output_msgs ||
-			    !(t->action & BLK_TC_ACT(BLK_TC_NOTIFY) &&
-			      t->action == BLK_TN_MESSAGE))
-		output_binary(t, sizeof(*t) + t->pdu_len);
+	if (use_ext) {
+		struct blk_io_trace_ext *bit_ext = t->bit_ext;
+
+		if (bin_output_msgs ||
+				!(bit_ext->action & BLK_TC_ACT_EXT(BLK_TC_NOTIFY) &&
+					bit_ext->action == BLK_TN_MESSAGE_EXT))
+			output_binary(bit_ext, sizeof(*bit_ext) + bit_ext->pdu_len);
+	} else {
+		struct blk_io_trace *bit = t->bit;
+
+		if (bin_output_msgs ||
+				!(bit->action & BLK_TC_ACT(BLK_TC_NOTIFY) &&
+					bit->action == BLK_TN_MESSAGE))
+			output_binary(bit, sizeof(*bit) + bit->pdu_len);
+	}
  }

@@ -2111,41 +2237,57 @@ static void show_entries_rb(int force)

-		if (!(bit->action == BLK_TN_MESSAGE) &&
-		    check_sequence(pdi, t, force))
-			break;
+		if (use_ext) {
+			if (!(t->bit_ext->action == BLK_TN_MESSAGE_EXT) &&
+					check_sequence(pdi, t, force))
+				break;
+		} else {
+			if (!(t->bit->action == BLK_TN_MESSAGE) &&
+					check_sequence(pdi, t, force))
+				break;
+		}

-		if (!(bit->action == BLK_TN_MESSAGE))
-			pci->last_sequence = bit->sequence;
+		if (use_ext) {
+			if (!(t->bit_ext->action == BLK_TN_MESSAGE_EXT))
+				pci->last_sequence = t->bit_ext->sequence;
+		} else {
+			if (!(t->bit->action == BLK_TN_MESSAGE))
+				pci->last_sequence = t->bit->sequence;
+		}

-		if (bit->action & (act_mask << BLK_TC_SHIFT))
-			dump_trace(bit, pci, pdi);
+		if (use_ext) {
+			if (t->bit_ext->action &
+					(act_mask_ext << BLK_TC_SHIFT_EXT))
+				dump_trace(t, pci, pdi);
+		} else {
+			if (t->bit->action & (act_mask << BLK_TC_SHIFT))
+				dump_trace(t, pci, pdi);
+		}

  		put_trace(pdi, t);
  	}

@@ -2342,10 +2629,18 @@ static inline void ms_deq(struct ms_stream *msp)
  	msp->first = msp->first->next;
  	if (!msp->first) {
  		msp->last = NULL;
-		if (!ms_prime(msp)) {
-			ms_head = msp->next;
-			msp->next = NULL;
-			return;
+		if (use_ext) {
+			if (!ms_prime_ext(msp)) {
+				ms_head = msp->next;
+				msp->next = NULL;
+				return;
+			}
+		} else {
+			if (!ms_prime(msp)) {
+				ms_head = msp->next;
+				msp->next = NULL;
+				return;
+			}
  		}
  	}

@@ -2469,8 +2848,13 @@ static struct ms_stream *ms_alloc(struct per_dev_info *pdi, int cpu)

-	if (ms_prime(msp))
-		ms_sort(msp);
+	if (use_ext)  {
+		if (ms_prime_ext(msp))
+			ms_sort(msp);
+	} else {
+		if (ms_prime(msp))
+			ms_sort(msp);
+	}

@@ -2605,8 +3020,13 @@ static int do_file(void)

-	while (!is_done() && ms_head && handle(ms_head))
-		;
+	if (use_ext)
+		while (!is_done() && ms_head && handle_ext(ms_head))
+			;
+	else {
+		while (!is_done() && ms_head && handle(ms_head))
+			;
+	}

@@ -2618,19 +3038,23 @@ static void do_pipe(int fd)

  		read_sequence++;
-	
  #if 0
  		smallest_seq_read = -1U;
  #endif
-
  		if (sort_entries(&youngest))
  			break;
-
  		if (youngest > stopwatch_end)
  			break;
-
  		show_entries_rb(0);
  	}

Why such seemingly unrelated whitespace changes?

-#define S_OPTS  "a:A:b:D:d:f:F:hi:o:Oqstw:vVM"
+#define S_OPTS  "a:A:b:D:d:f:F:hi:o:Oqstw:vVMPx:X:y:Y:E"
  static char usage_str[] =    "\n\n" \
-	"-i <file>           | --input=<file>\n" \
-	"[ -a <action field> | --act-mask=<action field> ]\n" \
-	"[ -A <action mask>  | --set-mask=<action mask> ]\n" \
-	"[ -b <traces>       | --batch=<traces> ]\n" \
-	"[ -d <file>         | --dump-binary=<file> ]\n" \
-	"[ -D <dir>          | --input-directory=<dir> ]\n" \
-	"[ -f <format>       | --format=<format> ]\n" \
-	"[ -F <spec>         | --format-spec=<spec> ]\n" \
-	"[ -h                | --hash-by-name ]\n" \
-	"[ -o <file>         | --output=<file> ]\n" \
-	"[ -O                | --no-text-output ]\n" \
-	"[ -q                | --quiet ]\n" \
-	"[ -s                | --per-program-stats ]\n" \
-	"[ -t                | --track-ios ]\n" \
-	"[ -w <time>         | --stopwatch=<time> ]\n" \
-	"[ -M                | --no-msgs\n" \
-	"[ -v                | --verbose ]\n" \
-	"[ -V                | --version ]\n\n" \
+	"-i <file>               | --input=<file>\n" \
+	"[ -a <action field>     | --act-mask=<action field> ]\n" \
+	"[ -A <action mask>      | --set-mask=<action mask> ]\n" \
+	"[ -b <traces>           | --batch=<traces> ]\n" \
+	"[ -d <file>             | --dump-binary=<file> ]\n" \
+	"[ -D <dir>              | --input-directory=<dir> ]\n" \
+	"[ -f <format>           | --format=<format> ]\n" \
+	"[ -F <spec>             | --format-spec=<spec> ]\n" \
+	"[ -h                    | --hash-by-name ]\n" \
+	"[ -o <file>             | --output=<file> ]\n" \
+	"[ -O                    | --no-text-output ]\n" \
+	"[ -q                    | --quiet ]\n" \
+	"[ -s                    | --per-program-stats ]\n" \
+	"[ -t                    | --track-ios ]\n" \
+	"[ -w <time>             | --stopwatch=<time> ]\n" \
+	"[ -M                    | --no-msgs\n" \
+	"[ -E                    | --use-extensions ]\n" \
+	"[ -y <action field ext> | --act-mask-ext=<action field ext>]\n" \
+	"[ -Y <action mask ext>  | --set-mask=<action mask>]\n" \
+	"[ -P                    | --track-priority ]\n" \
+	"[ -x <ioprio field>     | --prio-mask=<ioprio field> ]\n" \
+	"[ -X <ioprio mask>      | --set-mask=<ioprio mask> ]\n" \
+	"[ -v                    | --verbose ]\n" \
+	"[ -V                    | --version ]\n\n" \
  	"\t-a Only trace specified actions. See documentation\n" \
  	"\t-A Give trace mask as a single value. See documentation\n" \
  	"\t-b stdin read batching\n" \
@@ -2769,6 +3199,12 @@ static char usage_str[] =    "\n\n" \
  	"\t-w Only parse data between the given time interval in seconds.\n" \
  	"\t   If 'start' isn't given, blkparse defaults the start time to 0\n" \
  	"\t-M Do not output messages to binary file\n" \
+	"\t-E Use Blocktrace Extensions\n" \
+	"\t-P Enable tracking priorites.\n" \
+	"\t-y Only trace specified actions ext.\n" \
+	"\t-Y Give trace mask as a single value ext.\n" \
+	"\t-x Only priority specified actions.\n" \
+	"\t-X Give priority mask as a single value.\n" \
  	"\t-v More verbose for marginal errors\n" \
  	"\t-V Print program version info\n\n";

@@ -2882,8 +3365,27 @@ int main(int argc, char *argv[])

-	if (act_mask_tmp != 0)
-		act_mask = act_mask_tmp;
+	if (use_ext) {
+		if (act_mask_tmp) {
+			fprintf(stderr, "please use y or Y with -E\n");
+			return 1;
+		}
+		if (act_mask_tmp_ext != 0)
+			act_mask_ext = act_mask_tmp_ext;
+
+		/*
+		 * When track-priority is on and user didn't specify prio_mask then
+		 * trace all the classes.
+		 */
+		if (blkparse_track_prio && !blkparse_prio_mask)
+			blkparse_prio_mask = TRACE_ALL_IOPRIO;
+	} else {
+		if (act_mask_tmp != 0)
+			act_mask = act_mask_tmp;
+	}

diff --git a/blkparse_fmt.c b/blkparse_fmt.c
index c42e6d7..15c5f44 100644
--- a/blkparse_fmt.c
+++ b/blkparse_fmt.c

+static inline void fill_rwbs_ext(char *rwbs, struct blk_io_trace_ext *t)
+{
+	uint64_t w = t->action & BLK_TC_ACT_EXT(BLK_TC_WRITE);
+	uint64_t a = t->action & BLK_TC_ACT_EXT(BLK_TC_AHEAD);
+	uint64_t s = t->action & BLK_TC_ACT_EXT(BLK_TC_SYNC);
+	uint64_t m = t->action & BLK_TC_ACT_EXT(BLK_TC_META);
+	uint64_t d = t->action & BLK_TC_ACT_EXT(BLK_TC_DISCARD);
+	uint64_t f = t->action & BLK_TC_ACT_EXT(BLK_TC_FLUSH);
+	uint64_t u = t->action & BLK_TC_ACT_EXT(BLK_TC_FUA);
+	uint64_t z = t->action & BLK_TC_ACT_EXT(BLK_TC_WRITE_ZEROES);
+	uint64_t r = t->action & BLK_TC_ACT_EXT(BLK_TC_ZONE_RESET);
+	int i = 0;
+
+	if (f)
+		rwbs[i++] = 'F'; /* flush */
+
+	if (d)
+		rwbs[i++] = 'D';
+	else if (z) {
+		rwbs[i++] = 'W'; /* write-zeroes */
+		rwbs[i++] = 'Z';
+	} else if (r) {
+		rwbs[i++] = 'Z'; /* zone-reset */
+		rwbs[i++] = 'R';
+	} else if (w)
+		rwbs[i++] = 'W';
+	else if (t->bytes)
+		rwbs[i++] = 'R';
+	else
+		rwbs[i++] = 'N';
+
+	if (u)
+		rwbs[i++] = 'F'; /* fua */
+	if (a)
+		rwbs[i++] = 'A';
+	if (s)
+		rwbs[i++] = 'S';
+	if (m)
+		rwbs[i++] = 'M';
+
+	rwbs[i] = '\0';
+}
+

+static inline bool t_zone_reset(struct trace *t)
+{
+	if (!t->use_ext)
+		return false;
+
+	return t_bit_act(t) & BLK_TC_ACT_EXT(BLK_TC_ZONE_RESET) ? true : false;
+}

nice

  static void process_default(char *act, struct per_cpu_info *pci,
-			    struct blk_io_trace *t, unsigned long long elapsed,
+			    struct trace *t, unsigned long long elapsed,
  			    int pdu_len, unsigned char *pdu_buf)
  {

+	if (t->use_ext && blkparse_track_prio) {
+		switch (IOPRIO_PRIO_CLASS(t->bit_ext->ioprio)) {
+		case IOPRIO_CLASS_NONE:
+			if (blkparse_prio_mask & 0x1)
+				fprintf(ofp, "N ");
+			break;
+		case IOPRIO_CLASS_RT:
+			if (blkparse_prio_mask & 0x2)
+				fprintf(ofp, "R ");
+			break;
+		case IOPRIO_CLASS_BE:
+			if (blkparse_prio_mask & 0x4)
+				fprintf(ofp, "B ");
+			break;
+		case IOPRIO_CLASS_IDLE:
+			if (blkparse_prio_mask & 0x8)
+				fprintf(ofp, "I ");
+			break;
+		default:
+			fprintf(ofp, "E ");
+		}
+	}


--
Mit freundlichen Gruessen / Kind regards
Steffen Maier

Linux on IBM Z Development

https://www.ibm.com/privacy/us/en/
IBM Deutschland Research & Development GmbH
Vorsitzender des Aufsichtsrats: Matthias Hartmann
Geschaeftsfuehrung: Dirk Wittkopp
Sitz der Gesellschaft: Boeblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294




[Index of Archives]     [Netdev]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux