Re: [Patch] blkiomon: I/O monitor

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Jens,
here is another patch that I would like to warmly recommend to you :-)
Your feedback is appreciated.

Thanks,
Martin


On Tue, 2008-09-30 at 21:37 +0200, Martin Peschke wrote:
> Jens,
> I'd like to ask you whether this is something you are interested in for
> inclusion in the blktrace tree.
> 
> We use it as part of a small I/O monitoring solution. But blkiomon
> itself is pretty much generic and implements just another way of looking
> at raw blktrace data, like btt or some statistics implemented in
> blkparse. So I would be happy to contribute it to blktrace.
> 
> I posted it way back in July and made some changes suggested by Alan.
> 
> Thanks,
> Martin
> 
> 
> 
> 
> blkiomon periodically generates per device request size and request latency
> statistics from blktrace data. It provides histograms as well as data that
> can be used to calculate min, max, average and variance. For this purpose,
> it consumes D and C traces read from stdin.
> 
> There are options for binary output and human-readable output to files and
> stdout. Output to a message queue is supported as well.
> 
> #blktrace /dev/sdw -a issue -a complete -w 200 -o - | blkiomon -I 8 -h -
> 
> time: Tue Sep 30 17:39:25 2008
> device: 65,96
> requests: read 62, write 40, bidir: 0
> sizes: num 102, min 4096, max 430080, sum 13312000, squ 3102442782720,
>  avg 130509.8, var 13383296793.3
> d2c: num 102, min 393, max 14261, sum 359441, squ 2830211755, avg 3523.9,
>  var 15329081.8
> sizes histogram (in kB):
>             0:     0         1024:     0         2048:     0         4096:     6
>          8192:     0        16384:    15        32768:     4        65536:    24
>        131072:    11       262144:    30       524288:    12      1048576:     0
>       2097152:     0      4194304:     0      8388608:     0    > 8388608:     0
> d2c histogram (in usec):
>             0:     0            8:     0           16:     0           32:     0
>            64:     0          128:     0          256:     0          512:    13
>          1024:    21         2048:    27         4096:    14         8192:     8
>         16384:    19        32768:     0        65536:     0       131072:     0
>        262144:     0       524288:     0      1048576:     0      2097152:     0
>       4194304:     0      8388608:     0     16777216:     0     33554432:     0
>     >33554432:     0
> 
> time: Tue Sep 30 17:39:33 2008
> device: 65,96
> requests: read 312, write 47, bidir: 0
> sizes: num 359, min 4096, max 430080, sum 13197312, squ 1575816790016,
>  avg 36761.3, var 3038067547.5
> d2c: num 359, min 294, max 9211, sum 387134, squ 1262489694, avg 1078.4,
>  var 2353807.5
> sizes histogram (in kB):
>             0:     0         1024:     0         2048:     0         4096:    32
>          8192:    17        16384:   133        32768:    87        65536:    59
>        131072:     9       262144:    18       524288:     4      1048576:     0
>       2097152:     0      4194304:     0      8388608:     0    > 8388608:     0
> d2c histogram (in usec):
>             0:     0            8:     0           16:     0           32:     0
>            64:     0          128:     0          256:     0          512:   129
>          1024:   164         2048:    33         4096:    15         8192:    13
>         16384:     5        32768:     0        65536:     0       131072:     0
>        262144:     0       524288:     0      1048576:     0      2097152:     0
>       4194304:     0      8388608:     0     16777216:     0     33554432:     0
>     >33554432:     0
> 
> Signed-off-by: Martin Peschke <mp3@xxxxxxxxxx>
> ---
>  Makefile       |    5 
>  blkiomon.c     |  716 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++
>  blkiomon.h     |  105 ++++++++
>  doc/blkiomon.8 |  123 +++++++++
>  stats.h        |  155 ++++++++++++
>  5 files changed, 1103 insertions(+), 1 deletion(-)
> 
> Index: blktrace/blkiomon.c
> ===================================================================
> --- /dev/null
> +++ blktrace/blkiomon.c
> @@ -0,0 +1,708 @@
> +/*
> + * I/O monitor based on block queue trace data
> + *
> + * Copyright IBM Corp. 2008
> + *
> + * Author(s): Martin Peschke <mp3@xxxxxxxxxx>
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or
> + *  (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with this program; if not, write to the Free Software
> + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <stdio.h>
> +#include <stdlib.h>
> +#include <string.h>
> +#include <signal.h>
> +#include <getopt.h>
> +#include <errno.h>
> +#include <locale.h>
> +#include <libgen.h>
> +#include <sys/msg.h>
> +#include <pthread.h>
> +#include <time.h>
> +
> +#include "blktrace.h"
> +#include "rbtree.h"
> +#include "jhash.h"
> +#include "blkiomon.h"
> +
> +/*
> + * One raw blktrace record together with the bookkeeping blkiomon needs
> + * while the record waits for its matching counterpart.
> + */
> +struct trace {
> +	struct blk_io_trace bit;	/* trace record as read from the input */
> +	struct rb_node node;		/* only referenced by blkiomon_debug() */
> +	struct trace *next;		/* hash chain or vacant list linkage */
> +	long sequence;			/* arrival order, used for debug output */
> +};
> +
> +/*
> + * Insertion point recorded by a lookup that found nothing, so that the
> + * caller can link a new node without walking the tree again.
> + */
> +struct rb_search {
> +	struct rb_node **node_ptr;	/* link where the new node goes */
> +	struct rb_node *parent;		/* parent of that link, or NULL */
> +};
> +
> +/*
> + * Statistics wrapped as a message for msgsnd(): a long message type
> + * followed by the payload.
> + */
> +struct dstat_msg {
> +	long mtype;			/* set to msg_id before sending */
> +	struct blkiomon_stat stat;	/* per device statistics payload */
> +};
> +
> +/*
> + * Per device statistics entry. Each entry is kept both in a tree (for
> + * lookup by device) and in a singly linked list (for output).
> + */
> +struct dstat {
> +	struct dstat_msg msg;	/* statistics, ready to be sent as a message */
> +	struct rb_node node;	/* linkage in dstat_tree[] */
> +	struct dstat *next;	/* linkage in dstat_list[] or the vacant list */
> +};
> +
> +/* State of one output channel (human-readable, binary or debug). */
> +struct output {
> +	char *fn;	/* file name from the command line, NULL if unused */
> +	FILE *fp;	/* stream opened by blkiomon_open_output() */
> +	char *buf;	/* buffer handed to setvbuf() */
> +	int pipe;	/* set to 1 when the output goes to stdout ("-") */
> +};
> +
> +static char blkiomon_version[] = "0.2";
> +
> +/* input stream and sample interval in seconds (-I) */
> +static FILE *ifp;
> +static int interval = -1;
> +
> +/* recycled trace buffers; the list length is capped on free */
> +static struct trace *vacant_traces_list = NULL;
> +static int vacant_traces = 0;
> +static struct rb_root trace_tree = RB_ROOT;
> +
> +/* traces still waiting for their counterpart, hashed by sector */
> +#define TRACE_HASH_SIZE 128
> +struct trace *thash[TRACE_HASH_SIZE] = {};
> +
> +/*
> + * Two sets of per device statistics: dstat_curr selects the set being
> + * filled while the interval thread prints the other one.
> + */
> +static struct dstat *vacant_dstats_list = NULL;
> +static struct rb_root dstat_tree[2] = { RB_ROOT, RB_ROOT };
> +static struct dstat *dstat_list[2] = {};
> +static int dstat_curr = 0;
> +
> +static struct output human, binary, debug;
> +
> +/* message queue settings (-Q, -q, -m); -1 and NULL mean "not given" */
> +static char *msg_q_name = NULL;
> +static int msg_q_id = -1, msg_q = -1;
> +static long msg_id = -1;
> +
> +static pthread_t interval_thread;
> +static pthread_mutex_t dstat_mutex = PTHREAD_MUTEX_INITIALIZER;
> +
> +int data_is_native = -1;
> +
> +/*
> + * Run flag polled by the main loops. It is cleared from the signal
> + * handler, so it has to be a volatile sig_atomic_t.
> + */
> +static volatile sig_atomic_t up = 1;
> +
> +/* debugging */
> +static long leftover = 0, driverdata = 0, match = 0, mismatch = 0, sequence = 0;
> +
> +/*
> + * Write all fields of a single trace to the debug output.
> + *
> + * @t:     trace to dump
> + * @descr: label printed in the separator line
> + *
> + * Does nothing unless a debug output file was requested.
> + */
> +static void dump_bit(struct trace *t, const char *descr)
> +{
> +	struct blk_io_trace *bit = &t->bit;
> +
> +	if (!debug.fn)
> +		return;
> +
> +	fprintf(debug.fp, "--- %s ---\n", descr);
> +	fprintf(debug.fp, "magic    %16d\n", bit->magic);
> +	fprintf(debug.fp, "sequence %16d\n", bit->sequence);
> +	/*
> +	 * time and sector are printed as unsigned long long so that the
> +	 * conversion specifier matches the argument and nothing is cut off
> +	 * where unsigned long is narrower than the field.
> +	 */
> +	fprintf(debug.fp, "time     %16llu\n", (unsigned long long)bit->time);
> +	fprintf(debug.fp, "sector   %16llu\n", (unsigned long long)bit->sector);
> +	fprintf(debug.fp, "bytes    %16d\n", bit->bytes);
> +	fprintf(debug.fp, "action   %16x\n", bit->action);
> +	fprintf(debug.fp, "pid      %16d\n", bit->pid);
> +	fprintf(debug.fp, "device   %16d\n", bit->device);
> +	fprintf(debug.fp, "cpu      %16d\n", bit->cpu);
> +	fprintf(debug.fp, "error    %16d\n", bit->error);
> +	fprintf(debug.fp, "pdu_len  %16d\n", bit->pdu_len);
> +
> +	fprintf(debug.fp, "order    %16ld\n", t->sequence);
> +}
> +
> +/*
> + * Write the fields of two traces side by side to the debug output.
> + *
> + * @t1:    trace shown in the left column
> + * @t2:    trace shown in the right column
> + * @descr: label printed in the separator line
> + *
> + * Does nothing unless a debug output file was requested.
> + */
> +static void dump_bits(struct trace *t1, struct trace *t2, const char *descr)
> +{
> +	struct blk_io_trace *bit1 = &t1->bit;
> +	struct blk_io_trace *bit2 = &t2->bit;
> +
> +	if (!debug.fn)
> +		return;
> +
> +	fprintf(debug.fp, "--- %s ---\n", descr);
> +	fprintf(debug.fp, "magic    %16d %16d\n", bit1->magic, bit2->magic);
> +	fprintf(debug.fp, "sequence %16d %16d\n",
> +		bit1->sequence, bit2->sequence);
> +	/* unsigned long long keeps specifier and argument type in agreement */
> +	fprintf(debug.fp, "time     %16llu %16llu\n",
> +		(unsigned long long)bit1->time,
> +		(unsigned long long)bit2->time);
> +	fprintf(debug.fp, "sector   %16llu %16llu\n",
> +		(unsigned long long)bit1->sector,
> +		(unsigned long long)bit2->sector);
> +	fprintf(debug.fp, "bytes    %16d %16d\n", bit1->bytes, bit2->bytes);
> +	fprintf(debug.fp, "action   %16x %16x\n", bit1->action, bit2->action);
> +	fprintf(debug.fp, "pid      %16d %16d\n", bit1->pid, bit2->pid);
> +	fprintf(debug.fp, "device   %16d %16d\n", bit1->device, bit2->device);
> +	fprintf(debug.fp, "cpu      %16d %16d\n", bit1->cpu, bit2->cpu);
> +	fprintf(debug.fp, "error    %16d %16d\n", bit1->error, bit2->error);
> +	fprintf(debug.fp, "pdu_len  %16d %16d\n", bit1->pdu_len, bit2->pdu_len);
> +
> +	fprintf(debug.fp, "order    %16ld %16ld\n", t1->sequence, t2->sequence);
> +}
> +
> +/*
> + * Hand out a zeroed statistics entry, reusing a vacant one if possible.
> + *
> + * Returns NULL (after reporting the error) if no memory is available.
> + */
> +static struct dstat *blkiomon_alloc_dstat(void)
> +{
> +	struct dstat *fresh = vacant_dstats_list;
> +
> +	/* prefer a recycled entry over a new heap allocation */
> +	if (fresh)
> +		vacant_dstats_list = fresh->next;
> +	else
> +		fresh = malloc(sizeof(*fresh));
> +
> +	if (fresh) {
> +		memset(fresh, 0, sizeof(*fresh));
> +		return fresh;
> +	}
> +
> +	perror("blkiomon: could not allocate device statistic");
> +	return NULL;
> +}
> +
> +/*
> + * Look up the statistics entry of a device in the current tree.
> + *
> + * @search: filled with the insertion point if nothing was found
> + * @device: device number to look for
> + *
> + * Returns the entry, or NULL if the device has no entry yet. @search is
> + * only written in the NULL case.
> + */
> +static struct dstat *blkiomon_find_dstat(struct rb_search *search, __u32 device)
> +{
> +	struct rb_node **link = &(dstat_tree[dstat_curr].rb_node);
> +	struct rb_node *above = NULL;
> +
> +	while (*link) {
> +		struct dstat *cur;
> +
> +		above = *link;
> +		cur = rb_entry(above, struct dstat, node);
> +
> +		if (cur->msg.stat.device == device)
> +			return cur;
> +
> +		/* entries with smaller device numbers send us to the left */
> +		if (cur->msg.stat.device < device)
> +			link = &above->rb_left;
> +		else
> +			link = &above->rb_right;
> +	}
> +
> +	search->node_ptr = link;
> +	search->parent = above;
> +	return NULL;
> +}
> +
> +/*
> + * Return the statistics entry of a device, creating it on first use.
> + *
> + * Lookup and insertion happen under dstat_mutex. The mutex is released
> + * before the entry is returned.
> + *
> + * Returns NULL if a new entry could not be allocated.
> + */
> +static struct dstat *blkiomon_get_dstat(__u32 device)
> +{
> +	struct rb_search where;
> +	struct dstat *entry;
> +
> +	pthread_mutex_lock(&dstat_mutex);
> +
> +	entry = blkiomon_find_dstat(&where, device);
> +	if (!entry) {
> +		entry = blkiomon_alloc_dstat();
> +		if (entry) {
> +			entry->msg.stat.device = device;
> +			/* start both minima at the largest value */
> +			entry->msg.stat.size_mm.min = -1ULL;
> +			entry->msg.stat.d2c_mm.min = -1ULL;
> +
> +			rb_link_node(&entry->node, where.parent,
> +				     where.node_ptr);
> +			rb_insert_color(&entry->node, &dstat_tree[dstat_curr]);
> +
> +			entry->next = dstat_list[dstat_curr];
> +			dstat_list[dstat_curr] = entry;
> +		}
> +	}
> +
> +	pthread_mutex_unlock(&dstat_mutex);
> +	return entry;
> +}
> +
> +/*
> + * Send the statistics of one device to the message queue.
> + *
> + * Returns 0 if no message queue was requested, otherwise the result of
> + * msgsnd().
> + */
> +static int blkiomon_output_msg_q(struct dstat *dstat)
> +{
> +	if (msg_q_name) {
> +		dstat->msg.mtype = msg_id;
> +		return msgsnd(msg_q, &dstat->msg,
> +			      sizeof(struct blkiomon_stat), 0);
> +	}
> +	return 0;
> +}
> +
> +/*
> + * Append the statistics of one device to the binary output.
> + *
> + * On a write error the output is closed and disabled for the rest of
> + * the run.
> + *
> + * Returns 0 on success or if binary output is not in use, 1 on failure.
> + */
> +static int blkiomon_output_binary(struct dstat *dstat)
> +{
> +	struct blkiomon_stat *rec = &dstat->msg.stat;
> +	int ok;
> +
> +	if (!binary.fn)
> +		return 0;
> +
> +	ok = (fwrite(rec, sizeof(*rec), 1, binary.fp) == 1);
> +	/* push each record out right away when writing to stdout */
> +	if (ok && binary.pipe)
> +		ok = !fflush(binary.fp);
> +	if (ok)
> +		return 0;
> +
> +	fprintf(stderr, "blkiomon: could not write to %s\n", binary.fn);
> +	fclose(binary.fp);
> +	binary.fn = NULL;
> +	return 1;
> +}
> +
> +/*
> + * Emit a list of per device statistics on every configured output.
> + *
> + * @head: first entry of the list
> + * @ts:   time stamp stored in every entry
> + *
> + * Each entry is printed in human-readable form first, then converted
> + * to big endian for the binary file and the message queue.
> + *
> + * Returns the last entry of the list, or NULL for an empty list.
> + */
> +static struct dstat *blkiomon_output(struct dstat *head, struct timespec *ts)
> +{
> +	struct dstat *cur = head;
> +	struct dstat *last = NULL;
> +
> +	while (cur) {
> +		struct blkiomon_stat *stat = &cur->msg.stat;
> +
> +		stat->time = ts->tv_sec;
> +		blkiomon_stat_print(human.fp, stat);
> +		blkiomon_stat_to_be(stat);
> +		blkiomon_output_binary(cur);
> +		blkiomon_output_msg_q(cur);
> +
> +		last = cur;
> +		cur = cur->next;
> +	}
> +	return last;
> +}
> +
> +/*
> + * Thread function: once per sample interval, take over the statistics
> + * gathered so far, write them out and recycle the entries.
> + *
> + * @data: unused, returned as is
> + *
> + * The loop never terminates by itself.
> + */
> +static void *blkiomon_interval(void *data)
> +{
> +	struct timespec wake;
> +	struct dstat *head, *tail;
> +	int finished, rc;
> +
> +	clock_gettime(CLOCK_REALTIME, &wake);
> +
> +	while (1) {
> +		wake.tv_sec += interval;
> +
> +		/*
> +		 * The deadline is absolute, so an interrupted sleep is simply
> +		 * resumed with the same deadline instead of skipping a whole
> +		 * interval.
> +		 */
> +		do {
> +			rc = clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME,
> +					     &wake, NULL);
> +		} while (rc == EINTR);
> +		if (rc) {
> +			/*
> +			 * clock_nanosleep() returns the error number and
> +			 * leaves errno alone, so perror() cannot be used.
> +			 */
> +			fprintf(stderr, "blkiomon: interrupted sleep: %s\n",
> +				strerror(rc));
> +			continue;
> +		}
> +
> +		/* grab tree and make data gatherer build up another tree */
> +		pthread_mutex_lock(&dstat_mutex);
> +		finished = dstat_curr;
> +		dstat_curr = dstat_curr ? 0 : 1;
> +		pthread_mutex_unlock(&dstat_mutex);
> +
> +		head = dstat_list[finished];
> +		if (!head)
> +			continue;
> +		dstat_list[finished] = NULL;
> +		dstat_tree[finished] = RB_ROOT;
> +		tail = blkiomon_output(head, &wake);
> +
> +		/* give the whole list back for reuse */
> +		pthread_mutex_lock(&dstat_mutex);
> +		tail->next = vacant_dstats_list;
> +		vacant_dstats_list = head;
> +		pthread_mutex_unlock(&dstat_mutex);
> +	}
> +	return data;
> +}
> +
> +#define BLK_DATADIR(a) (((a) >> BLK_TC_SHIFT) & (BLK_TC_READ | BLK_TC_WRITE))
> +
> +/*
> + * Account one completed request in the statistics of its device.
> + *
> + * @bit_d: issue (D) trace of the request
> + * @bit_c: completion (C) trace of the request
> + *
> + * Returns 0 on success, 1 if no statistics entry could be obtained.
> + *
> + * NOTE(review): the entry is updated after blkiomon_get_dstat() has
> + * released dstat_mutex, so the interval thread may be printing or
> + * recycling the same entry at that moment - confirm and consider
> + * holding the mutex across the update.
> + */
> +static int blkiomon_account(struct blk_io_trace *bit_d,
> +			    struct blk_io_trace *bit_c)
> +{
> +	__u64 latency = (bit_c->time - bit_d->time) / 1000; /* ns -> us */
> +	__u32 bytes = bit_d->bytes;
> +	struct dstat *entry = blkiomon_get_dstat(bit_d->device);
> +	struct blkiomon_stat *stat;
> +
> +	if (!entry)
> +		return 1;
> +	stat = &entry->msg.stat;
> +
> +	/* read takes precedence; neither read nor write counts as bidir */
> +	if (BLK_DATADIR(bit_c->action) & BLK_TC_READ)
> +		stat->read++;
> +	else if (BLK_DATADIR(bit_c->action) & BLK_TC_WRITE)
> +		stat->write++;
> +	else
> +		stat->bidir++;
> +
> +	histlog2_account(stat->size_hist, bytes, &size_hist);
> +	histlog2_account(stat->d2c_hist, latency, &d2c_hist);
> +	minmax_account(&stat->size_mm, bytes);
> +	minmax_account(&stat->d2c_mm, latency);
> +	return 0;
> +}
> +
> +/*
> + * Hand out a zeroed trace buffer, reusing a vacant one if possible.
> + *
> + * Returns NULL (after reporting the error) if no memory is available.
> + * Callers already check for NULL, so the result of malloc() must not
> + * be passed to memset() unchecked.
> + */
> +static struct trace *blkiomon_alloc_trace(void)
> +{
> +	struct trace *t = vacant_traces_list;
> +
> +	if (t) {
> +		vacant_traces_list = t->next;
> +		vacant_traces--;
> +	} else {
> +		t = malloc(sizeof(*t));
> +		if (!t) {
> +			perror("blkiomon: could not allocate trace");
> +			return NULL;
> +		}
> +	}
> +	memset(t, 0, sizeof(*t));
> +	return t;
> +}
> +
> +/*
> + * Give back a trace buffer. Up to 256 buffers are kept on the vacant
> + * list for reuse, any further buffer is released.
> + *
> + * A NULL pointer is accepted and ignored, like free() does.
> + */
> +static void blkiomon_free_trace(struct trace *t)
> +{
> +	if (!t)
> +		return;
> +
> +	if (vacant_traces < 256) {
> +		t->next = vacant_traces_list;
> +		vacant_traces_list = t;
> +		vacant_traces++;
> +	} else
> +		free(t);
> +}
> +
> +/*
> + * Reduce an action word to the write, read, fs and pc category bits,
> + * which is what two traces of the same request have to agree on.
> + */
> +static int action(int a)
> +{
> +	const int categories = BLK_TC_WRITE | BLK_TC_READ |
> +			       BLK_TC_FS | BLK_TC_PC;
> +
> +	return a & (BLK_TC_ACT(categories));
> +}
> +
> +/*
> + * Put a trace at the head of the hash chain selected by its sector,
> + * where it waits for its counterpart.
> + */
> +static void blkiomon_store_trace(struct trace *t)
> +{
> +	int slot = t->bit.sector % TRACE_HASH_SIZE;
> +	struct trace **bucket = &thash[slot];
> +
> +	t->next = *bucket;
> +	*bucket = t;
> +}
> +
> +/*
> + * Find and unlink the stored trace that belongs to the same request as
> + * @bit, i.e. one with the same device, sector and action categories.
> + *
> + * Returns the unlinked trace, or NULL if there is none.
> + */
> +static struct trace *blkiomon_fetch_trace(struct blk_io_trace *bit)
> +{
> +	int slot = bit->sector % TRACE_HASH_SIZE;
> +	struct trace **link = &thash[slot];
> +
> +	/* walk the chain through the links so unlinking needs no prev */
> +	while (*link) {
> +		struct trace *cand = *link;
> +
> +		if (cand->bit.device == bit->device &&
> +		    cand->bit.sector == bit->sector &&
> +		    action(cand->bit.action) == action(bit->action)) {
> +			*link = cand->next;
> +			return cand;
> +		}
> +		link = &cand->next;
> +	}
> +	return NULL;
> +}
> +
> +/*
> + * Process one incoming D or C trace.
> + *
> + * The trace is stored if no counterpart is waiting. Otherwise the pair
> + * is accounted if it consists of an older D and a younger C trace; if
> + * not, the younger trace is kept and the older one is dropped.
> + *
> + * Returns the buffer to be used for the next incoming trace, which may
> + * be NULL if a new buffer was needed and could not be allocated.
> + */
> +static struct trace *blkiomon_do_trace(struct trace *t)
> +{
> +	struct trace *partner = blkiomon_fetch_trace(&t->bit);
> +	struct trace *older, *newer;
> +
> +	/* nothing to pair with yet: park the trace, continue with a new one */
> +	if (!partner) {
> +		blkiomon_store_trace(t);
> +		return blkiomon_alloc_trace();
> +	}
> +
> +	/* order the pair by time stamp */
> +	older = (partner->bit.time < t->bit.time) ? partner : t;
> +	newer = (older == partner) ? t : partner;
> +
> +	if (!((older->bit.action & BLK_TC_ACT(BLK_TC_ISSUE)) &&
> +	      (newer->bit.action & BLK_TC_ACT(BLK_TC_COMPLETE)))) {
> +		/* not an older D with a younger C - keep more recent trace */
> +		dump_bits(older, newer, "mismatch");
> +		mismatch++;
> +		blkiomon_store_trace(newer);
> +		return older;
> +	}
> +
> +	/* matching D and C traces - update statistics */
> +	match++;
> +	blkiomon_account(&older->bit, &newer->bit);
> +	blkiomon_free_trace(partner);
> +	return t;
> +}
> +
> +/*
> + * Main loop: read traces from the input until end of file, an error or
> + * a terminating signal, and feed D and C traces to blkiomon_do_trace().
> + *
> + * Returns 1 if the first trace buffer could not be allocated,
> + * 0 otherwise.
> + */
> +static int blkiomon_do_fifo(void)
> +{
> +	struct trace *t;
> +	struct blk_io_trace *bit;
> +	void *pdu_buf = NULL;
> +
> +	t = blkiomon_alloc_trace();
> +	if (!t)
> +		return 1;
> +	bit = &t->bit;
> +
> +	while (up) {
> +		if (fread(bit, sizeof(*bit), 1, ifp) != 1) {
> +			if (!feof(ifp))
> +				fprintf(stderr,
> +					"blkiomon: could not read trace\n");
> +			break;
> +		}
> +		if (ferror(ifp)) {
> +			clearerr(ifp);
> +			perror("blkiomon: error while reading trace");
> +			break;
> +		}
> +
> +		if (data_is_native == -1 && check_data_endianness(bit->magic))
> +			break;
> +
> +		/* endianness */
> +		trace_to_cpu(bit);
> +		if (verify_trace(bit)) {
> +			perror("blkiomon: bad trace");
> +			break;
> +		}
> +
> +		/* read additional trace payload */
> +		if (bit->pdu_len) {
> +			/*
> +			 * Keep the old buffer until realloc() has succeeded,
> +			 * so that it is neither leaked nor replaced by NULL.
> +			 */
> +			void *tmp = realloc(pdu_buf, bit->pdu_len);
> +
> +			if (!tmp) {
> +				perror("blkiomon: could not allocate payload");
> +				break;
> +			}
> +			pdu_buf = tmp;
> +			if (fread(pdu_buf, bit->pdu_len, 1, ifp) != 1) {
> +				clearerr(ifp);
> +				perror("blkiomon: could not read payload");
> +				break;
> +			}
> +		}
> +
> +		t->sequence = sequence++;
> +
> +		if (!(bit->action & BLK_TC_ACT(BLK_TC_ISSUE | BLK_TC_COMPLETE)))
> +			continue;
> +
> +		/* try to find matching trace and update statistics */
> +		t = blkiomon_do_trace(t);
> +		if (!t)
> +			break;
> +		bit = &t->bit;
> +		/* t and bit will be recycled for next incoming trace */
> +	}
> +	/* t is NULL if the loop was left because no buffer was available */
> +	if (t)
> +		blkiomon_free_trace(t);
> +	free(pdu_buf);
> +	return 0;
> +}
> +
> +/*
> + * Open one output channel and set up its buffering.
> + *
> + * "-" selects stdout, which is line buffered; any other name is opened
> + * as a file and fully buffered.
> + *
> + * Returns 0 on success or if the channel is not in use, 1 on failure.
> + * On failure the channel is disabled and everything acquired so far is
> + * released again.
> + */
> +static int blkiomon_open_output(struct output *out)
> +{
> +	int mode, vbuf_size;
> +
> +	if (!out->fn)
> +		return 0;
> +
> +	if (!strcmp(out->fn, "-")) {
> +		out->fp = fdopen(STDOUT_FILENO, "w");
> +		mode = _IOLBF;
> +		vbuf_size = 4096;
> +		out->pipe = 1;
> +	} else {
> +		out->fp = fopen(out->fn, "w");
> +		mode = _IOFBF;
> +		vbuf_size = 128 * 1024;
> +		out->pipe = 0;
> +	}
> +	if (!out->fp)
> +		goto failed;
> +	/* allocate what setvbuf() is told about; NULL makes it allocate */
> +	out->buf = malloc(vbuf_size);
> +	if (setvbuf(out->fp, out->buf, mode, vbuf_size))
> +		goto failed;
> +	return 0;
> +
> +failed:
> +	fprintf(stderr, "blkiomon: could not write to %s\n", out->fn);
> +	if (out->fp) {
> +		fclose(out->fp);
> +		out->fp = NULL;
> +	}
> +	free(out->buf);
> +	out->buf = NULL;
> +	out->fn = NULL;
> +	return 1;
> +}
> +
> +/*
> + * Look up the message queue described by -Q, -q and -m.
> + *
> + * Returns 0 on success or if no message queue was requested, non-zero
> + * otherwise. msgget() is retried until it succeeds or the program is
> + * told to terminate.
> + */
> +static int blkiomon_open_msg_q(void)
> +{
> +	key_t key;
> +
> +	if (!msg_q_name)
> +		return 0;
> +	/*
> +	 * Both IDs default to -1, meaning "not given". Reject that as well
> +	 * as 0, so that ftok() is never called with an unset ID.
> +	 */
> +	if (msg_q_id <= 0 || msg_id <= 0)
> +		return 1;
> +	key = ftok(msg_q_name, msg_q_id);
> +	if (key == -1)
> +		return 1;
> +	while (up) {
> +		msg_q = msgget(key, S_IRWXU);
> +		if (msg_q >= 0)
> +			break;
> +	}
> +	return (msg_q >= 0 ? 0 : -1);
> +}
> +
> +/*
> + * Write the traces that never found a counterpart, followed by the
> + * overall counters, to the debug output.
> + *
> + * Unmatched traces are kept in the hash table, so that is what has to
> + * be walked; nothing ever inserts into trace_tree.
> + */
> +static void blkiomon_debug(void)
> +{
> +	struct trace *t;
> +	int i;
> +
> +	if (!debug.fn)
> +		return;
> +
> +	for (i = 0; i < TRACE_HASH_SIZE; i++) {
> +		for (t = thash[i]; t; t = t->next) {
> +			dump_bit(t, "leftover");
> +			leftover++;
> +		}
> +	}
> +	fprintf(debug.fp, "%ld leftover, %ld match, %ld mismatch, "
> +		"%ld driverdata, %ld overall\n",
> +		leftover, match, mismatch, driverdata, sequence);
> +}
> +
> +/* short options; all but -V take an argument */
> +#define S_OPTS "b:D:h:I:Q:q:m:V"
> +
> +/*
> + * Usage text, printed after "Usage: " when an unknown option is given
> + * or when no positive sample interval has been specified.
> + */
> +static char usage_str[] = "\n\nblkiomon " \
> +	"-I <interval>       | --interval=<interval>\n" \
> +	"[ -h <file>         | --human-readable=<file> ]\n" \
> +	"[ -b <file>         | --binary=<file> ]\n" \
> +	"[ -D <file>         | --debug=<file> ]\n" \
> +	"[ -Q <path name>    | --msg-queue-name=<path name>]\n" \
> +	"[ -q <msg queue id> | --msg-queue-id=<msg queue id>]\n" \
> +	"[ -m <msg id>       | --msg-id=<msg id>]\n" \
> +	"[ -V                | --version ]\n\n" \
> +	"\t-I   Sample interval.\n" \
> +	"\t-h   Human-readable output file.\n" \
> +	"\t-b   Binary output file.\n" \
> +	"\t-D   Output file for debugging data.\n" \
> +	"\t-Qqm Output to message queue using given ID for messages.\n" \
> +	"\t-V   Print program version.\n\n";
> +
> +/*
> + * Long options. Each one maps to the short option of the same meaning.
> + *
> + * The usage text and the man page document --msg-queue-name, so that
> + * spelling is accepted in addition to the original --msg-queue.
> + */
> +static struct option l_opts[] = {
> +	{
> +		.name = "human-readable",
> +		.has_arg = required_argument,
> +		.flag = NULL,
> +		.val = 'h'
> +	},
> +	{
> +		.name = "binary",
> +		.has_arg = required_argument,
> +		.flag = NULL,
> +		.val = 'b'
> +	},
> +	{
> +		.name = "debug",
> +		.has_arg = required_argument,
> +		.flag = NULL,
> +		.val = 'D'
> +	},
> +	{
> +		.name = "interval",
> +		.has_arg = required_argument,
> +		.flag = NULL,
> +		.val = 'I'
> +	},
> +	{
> +		.name = "msg-queue-name",
> +		.has_arg = required_argument,
> +		.flag = NULL,
> +		.val = 'Q'
> +	},
> +	{
> +		/* original spelling, kept for compatibility */
> +		.name = "msg-queue",
> +		.has_arg = required_argument,
> +		.flag = NULL,
> +		.val = 'Q'
> +	},
> +	{
> +		.name = "msg-queue-id",
> +		.has_arg = required_argument,
> +		.flag = NULL,
> +		.val = 'q'
> +	},
> +	{
> +		.name = "msg-id",
> +		.has_arg = required_argument,
> +		.flag = NULL,
> +		.val = 'm'
> +	},
> +	{
> +		.name = "version",
> +		.has_arg = no_argument,
> +		.flag = NULL,
> +		.val = 'V'
> +	},
> +	{
> +		.name = NULL,
> +	}
> +};
> +
> +/*
> + * Signal handler: announce termination and ask the main loops to stop
> + * by clearing the run flag.
> + *
> + * Only async-signal-safe functions may be called here, so the message
> + * is written with write(2) instead of fprintf(3).
> + */
> +static void blkiomon_signal(int signal)
> +{
> +	static const char msg[] = "blkiomon: terminated by signal\n";
> +	int saved_errno = errno;
> +
> +	(void)signal;
> +	if (write(STDERR_FILENO, msg, sizeof(msg) - 1) < 0) {
> +		/* nothing sensible can be done about that in here */
> +	}
> +	up = 0;
> +	/* do not disturb errno of the interrupted code */
> +	errno = saved_errno;
> +}
> +
> +/*
> + * Parse the command line, open the requested outputs, start the
> + * interval thread and process traces from stdin until the input ends.
> + *
> + * Returns 0 on success and after printing the version, 1 on usage or
> + * setup errors.
> + */
> +int main(int argc, char *argv[])
> +{
> +	int c, rc;
> +
> +	signal(SIGALRM, blkiomon_signal);
> +	signal(SIGINT, blkiomon_signal);
> +	signal(SIGTERM, blkiomon_signal);
> +	signal(SIGQUIT, blkiomon_signal);
> +
> +	while ((c = getopt_long(argc, argv, S_OPTS, l_opts, NULL)) != -1) {
> +		switch (c) {
> +		case 'h':
> +			human.fn = optarg;
> +			break;
> +		case 'b':
> +			binary.fn = optarg;
> +			break;
> +		case 'D':
> +			debug.fn = optarg;
> +			break;
> +		case 'I':
> +			interval = atoi(optarg);
> +			break;
> +		case 'Q':
> +			msg_q_name = optarg;
> +			break;
> +		case 'q':
> +			msg_q_id = atoi(optarg);
> +			break;
> +		case 'm':
> +			msg_id = atoi(optarg);
> +			break;
> +		case 'V':
> +			printf("%s version %s\n", argv[0], blkiomon_version);
> +			return 0;
> +		default:
> +			fprintf(stderr, "Usage: %s", usage_str);
> +			return 1;
> +		}
> +	}
> +
> +	/* the sample interval is mandatory and has to be positive */
> +	if (interval <= 0) {
> +		fprintf(stderr, "Usage: %s", usage_str);
> +		return 1;
> +	}
> +
> +	ifp = fdopen(STDIN_FILENO, "r");
> +	if (!ifp) {
> +		perror("blkiomon: could not open stdin for reading");
> +		return 1;
> +	}
> +
> +	if (blkiomon_open_output(&human))
> +		return 1;
> +	if (blkiomon_open_output(&binary))
> +		return 1;
> +	if (blkiomon_open_output(&debug))
> +		return 1;
> +	if (blkiomon_open_msg_q())
> +		return 1;
> +
> +	/*
> +	 * pthread_create() returns the error number and does not set errno,
> +	 * so the message is built with strerror() rather than perror().
> +	 */
> +	rc = pthread_create(&interval_thread, NULL, blkiomon_interval, NULL);
> +	if (rc) {
> +		fprintf(stderr, "blkiomon: could not create thread: %s\n",
> +			strerror(rc));
> +		return 1;
> +	}
> +
> +	blkiomon_do_fifo();
> +
> +	blkiomon_debug();
> +	return 0;
> +}
> Index: blktrace/Makefile
> ===================================================================
> --- blktrace.orig/Makefile
> +++ blktrace/Makefile
> @@ -1,7 +1,7 @@
>  CC	= gcc
>  CFLAGS	= -Wall -O2 -g -W
>  ALL_CFLAGS = $(CFLAGS) -D_GNU_SOURCE -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64
> -PROGS	= blkparse blktrace verify_blkparse blkrawverify
> +PROGS	= blkparse blktrace verify_blkparse blkrawverify blkiomon
>  LIBS	= -lpthread
>  SCRIPTS	= btrace
> 
> @@ -34,6 +34,9 @@ verify_blkparse: verify_blkparse.o
>  blkrawverify: blkrawverify.o
>  	$(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^)
> 
> +blkiomon: blkiomon.o rbtree.o
> +	$(CC) $(ALL_CFLAGS) -o $@ $(filter %.o,$^) $(LIBS) -lrt
> +
>  $(PROGS): | depend
> 
>  docs:
> Index: blktrace/blkiomon.h
> ===================================================================
> --- /dev/null
> +++ blktrace/blkiomon.h
> @@ -0,0 +1,105 @@
> +/*
> + * I/O monitor based on block queue trace data
> + *
> + * Copyright IBM Corp. 2008
> + *
> + * Author(s): Martin Peschke <mp3@xxxxxxxxxx>
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or
> + *  (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with this program; if not, write to the Free Software
> + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +#ifndef BLKIOMON_H
> +#define BLKIOMON_H
> +
> +#include <string.h>
> +
> +#include "stats.h"
> +#include "blktrace.h"
> +
> +/* number of buckets in the request size and latency histograms */
> +#define BLKIOMON_SIZE_BUCKETS 16
> +#define BLKIOMON_D2C_BUCKETS 25
> +/*
> + * Statistics of one device for one sample interval. The structure is
> + * written as is to the binary output and to the message queue, so its
> + * layout is part of the output format.
> + */
> +struct blkiomon_stat {
> +	__u64 time;				/* time stamp, in seconds */
> +	__u32 size_hist[BLKIOMON_SIZE_BUCKETS];	/* request size histogram */
> +	__u32 d2c_hist[BLKIOMON_D2C_BUCKETS];	/* D to C latency histogram */
> +	struct minmax size_mm;			/* request size summary */
> +	struct minmax d2c_mm;			/* D to C latency summary */
> +	__u64 read;				/* number of read requests */
> +	__u64 write;				/* number of write requests */
> +	__u64 bidir;				/* neither read nor write */
> +	__u32 device;				/* device number */
> +};
> +
> +/* layout of the request size histogram: bucket limits 0, 1024, 2048, ... */
> +static struct histlog2 size_hist = {
> +	.first = 0,
> +	.delta = 1024,
> +	.num = BLKIOMON_SIZE_BUCKETS
> +};
> +
> +/* layout of the latency histogram: bucket limits 0, 8, 16, 32, ... */
> +static struct histlog2 d2c_hist = {
> +	.first = 0,
> +	.delta = 8,
> +	.num = BLKIOMON_D2C_BUCKETS
> +};
> +
> +/*
> + * Reset a statistics record: everything is zeroed, then both summaries
> + * are initialized, which sets their minimum to the largest value.
> + */
> +static inline void blkiomon_stat_init(struct blkiomon_stat *bstat)
> +{
> +	/* the memset has to come first, minmax_init() sets non-zero minima */
> +	memset(bstat, 0, sizeof(*bstat));
> +	minmax_init(&bstat->size_mm);
> +	minmax_init(&bstat->d2c_mm);
> +}
> +
> +/*
> + * Convert a statistics record from CPU byte order to big endian, in
> + * place. The record is meant for output afterwards.
> + */
> +static inline void blkiomon_stat_to_be(struct blkiomon_stat *bstat)
> +{
> +	/* scalar members */
> +	bstat->time = cpu_to_be64(bstat->time);
> +	bstat->device = cpu_to_be32(bstat->device);
> +	bstat->read = cpu_to_be64(bstat->read);
> +	bstat->write = cpu_to_be64(bstat->write);
> +	bstat->bidir = cpu_to_be64(bstat->bidir);
> +
> +	/* summaries */
> +	minmax_to_be(&bstat->size_mm);
> +	minmax_to_be(&bstat->d2c_mm);
> +
> +	/* histograms */
> +	histlog2_to_be(bstat->size_hist, &size_hist);
> +	histlog2_to_be(bstat->d2c_hist, &d2c_hist);
> +}
> +
> +/*
> + * Add the statistics in @src to those in @dst. Time stamp and device
> + * of @dst are left untouched.
> + */
> +static inline void blkiomon_stat_merge(struct blkiomon_stat *dst,
> +				       struct blkiomon_stat *src)
> +{
> +	/* request counters */
> +	dst->read += src->read;
> +	dst->write += src->write;
> +	dst->bidir += src->bidir;
> +
> +	/* summaries */
> +	minmax_merge(&dst->size_mm, &src->size_mm);
> +	minmax_merge(&dst->d2c_mm, &src->d2c_mm);
> +
> +	/* histograms */
> +	histlog2_merge(&size_hist, dst->size_hist, src->size_hist);
> +	histlog2_merge(&d2c_hist, dst->d2c_hist, src->d2c_hist);
> +}
> +
> +/*
> + * Print a statistics record in human-readable form.
> + *
> + * @fp: output stream; nothing is printed if it is NULL
> + * @p:  record to print, in CPU byte order
> + *
> + * NOTE(review): the size histogram is labelled "in kB" although the
> + * bucket limits are derived from a delta of 1024 applied to request
> + * sizes in bytes - confirm the intended unit.
> + */
> +static inline void blkiomon_stat_print(FILE *fp, struct blkiomon_stat *p)
> +{
> +	time_t stamp;
> +
> +	if (!fp)
> +		return;
> +
> +	/*
> +	 * The time stamp is stored as __u64, which need not have the size
> +	 * of time_t, so it is copied instead of casting the pointer.
> +	 */
> +	stamp = (time_t)p->time;
> +	fprintf(fp, "\ntime: %s", ctime(&stamp));
> +	fprintf(fp, "device: %d,%d\n", MAJOR(p->device), MINOR(p->device));
> +	fprintf(fp, "requests: read %llu, write %llu, bidir: %llu\n",
> +		(unsigned long long)p->read, (unsigned long long)p->write,
> +		(unsigned long long)p->bidir);
> +	minmax_print(fp, "sizes", &p->size_mm);
> +	minmax_print(fp, "d2c", &p->d2c_mm);
> +	histlog2_print(fp, "sizes histogram (in kB)", p->size_hist, &size_hist);
> +	histlog2_print(fp, "d2c histogram (in usec)", p->d2c_hist, &d2c_hist);
> +}
> +
> +#endif
> Index: blktrace/doc/blkiomon.8
> ===================================================================
> --- /dev/null
> +++ blktrace/doc/blkiomon.8
> @@ -0,0 +1,116 @@
> +.TH BLKIOMON 8 "July 17, 2008" "" ""
> +
> +
> +.SH NAME
> +blkiomon \- monitor block device I/O based on blktrace data
> +
> +
> +.SH SYNOPSIS
> +.B blkiomon \-I \fIinterval\fR [ \-h \fIfile\fR ] [ \-b \fIfile\fR ]
> +[ \-D \fIfile\fR ] [ \-Q \fIpath_name\fR
> +\-q \fImsg_queue_id\fR \-m \fImsg_id\fR ] [ \-V ]
> +.br
> +
> +
> +.SH DESCRIPTION
> +blkiomon is a block device I/O monitor. It periodically generates per device
> +request size and request latency statistics from blktrace data. It provides
> +histograms as well as data that can be used to calculate min, max, average
> +and variance. For this purpose, it consumes D and C traces read from stdin.
> +
> +There are options for binary output and human-readable output to files and
> +stdout. Output to a message queue is supported as well.
> +
> +There is no need to use blkparse with blkiomon. blkiomon is capable of
> +consuming binary output written to stdout by blktrace.
> +
> +
> +.SH OPTIONS
> +
> +\-I \fIinterval\fR
> +.br
> +\-\-interval=\fIinterval\fR
> +.RS
> +Set sample interval
> +.RE
> +
> +\-h \fIfile\fR
> +.br
> +\-\-human\-readable=\fIfile\fR
> +.RS
> +Human-readable output file. Use '\-' for stdout.
> +.RE
> +
> +\-b \fIfile\fR
> +.br
> +\-\-binary=\fIfile\fR
> +.RS
> +Binary output file. Use '\-' for stdout.
> +.RE
> +
> +\-D \fIfile\fR
> +.br
> +\-\-debug=\fIfile\fR
> +.RS
> +Output file for debugging data. Use '\-' for stdout.
> +.RE
> +
> +\-Q \fIpath_name\fR
> +.br
> +\-\-msg\-queue\-name=\fIpath_name\fR
> +.RS
> +Sets \fIpath_name\fR as path name for existing message queue to be used
> +for binary output.
> +.RE
> +
> +\-q \fImsg_queue_id\fR
> +.br
> +\-\-msg\-queue\-id=\fImsg_queue_id\fR
> +.RS
> +Sets \fImsg_queue_id\fR as ID for an existing message queue to be used
> +for binary output.
> +.RE
> +
> +\-m \fImsg_id\fR
> +.br
> +\-\-msg\-id=\fImsg_id\fR
> +.RS
> +Sets \fImsg_id\fR as message identifier to be used for binary output
> +messages written to an existing message queue.
> +.RE
> +
> +\-V
> +.br
> +\-\-version
> +.RS
> +Print program version.
> +.RE
> +
> +
> +.SH EXAMPLES
> +To get I/O statistics for /dev/sdw every 10 seconds for a period of one hour,
> +use the following command:
> +
> +    % blktrace /dev/sdw -a issue -a complete -w 3600 -o - | blkiomon -I 10 -h -
> +
> +
> +.SH AUTHORS
> +blkiomon and this man page were written by Martin Peschke.
> +
> +
> +.SH "REPORTING BUGS"
> +Report bugs to <linux\-btrace@xxxxxxxxxxxxxxx>
> +
> +
> +.SH COPYRIGHT
> +Copyright \(co 2008 IBM Corp.
> +.br
> +This is free software.  You may redistribute copies of it under the terms of
> +the GNU General Public License <http://www.gnu.org/licenses/gpl.html>.
> +There is NO WARRANTY, to the extent permitted by law.
> +
> +
> +.SH "SEE ALSO"
> +btrace (8), blktrace (8), blkparse (1), verify_blkparse (1), blkrawverify (1),
> +btt (1)
> +
> Index: blktrace/stats.h
> ===================================================================
> --- /dev/null
> +++ blktrace/stats.h
> @@ -0,0 +1,155 @@
> +/*
> + * Copyright IBM Corp. 2008
> + *
> + * Author(s): Martin Peschke <mp3@xxxxxxxxxx>
> + *            Stefan Raspl <stefan.raspl@xxxxxxxxxx>
> + *
> + *  This program is free software; you can redistribute it and/or modify
> + *  it under the terms of the GNU General Public License as published by
> + *  the Free Software Foundation; either version 2 of the License, or
> + *  (at your option) any later version.
> + *
> + *  This program is distributed in the hope that it will be useful,
> + *  but WITHOUT ANY WARRANTY; without even the implied warranty of
> + *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + *  GNU General Public License for more details.
> + *
> + *  You should have received a copy of the GNU General Public License
> + *  along with this program; if not, write to the Free Software
> + *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +#ifndef STATS_H
> +#define STATS_H
> +
> +#include <linux/types.h>
> +#include "endian.h"
> +
> +/*
> + * Running summary of a series of values, sufficient to derive minimum,
> + * maximum, average and variance.
> + */
> +struct minmax {
> +	__u64 min;	/* smallest value seen */
> +	__u64 max;	/* largest value seen */
> +	__u64 sum;	/* sum of all values */
> +	__u64 sos;	/* sum of the squares of all values */
> +	__u64 num;	/* number of values */
> +};
> +
> +/*
> + * Reset a summary to the empty state: no values, and a minimum that
> + * any accounted value will replace.
> + */
> +static inline void minmax_init(struct minmax *mm)
> +{
> +	mm->num = 0;
> +	mm->sum = 0;
> +	mm->sos = 0;
> +	mm->max = 0;
> +	mm->min = -1ULL;
> +}
> +
> +/* Add one value to a summary. */
> +static inline void minmax_account(struct minmax *mm, __u64 value)
> +{
> +	mm->num++;
> +	mm->sum += value;
> +	mm->sos += value * value;
> +
> +	if (mm->min > value)
> +		mm->min = value;
> +	if (mm->max < value)
> +		mm->max = value;
> +}
> +
> +/* Fold the summary @src into the summary @dst. */
> +static inline void minmax_merge(struct minmax *dst, struct minmax *src)
> +{
> +	dst->num += src->num;
> +	dst->sum += src->sum;
> +	dst->sos += src->sos;
> +
> +	if (dst->min > src->min)
> +		dst->min = src->min;
> +	if (dst->max < src->max)
> +		dst->max = src->max;
> +}
> +
> +/* Convert all members of a summary to big endian, in place. */
> +static inline void minmax_to_be(struct minmax *mm)
> +{
> +	mm->min = cpu_to_be64(mm->min);
> +	mm->max = cpu_to_be64(mm->max);
> +	mm->sum = cpu_to_be64(mm->sum);
> +	mm->sos = cpu_to_be64(mm->sos);
> +	mm->num = cpu_to_be64(mm->num);
> +}
> +
> +/* Average of the accounted values; not a number for an empty summary. */
> +static inline double minmax_avg(struct minmax *mm)
> +{
> +	double count = (double)mm->num;
> +
> +	return mm->sum / count;
> +}
> +
> +/*
> + * Variance of the accounted values, derived from the sum and the sum
> + * of squares.
> + *
> + * The square of the sum is computed in floating point, because it
> + * exceeds the range of a 64 bit integer as soon as the sum itself
> + * needs more than 32 bits.
> + */
> +static inline double minmax_var(struct minmax *mm)
> +{
> +	double num = (double)mm->num;
> +	double sum = (double)mm->sum;
> +	double sos = (double)mm->sos;
> +
> +	return ((sos - ((sum * sum) / num)) / num);
> +}
> +
> +/*
> + * Print a summary on one line.
> + *
> + * @fp: output stream
> + * @s:  label printed in front of the values
> + * @mm: summary to print
> + *
> + * Returns the result of fprintf().
> + *
> + * The values are unsigned long long, so they are printed with %llu;
> + * %Ld is not a standard conversion and would show large values as
> + * negative numbers.
> + */
> +static inline int minmax_print(FILE *fp, const char *s, struct minmax *mm)
> +{
> +	return fprintf(fp, "%s: num %llu, min %llu, max %llu, sum %llu, "
> +		       "squ %llu, avg %.1f, var %.1f\n", s,
> +		       (unsigned long long)mm->num,
> +		       (unsigned long long)mm->min, (unsigned long long)mm->max,
> +		       (unsigned long long)mm->sum, (unsigned long long)mm->sos,
> +		       minmax_avg(mm), minmax_var(mm));
> +}
> +
> +/*
> + * Layout of a histogram whose bucket limits double from one bucket to
> + * the next: first, first + delta, first + 2 * delta, first + 4 * delta...
> + */
> +struct histlog2 {
> +	int first;	/* upper limit of the first bucket */
> +	int delta;	/* distance between the first and second limit */
> +	int num;	/* number of buckets, including the overflow bucket */
> +};
> +
> +/*
> + * Upper limit of the bucket with the given index.
> + *
> + * The shift is done in 64 bit arithmetic, so that histograms with many
> + * buckets or a large delta do not overflow an int.
> + */
> +static inline __u64 histlog2_upper_limit(int index, struct histlog2 *h)
> +{
> +	return (__u64)h->first +
> +	       (index ? (__u64)h->delta << (index - 1) : 0);
> +}
> +
> +/*
> + * Index of the bucket a value belongs to: the first bucket whose upper
> + * limit is not exceeded, or the last bucket if all limits are exceeded.
> + */
> +static inline int histlog2_index(__u64 val, struct histlog2 *h)
> +{
> +	int bucket = 0;
> +
> +	while (bucket < (h->num - 1) &&
> +	       val > histlog2_upper_limit(bucket, h))
> +		bucket++;
> +	return bucket;
> +}
> +
> +/*
> + * Count a value in the bucket it belongs to.
> + *
> + * @bucket: array of counters, one per bucket
> + * @val:    value to be counted
> + * @h:      layout of the histogram
> + *
> + * The value is taken as __u64, like histlog2_index() does, so that
> + * 64 bit values passed by callers are not truncated before the bucket
> + * is chosen.
> + */
> +static inline void histlog2_account(__u32 *bucket, __u64 val,
> +				    struct histlog2 *h)
> +{
> +	int index = histlog2_index(val, h);
> +	bucket[index]++;
> +}
> +
> +/*
> + * Add the counters of the histogram @src to those of @dst.
> + *
> + * All h->num buckets are merged, including the last one, which counts
> + * the values beyond the highest limit.
> + */
> +static inline void histlog2_merge(struct histlog2 *h, __u32 *dst, __u32 *src)
> +{
> +	int i;
> +
> +	for (i = 0; i < h->num; i++)
> +		dst[i] += src[i];
> +}
> +
> +/*
> + * Convert the counters of a histogram to big endian, in place.
> + *
> + * All h->num buckets are converted, including the last one, which
> + * counts the values beyond the highest limit.
> + */
> +static inline void histlog2_to_be(__u32 a[], struct histlog2 *h)
> +{
> +	int i;
> +
> +	for (i = 0; i < h->num; i++)
> +		a[i] = cpu_to_be32(a[i]);
> +}
> +
> +/*
> + * Print a histogram, four buckets per line.
> + *
> + * @fp: output stream
> + * @s:  heading printed above the buckets
> + * @a:  array of counters, one per bucket
> + * @h:  layout of the histogram
> + *
> + * Every bucket but the last is shown with its upper limit. The last
> + * bucket is shown as being beyond the highest limit.
> + */
> +static inline void histlog2_print(FILE *fp, const char *s, __u32 a[],
> +				  struct histlog2 *h)
> +{
> +	int i = 0;
> +
> +	fprintf(fp, "%s:\n", s);
> +	while (i < h->num - 1) {
> +		unsigned long limit = (unsigned long)(histlog2_upper_limit(i, h));
> +
> +		fprintf(fp, "   %10ld:%6d", limit, a[i]);
> +		i++;
> +		/* line break after every fourth bucket */
> +		if (!(i % 4))
> +			fprintf(fp, "\n");
> +	}
> +	fprintf(fp, "    >%8ld:%6d\n",
> +		(unsigned long)(histlog2_upper_limit(i - 1, h)), a[i]);
> +}
> +
> +#endif
> 
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-btrace" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-btrace" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Netdev]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux