Recent changes (master)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following changes since commit a5a4fdfd44ec1b55ebab7800a931c148540a7324:

  zipf: use 64-bit safe hash for zipf/pareto (2012-11-11 08:27:24 +0100)

are available in the git repository at:
  git://git.kernel.dk/fio.git master

Jens Axboe (3):
      genzipf: add size and percentage hit rates
      JSON: fix escape of '"' and '\' characters
      genzipf: use regular array + qsort() instead of rbtree

 Makefile    |    2 +-
 json.c      |   27 +++++++++-
 t/genzipf.c |  176 +++++++++++++++++++++++++++++++----------------------------
 3 files changed, 119 insertions(+), 86 deletions(-)

---

Diff of recent changes:

diff --git a/Makefile b/Makefile
index 0db757b..3589770 100644
--- a/Makefile
+++ b/Makefile
@@ -79,7 +79,7 @@ T_IEEE_OBJS += lib/ieee754.o
 T_IEEE_PROGS = t/ieee754
 
 T_ZIPF_OBS = t/genzipf.o
-T_ZIPF_OBJS += rbtree.o t/log.o lib/ieee754.o lib/rand.o lib/zipf.o t/genzipf.o
+T_ZIPF_OBJS += t/log.o lib/ieee754.o lib/rand.o lib/zipf.o t/genzipf.o
 T_ZIPF_PROGS = t/genzipf
 
 T_OBJS = $(T_SMALLOC_OBJS)
diff --git a/json.c b/json.c
index 8efbbda..ea61af7 100644
--- a/json.c
+++ b/json.c
@@ -57,13 +57,38 @@ static struct json_value *json_create_value_float(float number)
 	return value;
 }
 
+static char *strdup_escape(const char *str)
+{
+	const char *input = str;
+	char *p, *ret;
+	int escapes;
+
+	escapes = 0;
+	while ((input = strpbrk(input, "\\\"")) != NULL) {
+		escapes++;
+		input++;
+	}
+
+	p = ret = malloc(strlen(str) + escapes);
+	while (*str) {
+		if (*str == '\\' || *str == '\"')
+			*p++ = '\\';
+		*p++ = *str++;
+	}
+
+	return ret;
+}
+
+/*
+ * Valid JSON strings must escape '"' and '/' with a preceeding '/'
+ */
 static struct json_value *json_create_value_string(const char *str)
 {
 	struct json_value *value = malloc(sizeof(struct json_value));
 
 	if (value) {
 		value->type = JSON_TYPE_STRING;
-		value->string = strdup(str);
+		value->string = strdup_escape(str);
 		if (!value->string) {
 			free(value);
 			value = NULL;
diff --git a/t/genzipf.c b/t/genzipf.c
index fc4bdc9..2d1b107 100644
--- a/t/genzipf.c
+++ b/t/genzipf.c
@@ -28,7 +28,6 @@
 
 struct node {
 	struct flist_head list;
-	struct rb_node rb;
 	unsigned long long val;
 	unsigned long hits;
 };
@@ -36,7 +35,6 @@ struct node {
 static struct flist_head *hash;
 static unsigned long hash_bits = 24;
 static unsigned long hash_size = 1 << 24;
-static struct rb_root rb;
 
 enum {
 	TYPE_NONE = 0,
@@ -47,8 +45,9 @@ static const char *dist_types[] = { "None", "Zipf", "Pareto" };
 
 static int dist_type = TYPE_ZIPF;
 static unsigned long gb_size = 500;
-static unsigned long nranges = DEF_NR;
+static unsigned long block_size = 4096;
 static unsigned long output_nranges = DEF_NR_OUTPUT;
+static double percentage;
 static double dist_val;
 
 #define DEF_ZIPF_VAL	1.2
@@ -69,72 +68,29 @@ static struct node *hash_lookup(unsigned long long val)
 	return NULL;
 }
 
-static void hash_insert(unsigned long long val)
+static struct node *hash_insert(struct node *n, unsigned long long val)
 {
 	struct flist_head *l = &hash[hash_long(val, hash_bits)];
-	struct node *n = malloc(sizeof(*n));
 
 	n->val = val;
 	n->hits = 1;
 	flist_add_tail(&n->list, l);
-}
-
-static void rb_insert(struct node *n)
-{
-	struct rb_node **p, *parent;
-
-	memset(&n->rb, 0, sizeof(n->rb));
-	p = &rb.rb_node;
-	parent = NULL;
-	while (*p) {
-		struct node *__n;
-
-		parent = *p;
-		__n = rb_entry(parent, struct node, rb);
-		if (n->hits > __n->hits)
-			p = &(*p)->rb_left;
-		else
-			p = &(*p)->rb_right;
-	}
-
-	rb_link_node(&n->rb, parent, p);
-	rb_insert_color(&n->rb, &rb);
-}
-
-static unsigned long rb_add(struct flist_head *list)
-{
-	struct flist_head *entry;
-	unsigned long ret = 0;
-	struct node *n;
-
-	flist_for_each(entry, list) {
-		n = flist_entry(entry, struct node, list);
-
-		rb_insert(n);
-		ret++;
-	}
-
-	return ret;
-}
-
-static unsigned long rb_gen(void)
-{
-	unsigned long ret = 0;
-	unsigned int i;
-
-	for (i = 0; i < hash_size; i++)
-		ret += rb_add(&hash[i]);
-
-	return ret;
+	return n;
 }
 
 static int parse_options(int argc, char *argv[])
 {
-	const char *optstring = "t:g:i:r:o:";
+	const char *optstring = "t:g:i:o:b:p:";
 	int c, dist_val_set = 0;
 
 	while ((c = getopt(argc, argv, optstring)) != -1) {
 		switch (c) {
+		case 'p':
+			percentage = atof(optarg);
+			break;
+		case 'b':
+			block_size = strtoul(optarg, NULL, 10);
+			break;
 		case 't':
 			if (!strncmp(optarg, "zipf", 4))
 				dist_type = TYPE_ZIPF;
@@ -152,9 +108,6 @@ static int parse_options(int argc, char *argv[])
 			dist_val = atof(optarg);
 			dist_val_set = 1;
 			break;
-		case 'r':
-			nranges = strtoul(optarg, NULL, 10);
-			break;
 		case 'o':
 			output_nranges = strtoul(optarg, NULL, 10);
 			break;
@@ -183,19 +136,36 @@ static int parse_options(int argc, char *argv[])
 	return 0;
 }
 
+struct output_sum {
+	double output;
+	unsigned int nranges;
+};
+
+static int node_cmp(const void *p1, const void *p2)
+{
+	const struct node *n1 = p1;
+	const struct node *n2 = p2;
+
+	return n2->hits - n1->hits;
+}
+
 int main(int argc, char *argv[])
 {
 	unsigned long offset;
-	unsigned long i, j, nr_vals, cur_vals, interval;
-	double *output, perc, perc_i;
+	unsigned long i, j, k, nr_vals, cur_vals, interval, total_vals, nnodes;
+	unsigned long long nranges;
+	struct output_sum *output_sums;
+	struct node *nodes;
+	double perc, perc_i;
 	struct zipf_state zs;
-	struct rb_node *n;
 
 	if (parse_options(argc, argv))
 		return 1;
 
-	printf("Generating %s distribution with %f input and %lu ranges.\n", dist_types[dist_type], dist_val, nranges);
-	printf("Using device gb=%lu\n\n", gb_size);
+	printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size);
+
+	nranges = gb_size * 1024 * 1024 * 1024ULL;
+	nranges /= block_size;
 
 	if (dist_type == TYPE_ZIPF)
 		zipf_init(&zs, nranges, dist_val, 1);
@@ -213,7 +183,9 @@ int main(int argc, char *argv[])
 	for (i = 0; i < hash_size; i++)
 		INIT_FLIST_HEAD(&hash[i]);
 
-	for (nr_vals = 0, i = 0; i < nranges; i++) {
+	nodes = malloc(nranges * sizeof(struct node));
+
+	for (nr_vals = i = j = 0; i < nranges; i++) {
 		struct node *n;
 
 		if (dist_type == TYPE_ZIPF)
@@ -224,56 +196,92 @@ int main(int argc, char *argv[])
 		n = hash_lookup(offset);
 		if (n)
 			n->hits++;
-		else
-			hash_insert(offset);
+		else {
+			hash_insert(&nodes[j], offset);
+			j++;
+		}
 
 		nr_vals++;
 	}
 
-	nr_vals = rb_gen();
+	qsort(nodes, j, sizeof(struct node), node_cmp);
+	nnodes = j;
+	nr_vals = nnodes;
 
 	interval = (nr_vals + output_nranges - 1) / output_nranges;
 
-	output = malloc(output_nranges * sizeof(double));
+	output_sums = malloc(output_nranges * sizeof(struct output_sum));
+	for (i = 0; i < output_nranges; i++) {
+		output_sums[i].output = 0.0;
+		output_sums[i].nranges = 1;
+	}
 
-	i = j = cur_vals = 0;
+	total_vals = i = j = cur_vals = 0;
 	
-	n = rb_first(&rb);
-	while (n) {
-		struct node *node = rb_entry(n, struct node, rb);
+	for (k = 0; k < nnodes; k++) {
+		struct output_sum *os = &output_sums[j];
+		struct node *node = &nodes[k];
 
 		if (i >= interval) {
-			output[j] = (double) (cur_vals + 1) / (double) nranges;
-			output[j] *= 100.0;
+			os->output = (double) (cur_vals + 1) / (double) nranges;
+			os->output *= 100.0;
 			j++;
 			cur_vals = node->hits;
 			interval += (nr_vals + output_nranges - 1) / output_nranges;
-		} else
+		} else {
 			cur_vals += node->hits;
+			os->nranges += node->hits;
+		}
 
-		n = rb_next(n);
 		i++;
+		total_vals += node->hits;
+
+		if (percentage) {
+			unsigned long blocks = percentage * nranges / 100;
+
+			if (total_vals >= blocks) {
+				double cs = i * block_size / (1024 * 1024);
+				char p = 'M';
+
+				if (cs > 1024.0) {
+					cs /= 1024.0;
+					p = 'G';
+				}
+				if (cs > 1024.0) {
+					cs /= 1024.0;
+					p = 'T';
+				}
+
+				printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p);
+				percentage = 0.0;
+			}
+		}
 	}
 
 	perc_i = 100.0 / (double) output_nranges;
 	perc = 0.0;
 
-	printf("   Rows           Hits           Size\n");
-	printf("-------------------------------------------\n");
+	printf("\n   Rows           Hits           No Hits         Size\n");
+	printf("--------------------------------------------------------\n");
 	for (i = 0; i < j; i++) {
-		double gb = (double) gb_size * perc_i / 100.0;
-		char p = 'G';
+		struct output_sum *os = &output_sums[i];
+		double gb = (double) os->nranges * block_size / 1024.0;
+		char p = 'K';
 
-		if (gb < 1.0) {
+		if (gb > 1024.0) {
 			p = 'M';
-			gb *= 1024.0;
+			gb /= 1024.0;
+		}
+		if (gb > 1024.0) {
+			p = 'G';
+			gb /= 1024.0;
 		}
 
 		perc += perc_i;
-		printf("%s %6.2f%%\t%6.2f%%\t\t%6.2f%c\n", i ? "|->" : "Top", perc, output[i], gb, p);
+		printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", i ? "|->" : "Top", perc, os->output, os->nranges, gb, p);
 	}
 
-	free(output);
+	free(output_sums);
 	free(hash);
 	return 0;
 }
--
To unsubscribe from this list: send the line "unsubscribe fio" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel]     [Linux SCSI]     [Linux IDE]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux SCSI]

  Powered by Linux