The following changes since commit a5a4fdfd44ec1b55ebab7800a931c148540a7324: zipf: use 64-bit safe hash for zipf/pareto (2012-11-11 08:27:24 +0100) are available in the git repository at: git://git.kernel.dk/fio.git master Jens Axboe (3): genzipf: add size and percentage hit rates JSON: fix escape of '"' and '\' characters genzipf: use regular array + qsort() instead of rbtree Makefile | 2 +- json.c | 27 +++++++++- t/genzipf.c | 176 +++++++++++++++++++++++++++++++---------------------------- 3 files changed, 119 insertions(+), 86 deletions(-) --- Diff of recent changes: diff --git a/Makefile b/Makefile index 0db757b..3589770 100644 --- a/Makefile +++ b/Makefile @@ -79,7 +79,7 @@ T_IEEE_OBJS += lib/ieee754.o T_IEEE_PROGS = t/ieee754 T_ZIPF_OBS = t/genzipf.o -T_ZIPF_OBJS += rbtree.o t/log.o lib/ieee754.o lib/rand.o lib/zipf.o t/genzipf.o +T_ZIPF_OBJS += t/log.o lib/ieee754.o lib/rand.o lib/zipf.o t/genzipf.o T_ZIPF_PROGS = t/genzipf T_OBJS = $(T_SMALLOC_OBJS) diff --git a/json.c b/json.c index 8efbbda..ea61af7 100644 --- a/json.c +++ b/json.c @@ -57,13 +57,38 @@ static struct json_value *json_create_value_float(float number) return value; } +static char *strdup_escape(const char *str) +{ + const char *input = str; + char *p, *ret; + int escapes; + + escapes = 0; + while ((input = strpbrk(input, "\\\"")) != NULL) { + escapes++; + input++; + } + + p = ret = malloc(strlen(str) + escapes); + while (*str) { + if (*str == '\\' || *str == '\"') + *p++ = '\\'; + *p++ = *str++; + } + + return ret; +} + +/* + * Valid JSON strings must escape '"' and '/' with a preceeding '/' + */ static struct json_value *json_create_value_string(const char *str) { struct json_value *value = malloc(sizeof(struct json_value)); if (value) { value->type = JSON_TYPE_STRING; - value->string = strdup(str); + value->string = strdup_escape(str); if (!value->string) { free(value); value = NULL; diff --git a/t/genzipf.c b/t/genzipf.c index fc4bdc9..2d1b107 100644 --- a/t/genzipf.c +++ b/t/genzipf.c @@ -28,7 +28,6 @@ struct node { struct flist_head list; - struct rb_node rb; unsigned long long val; unsigned long hits; }; @@ -36,7 +35,6 @@ struct node { static struct flist_head *hash; static unsigned long hash_bits = 24; static unsigned long hash_size = 1 << 24; -static struct rb_root rb; enum { TYPE_NONE = 0, @@ -47,8 +45,9 @@ static const char *dist_types[] = { "None", "Zipf", "Pareto" }; static int dist_type = TYPE_ZIPF; static unsigned long gb_size = 500; -static unsigned long nranges = DEF_NR; +static unsigned long block_size = 4096; static unsigned long output_nranges = DEF_NR_OUTPUT; +static double percentage; static double dist_val; #define DEF_ZIPF_VAL 1.2 @@ -69,72 +68,29 @@ static struct node *hash_lookup(unsigned long long val) return NULL; } -static void hash_insert(unsigned long long val) +static struct node *hash_insert(struct node *n, unsigned long long val) { struct flist_head *l = &hash[hash_long(val, hash_bits)]; - struct node *n = malloc(sizeof(*n)); n->val = val; n->hits = 1; flist_add_tail(&n->list, l); -} - -static void rb_insert(struct node *n) -{ - struct rb_node **p, *parent; - - memset(&n->rb, 0, sizeof(n->rb)); - p = &rb.rb_node; - parent = NULL; - while (*p) { - struct node *__n; - - parent = *p; - __n = rb_entry(parent, struct node, rb); - if (n->hits > __n->hits) - p = &(*p)->rb_left; - else - p = &(*p)->rb_right; - } - - rb_link_node(&n->rb, parent, p); - rb_insert_color(&n->rb, &rb); -} - -static unsigned long rb_add(struct flist_head *list) -{ - struct flist_head *entry; - unsigned long ret = 0; - struct node *n; - - flist_for_each(entry, list) { - n = flist_entry(entry, struct node, list); - - rb_insert(n); - ret++; - } - - return ret; -} - -static unsigned long rb_gen(void) -{ - unsigned long ret = 0; - unsigned int i; - - for (i = 0; i < hash_size; i++) - ret += rb_add(&hash[i]); - - return ret; + return n; } static int parse_options(int argc, char *argv[]) { - const char *optstring = "t:g:i:r:o:"; + const char *optstring = "t:g:i:o:b:p:"; int c, dist_val_set = 0; while ((c = getopt(argc, argv, optstring)) != -1) { switch (c) { + case 'p': + percentage = atof(optarg); + break; + case 'b': + block_size = strtoul(optarg, NULL, 10); + break; case 't': if (!strncmp(optarg, "zipf", 4)) dist_type = TYPE_ZIPF; @@ -152,9 +108,6 @@ static int parse_options(int argc, char *argv[]) dist_val = atof(optarg); dist_val_set = 1; break; - case 'r': - nranges = strtoul(optarg, NULL, 10); - break; case 'o': output_nranges = strtoul(optarg, NULL, 10); break; @@ -183,19 +136,36 @@ static int parse_options(int argc, char *argv[]) return 0; } +struct output_sum { + double output; + unsigned int nranges; +}; + +static int node_cmp(const void *p1, const void *p2) +{ + const struct node *n1 = p1; + const struct node *n2 = p2; + + return n2->hits - n1->hits; +} + int main(int argc, char *argv[]) { unsigned long offset; - unsigned long i, j, nr_vals, cur_vals, interval; - double *output, perc, perc_i; + unsigned long i, j, k, nr_vals, cur_vals, interval, total_vals, nnodes; + unsigned long long nranges; + struct output_sum *output_sums; + struct node *nodes; + double perc, perc_i; struct zipf_state zs; - struct rb_node *n; if (parse_options(argc, argv)) return 1; - printf("Generating %s distribution with %f input and %lu ranges.\n", dist_types[dist_type], dist_val, nranges); - printf("Using device gb=%lu\n\n", gb_size); + printf("Generating %s distribution with %f input and %lu GB size and %lu block_size.\n", dist_types[dist_type], dist_val, gb_size, block_size); + + nranges = gb_size * 1024 * 1024 * 1024ULL; + nranges /= block_size; if (dist_type == TYPE_ZIPF) zipf_init(&zs, nranges, dist_val, 1); @@ -213,7 +183,9 @@ int main(int argc, char *argv[]) for (i = 0; i < hash_size; i++) INIT_FLIST_HEAD(&hash[i]); - for (nr_vals = 0, i = 0; i < nranges; i++) { + nodes = malloc(nranges * sizeof(struct node)); + + for (nr_vals = i = j = 0; i < nranges; i++) { struct node *n; if (dist_type == TYPE_ZIPF) @@ -224,56 +196,92 @@ int main(int argc, char *argv[]) n = hash_lookup(offset); if (n) n->hits++; - else - hash_insert(offset); + else { + hash_insert(&nodes[j], offset); + j++; + } nr_vals++; } - nr_vals = rb_gen(); + qsort(nodes, j, sizeof(struct node), node_cmp); + nnodes = j; + nr_vals = nnodes; interval = (nr_vals + output_nranges - 1) / output_nranges; - output = malloc(output_nranges * sizeof(double)); + output_sums = malloc(output_nranges * sizeof(struct output_sum)); + for (i = 0; i < output_nranges; i++) { + output_sums[i].output = 0.0; + output_sums[i].nranges = 1; + } - i = j = cur_vals = 0; + total_vals = i = j = cur_vals = 0; - n = rb_first(&rb); - while (n) { - struct node *node = rb_entry(n, struct node, rb); + for (k = 0; k < nnodes; k++) { + struct output_sum *os = &output_sums[j]; + struct node *node = &nodes[k]; if (i >= interval) { - output[j] = (double) (cur_vals + 1) / (double) nranges; - output[j] *= 100.0; + os->output = (double) (cur_vals + 1) / (double) nranges; + os->output *= 100.0; j++; cur_vals = node->hits; interval += (nr_vals + output_nranges - 1) / output_nranges; - } else + } else { cur_vals += node->hits; + os->nranges += node->hits; + } - n = rb_next(n); i++; + total_vals += node->hits; + + if (percentage) { + unsigned long blocks = percentage * nranges / 100; + + if (total_vals >= blocks) { + double cs = i * block_size / (1024 * 1024); + char p = 'M'; + + if (cs > 1024.0) { + cs /= 1024.0; + p = 'G'; + } + if (cs > 1024.0) { + cs /= 1024.0; + p = 'T'; + } + + printf("%.2f%% of hits satisfied in %.3f%cB of cache\n", percentage, cs, p); + percentage = 0.0; + } + } } perc_i = 100.0 / (double) output_nranges; perc = 0.0; - printf(" Rows Hits Size\n"); - printf("-------------------------------------------\n"); + printf("\n Rows Hits No Hits Size\n"); + printf("--------------------------------------------------------\n"); for (i = 0; i < j; i++) { - double gb = (double) gb_size * perc_i / 100.0; - char p = 'G'; + struct output_sum *os = &output_sums[i]; + double gb = (double) os->nranges * block_size / 1024.0; + char p = 'K'; - if (gb < 1.0) { + if (gb > 1024.0) { p = 'M'; - gb *= 1024.0; + gb /= 1024.0; + } + if (gb > 1024.0) { + p = 'G'; + gb /= 1024.0; } perc += perc_i; - printf("%s %6.2f%%\t%6.2f%%\t\t%6.2f%c\n", i ? "|->" : "Top", perc, output[i], gb, p); + printf("%s %6.2f%%\t%6.2f%%\t\t%8u\t%6.2f%c\n", i ? "|->" : "Top", perc, os->output, os->nranges, gb, p); } - free(output); + free(output_sums); free(hash); return 0; } -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html