Jeff King <peff@xxxxxxxx> writes: > The one difference is the sort order: git's diff output is > in tree-sort order, so a subtree "foo" sorts like "foo/", > which is after "foo.bar". Whereas the bitmap path list has a > true byte sort, which puts "foo.bar" after "foo". If we truly cared, it is easy enough to fix by having a custom comparison function in 1/3 used in the collect_paths() phase. > + /* dump it while we have the sorted order in memory */ > + for (i = 0; i < n; i++) { > + printf("%s", sorted[i]->path); > + putchar('\0'); > + } With printf("%s%c", sorted[i]->path, '\0'); you can lose the braces. > + putchar('\0'); > + > free(sorted); > } > > @@ -142,6 +150,8 @@ static void generate_bitmap(struct diff_queue_struct *q, > > ewah = bitmap_to_ewah(bitmap); > ewah_serialize_strbuf(ewah, &out); > + > + fwrite(data->commit->object.oid.hash, 1, GIT_SHA1_RAWSZ, stdout); > fwrite(out.buf, 1, out.len, stdout); OK, so per commit, we have an ewah bitmap that records the "changed paths" after the commit object name. Makes sense. And the list of paths is based on the "one" side of the filepair. When we do an equivalent of "git show X", we see "diff-tree X~1 X" and by collecting the "one" side (i.e. the subset of paths in the tree of X~1 that were modified when going to X) we say "commit X changed these paths". Makes sense, too. > -int cmd_main(int argc, const char **argv) > +static void do_gen(void) > { > struct hashmap paths; > - Let's not lose this blank line. 
> setup_git_directory(); > collect_paths(&paths); > > walk_paths(generate_bitmap, &paths); > +} > + > +static void do_dump(void) > +{ > + struct strbuf in = STRBUF_INIT; > + const char *cur; > + size_t remain; > + > + const char **paths = NULL; > + size_t alloc_paths = 0, nr_paths = 0; > + > + /* slurp stdin; in the real world we'd mmap all this */ > + strbuf_read(&in, 0, 0); > + cur = in.buf; > + remain = in.len; > + > + /* read path for each bit; in the real world this would be separate */ > + while (remain) { > + const char *end = memchr(cur, '\0', remain); > + if (!end) { > + error("truncated input while reading path"); > + goto out; > + } > + if (end == cur) { > + /* empty field signals end of paths */ > + cur++; > + remain--; > + break; > + } > + > + ALLOC_GROW(paths, nr_paths + 1, alloc_paths); > + paths[nr_paths++] = cur; > + > + remain -= end - cur + 1; > + cur = end + 1; > + } > + OK. > + while (remain) { > + struct object_id oid; > + struct ewah_bitmap *ewah; > + ssize_t len; > + > + if (remain < GIT_SHA1_RAWSZ) { > + error("truncated input reading oid"); > + goto out; > + } > + hashcpy(oid.hash, (const unsigned char *)cur); > + cur += GIT_SHA1_RAWSZ; > + remain -= GIT_SHA1_RAWSZ; > + > + ewah = ewah_new(); > + len = ewah_read_mmap(ewah, cur, remain); > + if (len < 0) { > + ewah_free(ewah); > + goto out; > + } > + > + printf("%s\n", oid_to_hex(&oid)); > + ewah_each_bit(ewah, show_path, paths); > + > + ewah_free(ewah); > + cur += len; > + remain -= len; > + } Makes perfect sense. > +out: > + free(paths); > + strbuf_release(&in); > +} > + > +int cmd_main(int argc, const char **argv) > +{ > + const char *usage_msg = "test-tree-bitmap <gen|dump>"; > + > + if (!argv[1]) > + usage(usage_msg); > + else if (!strcmp(argv[1], "gen")) > + do_gen(); > + else if (!strcmp(argv[1], "dump")) > + do_dump(); > + else > + usage(usage_msg); > > return 0; > }