Hi,

On Sun, 20 Aug 2006, Rutger Nijlunsing wrote:

> On Sun, Aug 20, 2006 at 03:20:19PM +0200, Johannes Schindelin wrote:
> > Hi,
> >
> > On Sun, 20 Aug 2006, Rutger Nijlunsing wrote:
> >
> > > You can also find it on http://www.wingding.demon.nl/git-rev-size.rb
> >
> > Ruby is _so_ mainstream. Could I have a Haskell version, pretty please?
>
> I _knew_ it... Please go bug someone else. The only thing I did was
> help someone, and for that I choose my own tools since I do it for
> fun.

Fair enough.

-- 8< --
[PATCH] Add git-rev-size

This tool spits out the number of trees, the number of blobs, and the total
bytes of the blobs for a given rev range.

Most notably, it adds an object hash map structure to the library.

Signed-off-by: Johannes Schindelin <Johannes.Schindelin@xxxxxx>
---
 Makefile           |    4 ++
 builtin-rev-size.c |  122 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 builtin.h          |    1 +
 git.c              |    1 +
 hash.c             |   81 +++++++++++++++++++++++++++++++++++
 hash.h             |   19 ++++++++
 6 files changed, 227 insertions(+), 1 deletions(-)

diff --git a/Makefile b/Makefile
index a86f289..06c8dd9 100644
--- a/Makefile
+++ b/Makefile
@@ -264,7 +264,8 @@ LIB_OBJS = \
 	server-info.o setup.o sha1_file.o sha1_name.o strbuf.o \
 	tag.o tree.o usage.o config.o environment.o ctype.o copy.o \
 	fetch-clone.o revision.o pager.o tree-walk.o xdiff-interface.o \
-	alloc.o merge-file.o path-list.o unpack-trees.o help.o $(DIFF_OBJS)
+	alloc.o merge-file.o path-list.o unpack-trees.o help.o \
+	hash.o $(DIFF_OBJS)
 
 BUILTIN_OBJS = \
 	builtin-add.o \
@@ -297,6 +298,7 @@ BUILTIN_OBJS = \
 	builtin-repo-config.o \
 	builtin-rev-list.o \
 	builtin-rev-parse.o \
+	builtin-rev-size.o \
 	builtin-rm.o \
 	builtin-show-branch.o \
 	builtin-stripspace.o \
diff --git a/builtin-rev-size.c b/builtin-rev-size.c
new file mode 100644
index 0000000..ad88e48
--- /dev/null
+++ b/builtin-rev-size.c
@@ -0,0 +1,122 @@
+/*
+ * "git rev-size" builtin command
+ *
+ * Copyright (C) 2006 Johannes Schindelin
+ */
+
+#include "cache.h"
+#include "builtin.h"
+#include "object.h"
+#include "tree.h"
+#include "tree-walk.h"
+#include "commit.h"
+#include "diff.h"
+#include "revision.h"
+#include "hash.h"
+
+static const char builtin_rev_size_usage[] =
+"git-rev-size <commit-id>...";
+
+/*
+ * Cached totals for one tree or blob.  "object" must remain the first
+ * member: the hash map stores "struct object *" pointers, which
+ * get_rev_size() casts back to "struct rev_size *".
+ */
+struct rev_size {
+	struct object object;
+	size_t trees, blobs, bytes;
+};
+
+/* Maps the SHA-1 of every object sized so far to its cached totals. */
+static struct hash_map rev_size_hash = { 0, 0, NULL };
+
+/*
+ * Return the number of trees, blobs and blob bytes reachable from the
+ * tree or blob "sha1", the object itself included.  Results are cached
+ * in rev_size_hash, so an object shared by many revisions is inspected
+ * only once.  Dies if the object is neither a blob nor a tree.
+ */
+static struct rev_size *get_rev_size(const char *sha1)
+{
+	struct rev_size *rev_size =
+		(struct rev_size *)hash_get(&rev_size_hash, sha1);
+
+	if (rev_size == NULL) {
+		char type[64];
+		unsigned long size;
+
+		rev_size = xcalloc(1, sizeof(struct rev_size));
+
+		if (sha1_object_info(sha1, type, &size))
+			die("Cannot get info for %s", sha1_to_hex(sha1));
+
+		if (!strcmp(type, "blob")) {
+			rev_size->blobs = 1;
+			rev_size->bytes = size;
+		} else if (!strcmp(type, "tree")) {
+			struct tree *tree = (struct tree *)parse_object(sha1);
+			struct tree_desc desc;
+			struct name_entry entry;
+
+			/* NULL if the object could not be loaded */
+			if (!tree)
+				die("Cannot parse tree %s", sha1_to_hex(sha1));
+
+			desc.buf = tree->buffer;
+			desc.size = tree->size;
+
+			/* sum up the (cached) totals of every entry ... */
+			while (tree_entry(&desc, &entry)) {
+				struct rev_size *r = get_rev_size(entry.sha1);
+
+				rev_size->trees += r->trees;
+				rev_size->blobs += r->blobs;
+				rev_size->bytes += r->bytes;
+			}
+
+			/* ... and count this tree itself */
+			rev_size->trees++;
+		} else
+			die("Cannot calculate size for type %s", type);
+
+		memcpy(rev_size->object.sha1, sha1, 20);
+		hash_put(&rev_size_hash, &rev_size->object);
+	}
+
+	return rev_size;
+}
+
+/*
+ * Print one line per revision in the given range: the commit name
+ * followed by the number of trees, the number of blobs and the total
+ * size in bytes of the blobs in that commit's tree.
+ */
+int cmd_rev_size(int argc, const char **argv, const char *prefix)
+{
+	struct rev_info revs;
+	struct commit *commit;
+
+	init_revisions(&revs, prefix);
+	revs.abbrev = 0;
+	revs.commit_format = CMIT_FMT_UNSPECIFIED;
+	argc = setup_revisions(argc, argv, &revs, NULL);
+
+	/* setup_revisions() leaves the arguments it did not understand */
+	if (argc > 1)
+		usage(builtin_rev_size_usage);
+
+	prepare_revision_walk(&revs);
+
+	while ((commit = get_revision(&revs))) {
+		struct rev_size *rev_size =
+			get_rev_size(commit->tree->object.sha1);
+
+		/* the counters are size_t, which "%d" cannot print portably */
+		printf("%s %lu %lu %lu\n", sha1_to_hex(commit->object.sha1),
+			(unsigned long)rev_size->trees,
+			(unsigned long)rev_size->blobs,
+			(unsigned long)rev_size->bytes);
+	}
+
+	return 0;
+}
diff --git a/builtin.h b/builtin.h
index ade58c4..9848a5e 100644
--- a/builtin.h
+++ b/builtin.h
@@ -46,6 +46,7 @@ extern int cmd_read_tree(int argc, const
 extern int cmd_repo_config(int argc, const char **argv, const char *prefix);
 extern int cmd_rev_list(int argc, const char **argv, const char *prefix);
 extern int cmd_rev_parse(int argc, const char **argv, const char *prefix);
+extern int cmd_rev_size(int argc, const char **argv, const char *prefix);
 extern int cmd_rm(int argc, const char **argv, const char *prefix);
 extern int cmd_show_branch(int argc, const char **argv, const char *prefix);
 extern int cmd_show(int argc, const char **argv, const char *prefix);
diff --git a/git.c b/git.c
index bf0fe0e..4cfa6cf 100644
--- a/git.c
+++ b/git.c
@@ -262,6 +262,7 @@ static void handle_internal_command(int
 		{ "repo-config", cmd_repo_config },
 		{ "rev-list", cmd_rev_list, RUN_SETUP },
 		{ "rev-parse", cmd_rev_parse, RUN_SETUP },
+		{ "rev-size", cmd_rev_size, RUN_SETUP },
 		{ "rm", cmd_rm, RUN_SETUP },
 		{ "show-branch", cmd_show_branch, RUN_SETUP },
 		{ "show", cmd_show, RUN_SETUP | USE_PAGER },
diff --git a/hash.c b/hash.c
new file mode 100644
index 0000000..12d1e65
--- /dev/null
+++ b/hash.c
@@ -0,0 +1,81 @@
+#include "cache.h"
+#include "object.h"
+#include "hash.h"
+
+/*
+ * Find the slot for "sha1" by linear probing: start at a position
+ * derived from the leading bytes of the SHA-1 and walk forward, wrapping
+ * around, until reaching the entry with that SHA-1 or an empty slot.
+ * The table must have at least one empty slot (and so a non-zero
+ * "alloc"), otherwise this loops forever.
+ */
+static unsigned int hash_index(struct hash_map *hash, const char *sha1)
+{
+	unsigned int index;
+
+	/*
+	 * The SHA-1 bytes need not be aligned for an "unsigned int"
+	 * load (e.g. when they live inside a tree buffer), so copy
+	 * them out rather than dereferencing a cast pointer.
+	 */
+	memcpy(&index, sha1, sizeof(index));
+	while (1) {
+		if (index >= hash->alloc)
+			index = index % hash->alloc;
+		if (hash->map[index] == NULL ||
+				!hashcmp(sha1, hash->map[index]->sha1))
+			return index;
+		index++;
+	}
+}
+
+/*
+ * Switch to a slot array twice the size (at least 32 slots) and insert
+ * all entries again, because the slot of an entry depends on the table
+ * size.
+ */
+static void grow_hash(struct hash_map *hash)
+{
+	unsigned long i;
+	unsigned long old_alloc = hash->alloc;
+	struct object **old_map = hash->map;
+
+	hash->alloc = hash->alloc < 32 ? 32 : 2 * hash->alloc;
+	hash->map = xcalloc(hash->alloc, sizeof(struct object *));
+	hash->nr = 0;
+
+	for (i = 0; i < old_alloc; i++) {
+		struct object *obj = old_map[i];
+		if (!obj)
+			continue;
+		hash_put(hash, obj);
+	}
+	free(old_map);
+}
+
+/*
+ * Store "obj" under obj->sha1, replacing an entry with the same SHA-1.
+ * The map keeps the pointer only; it does not copy the object.
+ */
+void hash_put(struct hash_map *hash, struct object *obj)
+{
+	unsigned int index;
+
+	/* stay at most half full, so that hash_index() finds an empty slot */
+	if (hash->nr + 1 > hash->alloc / 2)
+		grow_hash(hash);
+
+	index = hash_index(hash, obj->sha1);
+	/* count new entries only, and only after a possible grow_hash() */
+	if (!hash->map[index])
+		hash->nr++;
+	hash->map[index] = obj;
+}
+
+/* Return the object stored under "sha1", or NULL if there is none. */
+struct object *hash_get(struct hash_map *hash, const char *sha1)
+{
+	if (hash->alloc == 0)
+		return NULL;
+	return hash->map[hash_index(hash, sha1)];
+}
diff --git a/hash.h b/hash.h
new file mode 100644
index 0000000..0e2b67c
--- /dev/null
+++ b/hash.h
@@ -0,0 +1,19 @@
+#ifndef HASH_H
+#define HASH_H
+
+/*
+ * A map from SHA-1 to "struct object *", kept as an open addressing
+ * hash table.  Initialize it with { 0, 0, NULL }.  "nr" is the number of
+ * entries, "alloc" the number of slots in "map".
+ */
+struct hash_map {
+	unsigned long nr, alloc;
+	struct object **map;
+};
+
+/* Return the object stored under "sha1", or NULL if there is none. */
+extern struct object *hash_get(struct hash_map *hash, const char *sha1);
+/* Store "obj" under obj->sha1; the map keeps the pointer, not a copy. */
+extern void hash_put(struct hash_map *hash, struct object *obj);
+
+#endif
-- 
1.4.2.ga5e8f-dirty

-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html