prepare_packed_git_one() is modified to allow count-objects to hook a report function to so we don't need to duplicate the pack searching logic in count-objects.c. When report_pack_garbage is NULL, the overhead is insignificant. Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> --- Documentation/git-count-objects.txt | 4 +- builtin/count-objects.c | 18 ++++++++- sha1_file.c | 81 +++++++++++++++++++++++++++++++++++-- 3 files changed, 97 insertions(+), 6 deletions(-) diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt index e816823..1611d7c 100644 --- a/Documentation/git-count-objects.txt +++ b/Documentation/git-count-objects.txt @@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB prune-packable: the number of loose objects that are also present in the packs. These objects could be pruned using `git prune-packed`. + -garbage: the number of files in loose object database that are not -valid loose objects +garbage: the number of files in object database that are not valid +loose objects nor valid packs GIT --- diff --git a/builtin/count-objects.c b/builtin/count-objects.c index 9afaa88..118b2ae 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -9,6 +9,20 @@ #include "builtin.h" #include "parse-options.h" +static unsigned long garbage; + +extern void (*report_pack_garbage)(const char *path, int len, const char *name); +static void real_report_pack_garbage(const char *path, int len, const char *name) +{ + if (len && name) + error("garbage found: %.*s/%s", len, path, name); + else if (!len && name) + error("garbage found: %s%s", path, name); + else + error("garbage found: %s", path); + garbage++; +} + static void count_objects(DIR *d, char *path, int len, int verbose, unsigned long *loose, off_t *loose_size, @@ -76,7 +90,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) const char *objdir = get_object_directory(); int len = strlen(objdir); char *path = xmalloc(len + 50); - unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0; + unsigned long loose = 0, packed = 0, packed_loose = 0; off_t loose_size = 0; struct option opts[] = { OPT__VERBOSE(&verbose, N_("be verbose")), @@ -87,6 +101,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) /* we do not take arguments other than flags for now */ if (argc) usage_with_options(count_objects_usage, opts); + if (verbose) + report_pack_garbage = real_report_pack_garbage; memcpy(path, objdir, len); if (len && objdir[len-1] != '/') path[len++] = '/'; diff --git a/sha1_file.c b/sha1_file.c index 40b2329..cc6ef03 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -21,6 +21,7 @@ #include "sha1-lookup.h" #include "bulk-checkin.h" #include "streaming.h" +#include "dir.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -1000,6 +1001,54 @@ void install_packed_git(struct packed_git *pack) packed_git = pack; } +/* A hook for count-objects to report invalid files in pack directory */ +void (*report_pack_garbage)(const char *path, int len, const char *name); + +static const char *known_pack_extensions[] = { ".pack", ".keep", NULL }; + +static void report_garbage(struct string_list *list) +{ + struct strbuf sb = STRBUF_INIT; + struct packed_git *p; + int i; + + if (!report_pack_garbage) + return; + + sort_string_list(list); + + for (p = packed_git; p; p = p->next) { + struct string_list_item *item; + if (!p->pack_local) + continue; + strbuf_reset(&sb); + strbuf_add(&sb, p->pack_name, + strlen(p->pack_name) - strlen(".pack")); + item = string_list_lookup(list, sb.buf); + if (!item) + continue; + /* + * string_list_lookup does not guarantee to return the + * first matched string if it's duplicated. + */ + while (item - list->items && + !strcmp(item[-1].string, item->string)) + item--; + while (item - list->items < list->nr && + !strcmp(item->string, sb.buf)) { + item->util = NULL; /* non-garbage mark */ + item++; + } + } + for (i = 0; i < list->nr; i++) { + struct string_list_item *item = list->items + i; + if (!item->util) + continue; + report_pack_garbage(item->string, 0, item->util); + } + strbuf_release(&sb); +} + static void prepare_packed_git_one(char *objdir, int local) { /* Ensure that this buffer is large enough so that we can @@ -1009,6 +1058,7 @@ static void prepare_packed_git_one(char *objdir, int local) int len; DIR *dir; struct dirent *de; + struct string_list garbage = STRING_LIST_INIT_DUP; sprintf(path, "%s/pack", objdir); len = strlen(path); @@ -1024,14 +1074,37 @@ static void prepare_packed_git_one(char *objdir, int local) int namelen = strlen(de->d_name); struct packed_git *p; - if (!has_extension(de->d_name, ".idx")) + if (len + namelen + 1 > sizeof(path)) { + if (report_pack_garbage) + report_pack_garbage(path, len - 1, de->d_name); continue; + } + + strcpy(path + len, de->d_name); - if (len + namelen + 1 > sizeof(path)) + if (!has_extension(de->d_name, ".idx")) { + struct string_list_item *item; + int i, n; + if (!report_pack_garbage) + continue; + if (is_dot_or_dotdot(de->d_name)) + continue; + for (i = 0; known_pack_extensions[i]; i++) + if (has_extension(de->d_name, + known_pack_extensions[i])) + break; + if (!known_pack_extensions[i]) { + report_pack_garbage(path, 0, NULL); + continue; + } + n = strlen(path) - strlen(known_pack_extensions[i]); + item = string_list_append_nodup(&garbage, + xstrndup(path, n)); + item->util = (void*)known_pack_extensions[i]; continue; + } /* Don't reopen a pack we already have. */ - strcpy(path + len, de->d_name); for (p = packed_git; p; p = p->next) { if (!memcmp(path, p->pack_name, len + namelen - 4)) break; @@ -1047,6 +1120,8 @@ static void prepare_packed_git_one(char *objdir, int local) install_packed_git(p); } closedir(dir); + report_garbage(&garbage); + string_list_clear(&garbage, 0); } static int sort_pack(const void *a_, const void *b_) -- 1.8.1.2.495.g3fdf5d5.dirty -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html