On Mon, Feb 04, 2013 at 10:16:23AM -0800, Junio C Hamano wrote: > I forgot to mention one more thing. Your report_pack_garbage() > special cases ".pack" to see if it is a regular file, but this loop > structure causes a regular file whose name ends with ".pack" but > without corresponding ".idx" file to go unreported. > > I think the loop should be restructured to iterate over all known > file types and report unknown ones, if you want to repurpose it for > the reporting, something along this line, perhaps: > > for (each name) { > if (does it end with ".idx") { > if (is it unusable ".idx") { > report garbage; > } > continue; > } > if (! we are in report mode) > continue; > if (does it end with ".pack") { > if (!have we seen corresponding ".idx") > remember it; > continue; > } > report garbage; > } > for (remembered pack) { > if (does it have corresponding ".idx" && > is it really usable ".pack") > continue; > report garbage; > } > How about the below patch? Ignoring good .commits files will be just a couple more lines in the "for (remembered_pack)" loop. Also in another mail in this thread: > I also wonder, especially because you are enumerating the temporary > pack files in .git/objects/pack/, if it make more sense to account > for their sizes as well. After all, you would end up doing stat() > for a common case of files with ".pack" suffix---doing so for all of > them shouldn't be too bad. I thought about that, but we may need to do extra stat() for loose garbage as well. As it is now, garbage is complained loudly, which gives me enough motivation to clean up, even without looking at how much disk space it uses. -- 8< -- Subject: count-objects: report garbage pack directory too prepare_packed_git_one() is modified to allow count-objects to hook a report function to so we don't need to duplicate the pack searching logic in count-objects.c. When report_pack_garbage is NULL, the overhead is insignificant. diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt index e816823..1611d7c 100644 --- a/Documentation/git-count-objects.txt +++ b/Documentation/git-count-objects.txt @@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB prune-packable: the number of loose objects that are also present in the packs. These objects could be pruned using `git prune-packed`. + -garbage: the number of files in loose object database that are not -valid loose objects +garbage: the number of files in object database that are not valid +loose objects nor valid packs GIT --- diff --git a/builtin/count-objects.c b/builtin/count-objects.c index 9afaa88..7fdd508 100644 --- a/builtin/count-objects.c +++ b/builtin/count-objects.c @@ -9,6 +9,8 @@ #include "builtin.h" #include "parse-options.h" +static unsigned long garbage; + static void count_objects(DIR *d, char *path, int len, int verbose, unsigned long *loose, off_t *loose_size, @@ -65,6 +67,16 @@ static void count_objects(DIR *d, char *path, int len, int verbose, } } +extern void (*report_pack_garbage)(const char *path, int len, const char *name); +static void real_report_pack_garbage(const char *path, int len, const char *name) +{ + if (len) + error("garbage found: %.*s/%s", len, path, name); + else + error("garbage found: %s", path); + garbage++; +} + static char const * const count_objects_usage[] = { N_("git count-objects [-v]"), NULL @@ -76,7 +88,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) const char *objdir = get_object_directory(); int len = strlen(objdir); char *path = xmalloc(len + 50); - unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0; + unsigned long loose = 0, packed = 0, packed_loose = 0; off_t loose_size = 0; struct option opts[] = { OPT__VERBOSE(&verbose, N_("be verbose")), @@ -87,6 +99,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) /* we do not take arguments other than flags for now */ if (argc) usage_with_options(count_objects_usage, opts); + if (verbose) + report_pack_garbage = real_report_pack_garbage; memcpy(path, objdir, len); if (len && objdir[len-1] != '/') path[len++] = '/'; diff --git a/sha1_file.c b/sha1_file.c index 40b2329..5b70e55 100644 --- a/sha1_file.c +++ b/sha1_file.c @@ -21,6 +21,7 @@ #include "sha1-lookup.h" #include "bulk-checkin.h" #include "streaming.h" +#include "dir.h" #ifndef O_NOATIME #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) @@ -1000,15 +1001,19 @@ void install_packed_git(struct packed_git *pack) packed_git = pack; } +void (*report_pack_garbage)(const char *path, int len, const char *name); + static void prepare_packed_git_one(char *objdir, int local) { /* Ensure that this buffer is large enough so that we can append "/pack/" without clobbering the stack even if strlen(objdir) were PATH_MAX. */ char path[PATH_MAX + 1 + 4 + 1 + 1]; - int len; + int i, len; DIR *dir; struct dirent *de; + struct packed_git *p; + struct string_list garbage = STRING_LIST_INIT_DUP; sprintf(path, "%s/pack", objdir); len = strlen(path); @@ -1024,14 +1029,33 @@ static void prepare_packed_git_one(char *objdir, int local) int namelen = strlen(de->d_name); struct packed_git *p; - if (!has_extension(de->d_name, ".idx")) + if (len + namelen + 1 > sizeof(path)) { + if (report_pack_garbage) + report_pack_garbage(path, len - 1, de->d_name); continue; + } - if (len + namelen + 1 > sizeof(path)) + strcpy(path + len, de->d_name); + + if (!has_extension(de->d_name, ".idx")) { + if (!report_pack_garbage) + continue; + if (is_dot_or_dotdot(de->d_name)) + continue; + if (!has_extension(de->d_name, ".pack")) { + report_pack_garbage(path, 0, NULL); + continue; + } + /* + * we can't decide right know if this .pack is + * garbage. Delay until we identify all good + * packs. + */ + string_list_append(&garbage, path); continue; + } /* Don't reopen a pack we already have. */ - strcpy(path + len, de->d_name); for (p = packed_git; p; p = p->next) { if (!memcmp(path, p->pack_name, len + namelen - 4)) break; @@ -1047,6 +1071,25 @@ static void prepare_packed_git_one(char *objdir, int local) install_packed_git(p); } closedir(dir); + + if (!report_pack_garbage) + return; + + sort_string_list(&garbage); + for (p = packed_git; p; p = p->next) { + struct string_list_item *item; + if (!p->pack_local) + continue; + item = string_list_lookup(&garbage, p->pack_name); + if (item) + item->util = &garbage; /* anything but NULL */ + } + for (i = 0; i < garbage.nr; i++) { + struct string_list_item *item = garbage.items + i; + if (!item->util) + report_pack_garbage(item->string, 0, NULL); + } + string_list_clear(&garbage, 0); } static int sort_pack(const void *a_, const void *b_) -- 8< -- -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html