Duy Nguyen <pclouds@xxxxxxxxx> writes: > I thought about that, but we may need to do extra stat() for loose > garbage as well. As it is now, garbage is complained loudly, which > gives me enough motivation to clean up, even without looking at how > much disk space it uses. I wouldn't call a single line "garbage: 4" exactly *loud*. I also think that this is not about *motivating* you, but about giving more information to the users to help them assess the health of their repository themselves. By the way, I wonder if we also want to notice .git/objects/garbage or .git/objects/ga/rbage if we are to do this? > -- 8< -- > Subject: count-objects: report garbage pack directory too > > prepare_packed_git_one() is modified to allow count-objects to hook a > report function to so we don't need to duplicate the pack searching > logic in count-objects.c. When report_pack_garbage is NULL, the > overhead is insignificant. > > diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt > index e816823..1611d7c 100644 > --- a/Documentation/git-count-objects.txt > +++ b/Documentation/git-count-objects.txt > @@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB > prune-packable: the number of loose objects that are also present in > the packs. These objects could be pruned using `git prune-packed`. > + > -garbage: the number of files in loose object database that are not > -valid loose objects > +garbage: the number of files in object database that are not valid > +loose objects nor valid packs > > GIT > --- > diff --git a/builtin/count-objects.c b/builtin/count-objects.c > index 9afaa88..7fdd508 100644 > --- a/builtin/count-objects.c > +++ b/builtin/count-objects.c > @@ -9,6 +9,8 @@ > #include "builtin.h" > #include "parse-options.h" > > +static unsigned long garbage; > + > static void count_objects(DIR *d, char *path, int len, int verbose, > unsigned long *loose, > off_t *loose_size, > @@ -65,6 +67,16 @@ static void count_objects(DIR *d, char *path, int len, int verbose, > } > } > > +extern void (*report_pack_garbage)(const char *path, int len, const char *name); > +static void real_report_pack_garbage(const char *path, int len, const char *name) > +{ > + if (len) > + error("garbage found: %.*s/%s", len, path, name); > + else > + error("garbage found: %s", path); > + garbage++; > +} > + > static char const * const count_objects_usage[] = { > N_("git count-objects [-v]"), > NULL > @@ -76,7 +88,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) > const char *objdir = get_object_directory(); > int len = strlen(objdir); > char *path = xmalloc(len + 50); > - unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0; > + unsigned long loose = 0, packed = 0, packed_loose = 0; > off_t loose_size = 0; > struct option opts[] = { > OPT__VERBOSE(&verbose, N_("be verbose")), > @@ -87,6 +99,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) > /* we do not take arguments other than flags for now */ > if (argc) > usage_with_options(count_objects_usage, opts); > + if (verbose) > + report_pack_garbage = real_report_pack_garbage; > memcpy(path, objdir, len); > if (len && objdir[len-1] != '/') > path[len++] = '/'; > diff --git a/sha1_file.c b/sha1_file.c > index 40b2329..5b70e55 100644 > --- a/sha1_file.c > +++ b/sha1_file.c > @@ -21,6 +21,7 @@ > #include "sha1-lookup.h" > #include "bulk-checkin.h" > #include "streaming.h" > +#include "dir.h" > > #ifndef O_NOATIME > #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) > @@ -1000,15 +1001,19 @@ void install_packed_git(struct packed_git *pack) > packed_git = pack; > } > > +void (*report_pack_garbage)(const char *path, int len, const char *name); > + > static void prepare_packed_git_one(char *objdir, int local) > { > /* Ensure that this buffer is large enough so that we can > append "/pack/" without clobbering the stack even if > strlen(objdir) were PATH_MAX. */ > char path[PATH_MAX + 1 + 4 + 1 + 1]; > - int len; > + int i, len; > DIR *dir; > struct dirent *de; > + struct packed_git *p; > + struct string_list garbage = STRING_LIST_INIT_DUP; > > sprintf(path, "%s/pack", objdir); > len = strlen(path); > @@ -1024,14 +1029,33 @@ static void prepare_packed_git_one(char *objdir, int local) > int namelen = strlen(de->d_name); > struct packed_git *p; > > - if (!has_extension(de->d_name, ".idx")) > + if (len + namelen + 1 > sizeof(path)) { > + if (report_pack_garbage) > + report_pack_garbage(path, len - 1, de->d_name); > continue; > + } > > - if (len + namelen + 1 > sizeof(path)) > + strcpy(path + len, de->d_name); > + > + if (!has_extension(de->d_name, ".idx")) { > + if (!report_pack_garbage) > + continue; > + if (is_dot_or_dotdot(de->d_name)) > + continue; > + if (!has_extension(de->d_name, ".pack")) { > + report_pack_garbage(path, 0, NULL); > + continue; > + } Didn't I already say the logic should be inverted to whitelist the known ones? Saying "Anything that is not '.pack' is bad" here is a direct opposite, I think. Add "A '.keep' file is OK" to this codeflow and see how it goes. > + /* > + * we can't decide right know if this .pack is > + * garbage. Delay until we identify all good > + * packs. > + */ > + string_list_append(&garbage, path); > continue; > + } > > /* Don't reopen a pack we already have. */ > - strcpy(path + len, de->d_name); > for (p = packed_git; p; p = p->next) { > if (!memcmp(path, p->pack_name, len + namelen - 4)) > break; > @@ -1047,6 +1071,25 @@ static void prepare_packed_git_one(char *objdir, int local) > install_packed_git(p); > } > closedir(dir); > + > + if (!report_pack_garbage) > + return; > + > + sort_string_list(&garbage); > + for (p = packed_git; p; p = p->next) { > + struct string_list_item *item; > + if (!p->pack_local) > + continue; > + item = string_list_lookup(&garbage, p->pack_name); > + if (item) > + item->util = &garbage; /* anything but NULL */ > + } > + for (i = 0; i < garbage.nr; i++) { > + struct string_list_item *item = garbage.items + i; > + if (!item->util) > + report_pack_garbage(item->string, 0, NULL); > + } > + string_list_clear(&garbage, 0); > } > > static int sort_pack(const void *a_, const void *b_) > -- 8< -- -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html