Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> writes: > prepare_packed_git_one() is modified to allow count-objects to hook a > report function to so we don't need to duplicate the pack searching > logic in count-objects.c. When report_pack_garbage is NULL, the > overhead is insignificant. > > Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx> > --- > Documentation/git-count-objects.txt | 4 +- > builtin/count-objects.c | 18 ++++++++- > sha1_file.c | 81 +++++++++++++++++++++++++++++++++++-- > 3 files changed, 97 insertions(+), 6 deletions(-) > > diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt > index e816823..1611d7c 100644 > --- a/Documentation/git-count-objects.txt > +++ b/Documentation/git-count-objects.txt > @@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB > prune-packable: the number of loose objects that are also present in > the packs. These objects could be pruned using `git prune-packed`. > + > -garbage: the number of files in loose object database that are not > -valid loose objects > +garbage: the number of files in object database that are not valid > +loose objects nor valid packs > > GIT > --- > diff --git a/builtin/count-objects.c b/builtin/count-objects.c > index 9afaa88..118b2ae 100644 > --- a/builtin/count-objects.c > +++ b/builtin/count-objects.c > @@ -9,6 +9,20 @@ > #include "builtin.h" > #include "parse-options.h" > > +static unsigned long garbage; > + > +extern void (*report_pack_garbage)(const char *path, int len, const char *name); > +static void real_report_pack_garbage(const char *path, int len, const char *name) > +{ Don't some callers call this on paths outside objects/pack/ directory? Is it still report-pack-garbage? > + if (len && name) > + error("garbage found: %.*s/%s", len, path, name); > + else if (!len && name) > + error("garbage found: %s%s", path, name); > + else > + error("garbage found: %s", path); > + garbage++; > +} > + > static void count_objects(DIR *d, char *path, int len, int verbose, > unsigned long *loose, > off_t *loose_size, > @@ -76,7 +90,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) > const char *objdir = get_object_directory(); > int len = strlen(objdir); > char *path = xmalloc(len + 50); > - unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0; > + unsigned long loose = 0, packed = 0, packed_loose = 0; > off_t loose_size = 0; > struct option opts[] = { > OPT__VERBOSE(&verbose, N_("be verbose")), > @@ -87,6 +101,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix) > /* we do not take arguments other than flags for now */ > if (argc) > usage_with_options(count_objects_usage, opts); > + if (verbose) > + report_pack_garbage = real_report_pack_garbage; > memcpy(path, objdir, len); > if (len && objdir[len-1] != '/') > path[len++] = '/'; > diff --git a/sha1_file.c b/sha1_file.c > index 40b2329..cc6ef03 100644 > --- a/sha1_file.c > +++ b/sha1_file.c > @@ -21,6 +21,7 @@ > #include "sha1-lookup.h" > #include "bulk-checkin.h" > #include "streaming.h" > +#include "dir.h" > > #ifndef O_NOATIME > #if defined(__linux__) && (defined(__i386__) || defined(__PPC__)) > @@ -1000,6 +1001,54 @@ void install_packed_git(struct packed_git *pack) > packed_git = pack; > } > > +/* A hook for count-objects to report invalid files in pack directory */ > +void (*report_pack_garbage)(const char *path, int len, const char *name); > + > +static const char *known_pack_extensions[] = { ".pack", ".keep", NULL }; This sounds wrong. Isn't ".idx" also known? > +static void report_garbage(struct string_list *list) > +{ > + struct strbuf sb = STRBUF_INIT; > + struct packed_git *p; > + int i; > + > + if (!report_pack_garbage) > + return; > + > + sort_string_list(list); > + > + for (p = packed_git; p; p = p->next) { > + struct string_list_item *item; > + if (!p->pack_local) > + continue; > + strbuf_reset(&sb); > + strbuf_add(&sb, p->pack_name, > + strlen(p->pack_name) - strlen(".pack")); > + item = string_list_lookup(list, sb.buf); > + if (!item) > + continue; > + /* > + * string_list_lookup does not guarantee to return the > + * first matched string if it's duplicated. > + */ > + while (item - list->items && > + !strcmp(item[-1].string, item->string)) > + item--; > + while (item - list->items < list->nr && > + !strcmp(item->string, sb.buf)) { > + item->util = NULL; /* non-garbage mark */ > + item++; > + } > + } > + for (i = 0; i < list->nr; i++) { > + struct string_list_item *item = list->items + i; > + if (!item->util) > + continue; > + report_pack_garbage(item->string, 0, item->util); > + } > + strbuf_release(&sb); > +} > + > static void prepare_packed_git_one(char *objdir, int local) > { > /* Ensure that this buffer is large enough so that we can > @@ -1009,6 +1058,7 @@ static void prepare_packed_git_one(char *objdir, int local) > int len; > DIR *dir; > struct dirent *de; > + struct string_list garbage = STRING_LIST_INIT_DUP; > > sprintf(path, "%s/pack", objdir); > len = strlen(path); > @@ -1024,14 +1074,37 @@ static void prepare_packed_git_one(char *objdir, int local) > int namelen = strlen(de->d_name); > struct packed_git *p; > > - if (!has_extension(de->d_name, ".idx")) > + if (len + namelen + 1 > sizeof(path)) { > + if (report_pack_garbage) > + report_pack_garbage(path, len - 1, de->d_name); A pack/in/a/very/long/path/pack-0000000000000000000000000000000000000000.pack may pass when fed to "git verify-pack", but this will report it as "garbage", without reporting what is wrong with it. Wouldn't that confuse users? > continue; > + } > + > + strcpy(path + len, de->d_name); > > - if (len + namelen + 1 > sizeof(path)) > + if (!has_extension(de->d_name, ".idx")) { > + struct string_list_item *item; > + int i, n; > + if (!report_pack_garbage) > + continue; > + if (is_dot_or_dotdot(de->d_name)) > + continue; > + for (i = 0; known_pack_extensions[i]; i++) > + if (has_extension(de->d_name, > + known_pack_extensions[i])) > + break; > + if (!known_pack_extensions[i]) { > + report_pack_garbage(path, 0, NULL); > + continue; > + } > + n = strlen(path) - strlen(known_pack_extensions[i]); > + item = string_list_append_nodup(&garbage, > + xstrndup(path, n)); > + item->util = (void*)known_pack_extensions[i]; > continue; > + } Why isn't this part more like this? if (dot-or-dotdot) { continue; } else if (has_extension(de->d_name, ".idx")) { do things for the .idx file; } else if (has_extension(de->d_name, ".pack") { do things for the .pack file, including queuing the name if we haven't seen corresponding .idx for later examination; } else if (has_extension(de->d_name, ".keep") { nothing special for now but we may want to add some other checks later } else { everything else is a garbage report_pack_garbage(); } > > /* Don't reopen a pack we already have. */ > - strcpy(path + len, de->d_name); > for (p = packed_git; p; p = p->next) { > if (!memcmp(path, p->pack_name, len + namelen - 4)) > break; > @@ -1047,6 +1120,8 @@ static void prepare_packed_git_one(char *objdir, int local) > install_packed_git(p); > } > closedir(dir); > + report_garbage(&garbage); > + string_list_clear(&garbage, 0); > } > > static int sort_pack(const void *a_, const void *b_) -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html