Re: [PATCH 2/2] count-objects: report garbage files in .git/objects/pack directory too

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Mon, Feb 04, 2013 at 10:16:23AM -0800, Junio C Hamano wrote:
> I forgot to mention one more thing.  Your report_pack_garbage()
> special cases ".pack" to see if it is a regular file, but this loop
> structure causes a regular file whose name ends with ".pack" but
> without corresponding ".idx" file to go unreported.
> 
> I think the loop should be restructured to iterate over all known
> file types and report unknown ones, if you want to repurpose it for
> the reporting, something along this line, perhaps:
> 
> 	for (each name) {
> 		if (does it end with ".idx") {
> 			if (is it unusable ".idx") {
> 				report garbage;
> 			}
>                         continue;
> 		}
> 		if (! we are in report mode)
> 			continue;
> 		if (does it end with ".pack") {
> 			if (!have we seen corresponding ".idx")
> 				remember it;
> 			continue;
> 		}
> 		report garbage;
> 	}
> 	for (remembered pack) {
> 		if (does it have corresponding ".idx" &&
> 			is it really usable ".pack")
> 			continue;
> 		report garbage;
> 	}
> 

How about the below patch? Ignoring good .commits files will be just a
couple more lines in the "for (remembered_pack)" loop.

Also in another mail in this thread:

> I also wonder, especially because you are enumerating the temporary
> pack files in .git/objects/pack/, if it make more sense to account
> for their sizes as well.  After all, you would end up doing stat()
> for a common case of files with ".pack" suffix---doing so for all of
> them shouldn't be too bad.

I thought about that, but we may need to do extra stat() for loose
garbage as well. As it is now, garbage is complained loudly, which
gives me enough motivation to clean up, even without looking at how
much disk space it uses.

-- 8< --
Subject: count-objects: report garbage pack directory too

prepare_packed_git_one() is modified to allow count-objects to hook a
report function to so we don't need to duplicate the pack searching
logic in count-objects.c. When report_pack_garbage is NULL, the
overhead is insignificant.

diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt
index e816823..1611d7c 100644
--- a/Documentation/git-count-objects.txt
+++ b/Documentation/git-count-objects.txt
@@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB
 prune-packable: the number of loose objects that are also present in
 the packs. These objects could be pruned using `git prune-packed`.
 +
-garbage: the number of files in loose object database that are not
-valid loose objects
+garbage: the number of files in object database that are not valid
+loose objects nor valid packs
 
 GIT
 ---
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 9afaa88..7fdd508 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -9,6 +9,8 @@
 #include "builtin.h"
 #include "parse-options.h"
 
+static unsigned long garbage;
+
 static void count_objects(DIR *d, char *path, int len, int verbose,
 			  unsigned long *loose,
 			  off_t *loose_size,
@@ -65,6 +67,16 @@ static void count_objects(DIR *d, char *path, int len, int verbose,
 	}
 }
 
+extern void (*report_pack_garbage)(const char *path, int len, const char *name);
+static void real_report_pack_garbage(const char *path, int len, const char *name)
+{
+	if (len)
+		error("garbage found: %.*s/%s", len, path, name);
+	else
+		error("garbage found: %s", path);
+	garbage++;
+}
+
 static char const * const count_objects_usage[] = {
 	N_("git count-objects [-v]"),
 	NULL
@@ -76,7 +88,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	const char *objdir = get_object_directory();
 	int len = strlen(objdir);
 	char *path = xmalloc(len + 50);
-	unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
+	unsigned long loose = 0, packed = 0, packed_loose = 0;
 	off_t loose_size = 0;
 	struct option opts[] = {
 		OPT__VERBOSE(&verbose, N_("be verbose")),
@@ -87,6 +99,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	/* we do not take arguments other than flags for now */
 	if (argc)
 		usage_with_options(count_objects_usage, opts);
+	if (verbose)
+		report_pack_garbage = real_report_pack_garbage;
 	memcpy(path, objdir, len);
 	if (len && objdir[len-1] != '/')
 		path[len++] = '/';
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..5b70e55 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -21,6 +21,7 @@
 #include "sha1-lookup.h"
 #include "bulk-checkin.h"
 #include "streaming.h"
+#include "dir.h"
 
 #ifndef O_NOATIME
 #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
@@ -1000,15 +1001,19 @@ void install_packed_git(struct packed_git *pack)
 	packed_git = pack;
 }
 
+void (*report_pack_garbage)(const char *path, int len, const char *name);
+
 static void prepare_packed_git_one(char *objdir, int local)
 {
 	/* Ensure that this buffer is large enough so that we can
 	   append "/pack/" without clobbering the stack even if
 	   strlen(objdir) were PATH_MAX.  */
 	char path[PATH_MAX + 1 + 4 + 1 + 1];
-	int len;
+	int i, len;
 	DIR *dir;
 	struct dirent *de;
+	struct packed_git *p;
+	struct string_list garbage = STRING_LIST_INIT_DUP;
 
 	sprintf(path, "%s/pack", objdir);
 	len = strlen(path);
@@ -1024,14 +1029,33 @@ static void prepare_packed_git_one(char *objdir, int local)
 		int namelen = strlen(de->d_name);
 		struct packed_git *p;
 
-		if (!has_extension(de->d_name, ".idx"))
+		if (len + namelen + 1 > sizeof(path)) {
+			if (report_pack_garbage)
+				report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 
-		if (len + namelen + 1 > sizeof(path))
+		strcpy(path + len, de->d_name);
+
+		if (!has_extension(de->d_name, ".idx")) {
+			if (!report_pack_garbage)
+				continue;
+			if (is_dot_or_dotdot(de->d_name))
+				continue;
+			if (!has_extension(de->d_name, ".pack")) {
+				report_pack_garbage(path, 0, NULL);
+				continue;
+			}
+			/*
+			 * we can't decide right know if this .pack is
+			 * garbage. Delay until we identify all good
+			 * packs.
+			 */
+			string_list_append(&garbage, path);
 			continue;
+		}
 
 		/* Don't reopen a pack we already have. */
-		strcpy(path + len, de->d_name);
 		for (p = packed_git; p; p = p->next) {
 			if (!memcmp(path, p->pack_name, len + namelen - 4))
 				break;
@@ -1047,6 +1071,25 @@ static void prepare_packed_git_one(char *objdir, int local)
 		install_packed_git(p);
 	}
 	closedir(dir);
+
+	if (!report_pack_garbage)
+		return;
+
+	sort_string_list(&garbage);
+	for (p = packed_git; p; p = p->next) {
+		struct string_list_item *item;
+		if (!p->pack_local)
+			continue;
+		item = string_list_lookup(&garbage, p->pack_name);
+		if (item)
+			item->util = &garbage; /* anything but NULL */
+	}
+	for (i = 0; i < garbage.nr; i++) {
+		struct string_list_item *item = garbage.items + i;
+		if (!item->util)
+			report_pack_garbage(item->string, 0, NULL);
+	}
+	string_list_clear(&garbage, 0);
 }
 
 static int sort_pack(const void *a_, const void *b_)
-- 8< --
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]