[PATCH 2/2] count-objects: report garbage files in .git/objects/pack directory too

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



While it's unusual to have strange files in the loose object database,
.git/objects/pack/tmp_* files are normal after a broken fetch, and they
can eat up a lot of disk space if the user does not pay attention.
Report them.

Signed-off-by: Nguyễn Thái Ngọc Duy <pclouds@xxxxxxxxx>
---
 The hook in prepare_packed_git_one is ugly, but I don't want to
 duplicate its file-search logic in count-objects. Maybe I'm
 wrong.

 Interestingly, it reports a .commits file in my repo too. A nice
 reminder to myself to remind Jeff about count-objects improvements
 for his commit-cache work :)

 We may also need a friendlier format than this one, which I
 assume is plumbing. Something that the average git user can understand
 without looking up the documentation. If "git stats" is too much for
 this purpose, perhaps "git gc --stats"?

 Documentation/git-count-objects.txt |  4 ++--
 builtin/count-objects.c             | 27 ++++++++++++++++++++++++++-
 sha1_file.c                         | 23 ++++++++++++++++++++---
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt
index e816823..1611d7c 100644
--- a/Documentation/git-count-objects.txt
+++ b/Documentation/git-count-objects.txt
@@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB
 prune-packable: the number of loose objects that are also present in
 the packs. These objects could be pruned using `git prune-packed`.
 +
-garbage: the number of files in loose object database that are not
-valid loose objects
+garbage: the number of files in the object database that are neither
+valid loose objects nor valid packs
 
 GIT
 ---
diff --git a/builtin/count-objects.c b/builtin/count-objects.c
index 9afaa88..e8fabcf 100644
--- a/builtin/count-objects.c
+++ b/builtin/count-objects.c
@@ -9,6 +9,8 @@
 #include "builtin.h"
 #include "parse-options.h"
 
+static unsigned long garbage;
+
 static void count_objects(DIR *d, char *path, int len, int verbose,
 			  unsigned long *loose,
 			  off_t *loose_size,
@@ -65,6 +67,27 @@ static void count_objects(DIR *d, char *path, int len, int verbose,
 	}
 }
 
+extern void (*report_pack_garbage)(const char *path, int len, const char *name);
+static void real_report_pack_garbage(const char *path, int len, const char *name)
+{
+	if (is_dot_or_dotdot(name))
+		return;
+	if (has_extension(name, ".pack")) {
+		struct strbuf idx_file = STRBUF_INIT;
+		struct stat st;
+
+		strbuf_addf(&idx_file, "%.*s/%.*s.idx", len, path,
+			    (int)strlen(name) - 5, name);
+		if (!stat(idx_file.buf, &st) && S_ISREG(st.st_mode)) {
+			strbuf_release(&idx_file);
+			return;
+		}
+		strbuf_release(&idx_file);
+	}
+	error("garbage found: %.*s/%s", len, path, name);
+	garbage++;
+}
+
 static char const * const count_objects_usage[] = {
 	N_("git count-objects [-v]"),
 	NULL
@@ -76,7 +99,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	const char *objdir = get_object_directory();
 	int len = strlen(objdir);
 	char *path = xmalloc(len + 50);
-	unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
+	unsigned long loose = 0, packed = 0, packed_loose = 0;
 	off_t loose_size = 0;
 	struct option opts[] = {
 		OPT__VERBOSE(&verbose, N_("be verbose")),
@@ -87,6 +110,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
 	/* we do not take arguments other than flags for now */
 	if (argc)
 		usage_with_options(count_objects_usage, opts);
+	if (verbose)
+		report_pack_garbage = real_report_pack_garbage;
 	memcpy(path, objdir, len);
 	if (len && objdir[len-1] != '/')
 		path[len++] = '/';
diff --git a/sha1_file.c b/sha1_file.c
index 40b2329..6045946 100644
--- a/sha1_file.c
+++ b/sha1_file.c
@@ -1000,6 +1000,17 @@ void install_packed_git(struct packed_git *pack)
 	packed_git = pack;
 }
 
+static void dummy_report_pack_garbage(const char *path,
+				      int len,
+				      const char *name)
+{
+}
+
+void (*report_pack_garbage)(const char *path,
+			    int len,
+			    const char *name) =
+	dummy_report_pack_garbage;
+
 static void prepare_packed_git_one(char *objdir, int local)
 {
 	/* Ensure that this buffer is large enough so that we can
@@ -1024,11 +1035,15 @@ static void prepare_packed_git_one(char *objdir, int local)
 		int namelen = strlen(de->d_name);
 		struct packed_git *p;
 
-		if (!has_extension(de->d_name, ".idx"))
+		if (!has_extension(de->d_name, ".idx")) {
+			report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 
-		if (len + namelen + 1 > sizeof(path))
+		if (len + namelen + 1 > sizeof(path)) {
+			report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 
 		/* Don't reopen a pack we already have. */
 		strcpy(path + len, de->d_name);
@@ -1042,8 +1057,10 @@ static void prepare_packed_git_one(char *objdir, int local)
 		 * .pack file that we can map.
 		 */
 		p = add_packed_git(path, len + namelen, local);
-		if (!p)
+		if (!p) {
+			report_pack_garbage(path, len - 1, de->d_name);
 			continue;
+		}
 		install_packed_git(p);
 	}
 	closedir(dir);
-- 
1.8.1.2.536.gf441e6d

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]