Re: [PATCH 2/2] count-objects: report garbage files in .git/objects/pack directory too

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Duy Nguyen <pclouds@xxxxxxxxx> writes:

> I thought about that, but we may need to do extra stat() for loose
> garbage as well. As it is now, garbage is complained loudly, which
> gives me enough motivation to clean up, even without looking at how
> much disk space it uses.

I wouldn't call a single line "garbage: 4" exactly *loud*.  I also
think that this is not about *motivating* you, but about giving
more information to the users to help them assess the health of
their repository themselves.

By the way, I wonder if we also want to notice .git/objects/garbage
or .git/objects/ga/rbage if we are to do this?

> -- 8< --
> Subject: count-objects: report garbage pack directory too
>
> prepare_packed_git_one() is modified to allow count-objects to hook a
> report function to so we don't need to duplicate the pack searching
> logic in count-objects.c. When report_pack_garbage is NULL, the
> overhead is insignificant.
>
> diff --git a/Documentation/git-count-objects.txt b/Documentation/git-count-objects.txt
> index e816823..1611d7c 100644
> --- a/Documentation/git-count-objects.txt
> +++ b/Documentation/git-count-objects.txt
> @@ -33,8 +33,8 @@ size-pack: disk space consumed by the packs, in KiB
>  prune-packable: the number of loose objects that are also present in
>  the packs. These objects could be pruned using `git prune-packed`.
>  +
> -garbage: the number of files in loose object database that are not
> -valid loose objects
> +garbage: the number of files in object database that are not valid
> +loose objects nor valid packs
>  
>  GIT
>  ---
> diff --git a/builtin/count-objects.c b/builtin/count-objects.c
> index 9afaa88..7fdd508 100644
> --- a/builtin/count-objects.c
> +++ b/builtin/count-objects.c
> @@ -9,6 +9,8 @@
>  #include "builtin.h"
>  #include "parse-options.h"
>  
> +static unsigned long garbage;
> +
>  static void count_objects(DIR *d, char *path, int len, int verbose,
>  			  unsigned long *loose,
>  			  off_t *loose_size,
> @@ -65,6 +67,16 @@ static void count_objects(DIR *d, char *path, int len, int verbose,
>  	}
>  }
>  
> +extern void (*report_pack_garbage)(const char *path, int len, const char *name);
> +static void real_report_pack_garbage(const char *path, int len, const char *name)
> +{
> +	if (len)
> +		error("garbage found: %.*s/%s", len, path, name);
> +	else
> +		error("garbage found: %s", path);
> +	garbage++;
> +}
> +
>  static char const * const count_objects_usage[] = {
>  	N_("git count-objects [-v]"),
>  	NULL
> @@ -76,7 +88,7 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
>  	const char *objdir = get_object_directory();
>  	int len = strlen(objdir);
>  	char *path = xmalloc(len + 50);
> -	unsigned long loose = 0, packed = 0, packed_loose = 0, garbage = 0;
> +	unsigned long loose = 0, packed = 0, packed_loose = 0;
>  	off_t loose_size = 0;
>  	struct option opts[] = {
>  		OPT__VERBOSE(&verbose, N_("be verbose")),
> @@ -87,6 +99,8 @@ int cmd_count_objects(int argc, const char **argv, const char *prefix)
>  	/* we do not take arguments other than flags for now */
>  	if (argc)
>  		usage_with_options(count_objects_usage, opts);
> +	if (verbose)
> +		report_pack_garbage = real_report_pack_garbage;
>  	memcpy(path, objdir, len);
>  	if (len && objdir[len-1] != '/')
>  		path[len++] = '/';
> diff --git a/sha1_file.c b/sha1_file.c
> index 40b2329..5b70e55 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -21,6 +21,7 @@
>  #include "sha1-lookup.h"
>  #include "bulk-checkin.h"
>  #include "streaming.h"
> +#include "dir.h"
>  
>  #ifndef O_NOATIME
>  #if defined(__linux__) && (defined(__i386__) || defined(__PPC__))
> @@ -1000,15 +1001,19 @@ void install_packed_git(struct packed_git *pack)
>  	packed_git = pack;
>  }
>  
> +void (*report_pack_garbage)(const char *path, int len, const char *name);
> +
>  static void prepare_packed_git_one(char *objdir, int local)
>  {
>  	/* Ensure that this buffer is large enough so that we can
>  	   append "/pack/" without clobbering the stack even if
>  	   strlen(objdir) were PATH_MAX.  */
>  	char path[PATH_MAX + 1 + 4 + 1 + 1];
> -	int len;
> +	int i, len;
>  	DIR *dir;
>  	struct dirent *de;
> +	struct packed_git *p;
> +	struct string_list garbage = STRING_LIST_INIT_DUP;
>  
>  	sprintf(path, "%s/pack", objdir);
>  	len = strlen(path);
> @@ -1024,14 +1029,33 @@ static void prepare_packed_git_one(char *objdir, int local)
>  		int namelen = strlen(de->d_name);
>  		struct packed_git *p;
>  
> -		if (!has_extension(de->d_name, ".idx"))
> +		if (len + namelen + 1 > sizeof(path)) {
> +			if (report_pack_garbage)
> +				report_pack_garbage(path, len - 1, de->d_name);
>  			continue;
> +		}
>  
> -		if (len + namelen + 1 > sizeof(path))
> +		strcpy(path + len, de->d_name);
> +
> +		if (!has_extension(de->d_name, ".idx")) {
> +			if (!report_pack_garbage)
> +				continue;
> +			if (is_dot_or_dotdot(de->d_name))
> +				continue;
> +			if (!has_extension(de->d_name, ".pack")) {
> +				report_pack_garbage(path, 0, NULL);
> +				continue;
> +			}

Didn't I already say the logic should be inverted to whitelist the
known ones?  Saying "Anything that is not '.pack' is bad" here is a
direct opposite, I think.  

Add "A '.keep' file is OK" to this codeflow and see how it goes.

> +			/*
> +			 * we can't decide right know if this .pack is
> +			 * garbage. Delay until we identify all good
> +			 * packs.
> +			 */
> +			string_list_append(&garbage, path);
>  			continue;
> +		}
>  
>  		/* Don't reopen a pack we already have. */
> -		strcpy(path + len, de->d_name);
>  		for (p = packed_git; p; p = p->next) {
>  			if (!memcmp(path, p->pack_name, len + namelen - 4))
>  				break;
> @@ -1047,6 +1071,25 @@ static void prepare_packed_git_one(char *objdir, int local)
>  		install_packed_git(p);
>  	}
>  	closedir(dir);
> +
> +	if (!report_pack_garbage)
> +		return;
> +
> +	sort_string_list(&garbage);
> +	for (p = packed_git; p; p = p->next) {
> +		struct string_list_item *item;
> +		if (!p->pack_local)
> +			continue;
> +		item = string_list_lookup(&garbage, p->pack_name);
> +		if (item)
> +			item->util = &garbage; /* anything but NULL */
> +	}
> +	for (i = 0; i < garbage.nr; i++) {
> +		struct string_list_item *item = garbage.items + i;
> +		if (!item->util)
> +			report_pack_garbage(item->string, 0, NULL);
> +	}
> +	string_list_clear(&garbage, 0);
>  }
>  
>  static int sort_pack(const void *a_, const void *b_)
> -- 8< --
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]