RFC: "negative" dirstat

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Ok, so this is just an RFC patch, but the concept is pretty simple..

In the kernel, the ARM platform is growing boundlessly, with platform
files being added for every random SoC out there. One of the things
that Russell King (arm maintainer) has worried about is that when they
try to clean stuff up, code *removal* also ends up adding to the
damage, so if ARM ever gets its act together and is able to
consolidate a lot of this, it's still going to look very bad in the
statistics because there will be a lot of damage due to removed files.

In the regular "--diffstat" output, this is all very obvious, because
if you actually remove more lines than you add, it will say so, and
people will be very happy.

But in --dirstat, removed lines are always counted towards damage.

So here's a total hacky RFC patch to add a "--negative" option, which
allows for dirstat to actually take the amount of added/removed code
into account, and make "damage" a signed integer instead.

Example case right now for my pulls today:

[torvalds@i5 linux]$ git diff --dirstat=1 --cumulative @{6am}..
  22.6% Documentation/input/
   1.4% arch/powerpc/include/asm/
   3.7% arch/powerpc/kernel/
   6.6% arch/powerpc/
   6.3% block/
   6.0% drivers/input/
  15.7% drivers/md/
  22.1% drivers/
  38.4% fs/btrfs/
  39.1% fs/
   2.7% include/linux/
[torvalds@i5 linux]$ git diff --dirstat=1 --cumulative --negative @{6am}..
  56.2% Documentation/input/
   1.9% arch/powerpc/include/asm/
   4.3% arch/powerpc/kernel/
   1.3% arch/powerpc/platforms/pseries/
   1.4% arch/powerpc/platforms/
   8.4% arch/powerpc/
   2.4% block/
   1.6% drivers/input/misc/
   8.4% drivers/input/
  -5.6% drivers/md/
   2.7% drivers/
  28.5% fs/btrfs/
  29.0% fs/

ie note how with "--negative", it becomes obvious that drivers/md
actually removed more than it added, while some subdirectories were
all about adding (Documentation/input got a new file), while others
were more about changing existing lines with not a lot of additional
actual new code.

The diffstat for drivers/md looks like this:

 7 files changed, 103 insertions(+), 137 deletions(-)

and I think most of the insertions were shorter lines than the deletions too.

NOTE! This is known-buggy in that you may end up in a situation where
the percentages are > 100% (the "total change" may be arbitrarily
small, since they all add up). In fact, you might get a
division-by-zero if the total change ends up being zero. This is a RFC
patch, nothing more. I don't know what the "right" solution for the
percentages should be (except that it obviously should never cause a
divide-by-zero).

Comments?

                        Linus "yeah, that option name sucks" Torvalds
 diff.c |   26 +++++++++++++++++---------
 diff.h |    1 +
 2 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/diff.c b/diff.c
index 9fa841010cc2..c93fdaaa541f 100644
--- a/diff.c
+++ b/diff.c
@@ -1447,7 +1447,7 @@ static void show_numstat(struct diffstat_t *data, struct diff_options *options)
 
 struct dirstat_file {
 	const char *name;
-	unsigned long changed;
+	long changed;
 };
 
 struct dirstat_dir {
@@ -1458,7 +1458,7 @@ struct dirstat_dir {
 static long gather_dirstat(struct diff_options *opt, struct dirstat_dir *dir,
 		unsigned long changed, const char *base, int baselen)
 {
-	unsigned long this_dir = 0;
+	long this_dir = 0;
 	unsigned int sources = 0;
 	const char *line_prefix = "";
 	struct strbuf *msg = NULL;
@@ -1499,12 +1499,14 @@ static long gather_dirstat(struct diff_options *opt, struct dirstat_dir *dir,
 	 *    under this directory (sources == 1).
 	 */
 	if (baselen && sources != 1) {
-		int permille = this_dir * 1000 / changed;
+		int permille = labs(this_dir) * 1000 / changed;
 		if (permille) {
-			int percent = permille / 10;
+			double percent = permille / 10.0;
 			if (percent >= dir->percent) {
-				fprintf(opt->file, "%s%4d.%01d%% %.*s\n", line_prefix,
-					percent, permille % 10, baselen, base);
+				if (this_dir < 0)
+					percent = -percent;
+				fprintf(opt->file, "%s%6.1f%% %.*s\n", line_prefix,
+					percent, baselen, base);
 				if (!dir->cumulative)
 					return 0;
 			}
@@ -1537,7 +1539,8 @@ static void show_dirstat(struct diff_options *options)
 	for (i = 0; i < q->nr; i++) {
 		struct diff_filepair *p = q->queue[i];
 		const char *name;
-		unsigned long copied, added, damage;
+		unsigned long copied, added, deleted;
+		long damage;
 
 		name = p->one->path ? p->one->path : p->two->path;
 
@@ -1567,7 +1570,11 @@ static void show_dirstat(struct diff_options *options)
 		 * damaged files, not damaged lines. This is done by
 		 * counting only a single damaged line per file.
 		 */
-		damage = (p->one->size - copied) + added;
+		deleted = p->one->size - copied;
+		if (DIFF_OPT_TST(options, DIFFSTAT_NEGATIVE))
+			damage = added - deleted;
+		else
+			damage = added + deleted;
 		if (DIFF_OPT_TST(options, DIRSTAT_BY_FILE) && damage > 0)
 			damage = 1;
 
@@ -3139,7 +3146,8 @@ int diff_opt_parse(struct diff_options *options, const char **av, int ac)
 			   &options->dirstat_percent)) {
 		options->output_format |= DIFF_FORMAT_DIRSTAT;
 		DIFF_OPT_SET(options, DIRSTAT_BY_FILE);
-	}
+	} else if (!strcmp(arg, "--negative"))
+		DIFF_OPT_SET(options, DIFFSTAT_NEGATIVE);
 	else if (!strcmp(arg, "--check"))
 		options->output_format |= DIFF_FORMAT_CHECKDIFF;
 	else if (!strcmp(arg, "--summary"))
diff --git a/diff.h b/diff.h
index 007a0554d4b2..95d6e65247ae 100644
--- a/diff.h
+++ b/diff.h
@@ -78,6 +78,7 @@ typedef struct strbuf *(*diff_prefix_fn_t)(struct diff_options *opt, void *data)
 #define DIFF_OPT_IGNORE_UNTRACKED_IN_SUBMODULES (1 << 25)
 #define DIFF_OPT_IGNORE_DIRTY_SUBMODULES (1 << 26)
 #define DIFF_OPT_OVERRIDE_SUBMODULE_CONFIG (1 << 27)
+#define DIFF_OPT_DIFFSTAT_NEGATIVE   (1 << 28)
 
 #define DIFF_OPT_TST(opts, flag)    ((opts)->flags & DIFF_OPT_##flag)
 #define DIFF_OPT_SET(opts, flag)    ((opts)->flags |= DIFF_OPT_##flag)

[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]