Re: [PATCH v3] help: always suggest common-cmds if prefix of cmd

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Erik Faye-Lund <kusmabite@xxxxxxxxx> writes:

> @@ -320,9 +321,16 @@ const char *help_unknown_cmd(const char *cmd)
>  	uniq(&main_cmds);
>  
>  	/* This reuses cmdname->len for similarity index */
> -	for (i = 0; i < main_cmds.cnt; ++i)
> -		main_cmds.names[i]->len =
> +	for (i = 0; i < main_cmds.cnt; ++i) {
> +		main_cmds.names[i]->len = 1 +
>  			levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
> +		for (n = 0; n < ARRAY_SIZE(common_cmds); ++n) {
> +			if (!strcmp(main_cmds.names[i]->name,
> +			    common_cmds[n].name) &&
> +			    !prefixcmp(main_cmds.names[i]->name, cmd))
> +				main_cmds.names[i]->len = 0;
> +		}
> +	}

This is an error codepath so performance would not matter much, but this
is doing it in an unnecessarily slow way, no?  At this point, both arrays
are sorted the same way, so we should be able to walk common_cmds[]
alongside the main_cmds.names[] (see below).

> +	if (n < main_cmds.cnt) {
> +		best_similarity = main_cmds.names[n++]->len;
> +		while (n < main_cmds.cnt &&
> +		       best_similarity == main_cmds.names[n]->len)
> +			++n;
> +	} else
> +		best_similarity = 0;

Think about what does this case _means_... The end user input was so
ambiguous that it prefix matched all the common commands!  Is it really
similar enough?

Note that most of the time main_cmds[] has more than what common_cmds[]
has, and because prefix match is done only against common_cmds[],
"everything is a prefix-match" never happens.  You might want to mark it
as a BUG(), but someday we may change the rules to give 0 to non common
commands with prefix match under some condition, so thinking these rare
corner cases through would defend ourselves from future gotchas.

How about doing it this way instead?  Isn't it more readable?

diff --git a/help.c b/help.c
index 7f4928e..7654f1b 100644
--- a/help.c
+++ b/help.c
@@ -3,6 +3,7 @@
 #include "exec_cmd.h"
 #include "levenshtein.h"
 #include "help.h"
+#include "common-cmds.h"
 
 /* most GUI terminals set COLUMNS (although some don't export it) */
 static int term_columns(void)
@@ -298,7 +299,8 @@ static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old)
 }
 
 /* An empirically derived magic number */
-#define SIMILAR_ENOUGH(x) ((x) < 6)
+#define SIMILARITY_FLOOR 7
+#define SIMILAR_ENOUGH(x) ((x) < SIMILARITY_FLOOR)
 
 const char *help_unknown_cmd(const char *cmd)
 {
@@ -319,10 +321,28 @@ const char *help_unknown_cmd(const char *cmd)
 	      sizeof(main_cmds.names), cmdname_compare);
 	uniq(&main_cmds);
 
-	/* This reuses cmdname->len for similarity index */
-	for (i = 0; i < main_cmds.cnt; ++i)
+	/* This abuses cmdname->len for levenshtein distance */
+	for (i = 0, n = 0; i < main_cmds.cnt; i++) {
+		int cmp = 0; /* avoid compiler stupidity */
+		const char *candidate = main_cmds.names[i]->name;
+
+		/* Does the candidate appear in common_cmds list? */
+		while (n < ARRAY_SIZE(common_cmds) &&
+		       (cmp = strcmp(common_cmds[n].name, candidate)) < 0)
+			n++;
+		if ((n < ARRAY_SIZE(common_cmds)) && !cmp) {
+			/* Yes, this is one of the common commands */
+			n++; /* use the entry from common_cmds[] */
+			if (!prefixcmp(candidate, cmd)) {
+				/* Give prefix match a very good score */
+				main_cmds.names[i]->len = 0;
+				continue;
+			}
+		}
+
 		main_cmds.names[i]->len =
-			levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4);
+			levenshtein(cmd, candidate, 0, 2, 1, 4) + 1;
+	}
 
 	qsort(main_cmds.names, main_cmds.cnt,
 	      sizeof(*main_cmds.names), levenshtein_compare);
@@ -330,10 +350,21 @@ const char *help_unknown_cmd(const char *cmd)
 	if (!main_cmds.cnt)
 		die ("Uh oh. Your system reports no Git commands at all.");
 
-	best_similarity = main_cmds.names[0]->len;
-	n = 1;
-	while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len)
-		++n;
+	/* skip and count prefix matches */
+	for (n = 0; n < main_cmds.cnt && !main_cmds.names[n]->len; n++)
+		; /* still counting */
+
+	if (main_cmds.cnt <= n) {
+		/* prefix matches with everything? that is too ambiguous */
+		best_similarity = SIMILARITY_FLOOR + 1;
+	} else {
+		/* count all the most similar ones */
+		for (best_similarity = main_cmds.names[n++]->len;
+		     (n < main_cmds.cnt &&
+		      best_similarity == main_cmds.names[n]->len);
+		     n++)
+			; /* still counting */
+	}
 	if (autocorrect && n == 1 && SIMILAR_ENOUGH(best_similarity)) {
 		const char *assumed = main_cmds.names[0]->name;
 		main_cmds.names[0] = NULL;
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]