Erik Faye-Lund <kusmabite@xxxxxxxxx> writes: > @@ -320,9 +321,16 @@ const char *help_unknown_cmd(const char *cmd) > uniq(&main_cmds); > > /* This reuses cmdname->len for similarity index */ > - for (i = 0; i < main_cmds.cnt; ++i) > - main_cmds.names[i]->len = > + for (i = 0; i < main_cmds.cnt; ++i) { > + main_cmds.names[i]->len = 1 + > levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); > + for (n = 0; n < ARRAY_SIZE(common_cmds); ++n) { > + if (!strcmp(main_cmds.names[i]->name, > + common_cmds[n].name) && > + !prefixcmp(main_cmds.names[i]->name, cmd)) > + main_cmds.names[i]->len = 0; > + } > + } This is an error codepath so performance would not matter much, but this is doing it in an unnecessarily slow way, no? At this point, both arrays are sorted the same way, so we should be able to walk common_cmds[] alongside the main_cmds.names[] (see below). > + if (n < main_cmds.cnt) { > + best_similarity = main_cmds.names[n++]->len; > + while (n < main_cmds.cnt && > + best_similarity == main_cmds.names[n]->len) > + ++n; > + } else > + best_similarity = 0; Think about what does this case _means_... The end user input was so ambiguous that it prefix matched all the common commands! Is it really similar enough? Note that most of the time main_cmds[] has more than what common_cmds[] has, and because prefix match is done only against common_cmds[], "everything is a prefix-match" never happens. You might want to mark it as a BUG(), but someday we may change the rules to give 0 to non common commands with prefix match under some condition, so thinking these rare corner cases through would defend ourselves from future gotchas. How about doing it this way instead? Isn't it more readable? diff --git a/help.c b/help.c index 7f4928e..7654f1b 100644 --- a/help.c +++ b/help.c @@ -3,6 +3,7 @@ #include "exec_cmd.h" #include "levenshtein.h" #include "help.h" +#include "common-cmds.h" /* most GUI terminals set COLUMNS (although some don't export it) */ static int term_columns(void) @@ -298,7 +299,8 @@ static void add_cmd_list(struct cmdnames *cmds, struct cmdnames *old) } /* An empirically derived magic number */ -#define SIMILAR_ENOUGH(x) ((x) < 6) +#define SIMILARITY_FLOOR 7 +#define SIMILAR_ENOUGH(x) ((x) < SIMILARITY_FLOOR) const char *help_unknown_cmd(const char *cmd) { @@ -319,10 +321,28 @@ const char *help_unknown_cmd(const char *cmd) sizeof(main_cmds.names), cmdname_compare); uniq(&main_cmds); - /* This reuses cmdname->len for similarity index */ - for (i = 0; i < main_cmds.cnt; ++i) + /* This abuses cmdname->len for levenshtein distance */ + for (i = 0, n = 0; i < main_cmds.cnt; i++) { + int cmp = 0; /* avoid compiler stupidity */ + const char *candidate = main_cmds.names[i]->name; + + /* Does the candidate appear in common_cmds list? */ + while (n < ARRAY_SIZE(common_cmds) && + (cmp = strcmp(common_cmds[n].name, candidate)) < 0) + n++; + if ((n < ARRAY_SIZE(common_cmds)) && !cmp) { + /* Yes, this is one of the common commands */ + n++; /* use the entry from common_cmds[] */ + if (!prefixcmp(candidate, cmd)) { + /* Give prefix match a very good score */ + main_cmds.names[i]->len = 0; + continue; + } + } + main_cmds.names[i]->len = - levenshtein(cmd, main_cmds.names[i]->name, 0, 2, 1, 4); + levenshtein(cmd, candidate, 0, 2, 1, 4) + 1; + } qsort(main_cmds.names, main_cmds.cnt, sizeof(*main_cmds.names), levenshtein_compare); @@ -330,10 +350,21 @@ const char *help_unknown_cmd(const char *cmd) if (!main_cmds.cnt) die ("Uh oh. Your system reports no Git commands at all."); - best_similarity = main_cmds.names[0]->len; - n = 1; - while (n < main_cmds.cnt && best_similarity == main_cmds.names[n]->len) - ++n; + /* skip and count prefix matches */ + for (n = 0; n < main_cmds.cnt && !main_cmds.names[n]->len; n++) + ; /* still counting */ + + if (main_cmds.cnt <= n) { + /* prefix matches with everything? that is too ambiguous */ + best_similarity = SIMILARITY_FLOOR + 1; + } else { + /* count all the most similar ones */ + for (best_similarity = main_cmds.names[n++]->len; + (n < main_cmds.cnt && + best_similarity == main_cmds.names[n]->len); + n++) + ; /* still counting */ + } if (autocorrect && n == 1 && SIMILAR_ENOUGH(best_similarity)) { const char *assumed = main_cmds.names[0]->name; main_cmds.names[0] = NULL; -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html