This patch introduces a modified Damerau-Levenshtein algorithm into Git's code base, and uses it with the following penalties to show some similar commands when an unknown command was encountered: swap = 0, insertion = 1, substitution = 2, deletion = 4 A typical output would now look like this: $ git reabse git: 'reabse' is not a git-command. See 'git --help'. Did you mean one of these? rebase merge-base rev-parse remote rerere Signed-off-by: Johannes Schindelin <johannes.schindelin@xxxxxx> --- This is just a toy, but might be useful to other people. Makefile | 2 ++ help.c | 43 +++++++++++++++++++++++++++++++++++++++++++ levenshtein.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ levenshtein.h | 8 ++++++++ 4 files changed, 100 insertions(+), 0 deletions(-) create mode 100644 levenshtein.c create mode 100644 levenshtein.h diff --git a/Makefile b/Makefile index cce5a6e..df48af2 100644 --- a/Makefile +++ b/Makefile @@ -376,6 +376,7 @@ LIB_H += tree-walk.h LIB_H += unpack-trees.h LIB_H += utf8.h LIB_H += wt-status.h +LIB_H += levenshtein.h LIB_OBJS += alias.o LIB_OBJS += alloc.o @@ -471,6 +472,7 @@ LIB_OBJS += write_or_die.o LIB_OBJS += ws.o LIB_OBJS += wt-status.o LIB_OBJS += xdiff-interface.o +LIB_OBJS += levenshtein.o BUILTIN_OBJS += builtin-add.o BUILTIN_OBJS += builtin-annotate.o diff --git a/help.c b/help.c index d89d437..ac29225 100644 --- a/help.c +++ b/help.c @@ -9,6 +9,7 @@ #include "common-cmds.h" #include "parse-options.h" #include "run-command.h" +#include "levenshtein.h" static struct man_viewer_list { struct man_viewer_list *next; @@ -623,9 +624,51 @@ static void show_html_page(const char *git_cmd) execl_git_cmd("web--browse", "-c", "help.browser", page_path.buf, NULL); } +static const char *levenshtein_cmd; +static int similarity(const char *cmd) { + return levenshtein(levenshtein_cmd, cmd, 0, 2, 1, 4); +} + +static int levenshtein_compare(const void *p1, const void *p2) +{ + const struct cmdname *const *c1 = p1, *const *c2 = p2; + const char *s1 = (*c1)->name, *s2 = (*c2)->name; + int l1 = similarity(s1); + int l2 = similarity(s2); + return l1 != l2 ? l1 - l2 : strcmp(s1, s2); +} + void help_unknown_cmd(const char *cmd) { + int i, header_shown = 0; + fprintf(stderr, "git: '%s' is not a git-command. See 'git --help'.\n", cmd); + + load_command_list(); + ALLOC_GROW(main_cmds.names, main_cmds.cnt + other_cmds.cnt, + main_cmds.alloc); + memcpy(main_cmds.names + main_cmds.cnt, other_cmds.names, + other_cmds.cnt * sizeof(other_cmds.names[0])); + main_cmds.cnt += other_cmds.cnt; + + levenshtein_cmd = cmd; + qsort(main_cmds.names, main_cmds.cnt, + sizeof(*main_cmds.names), levenshtein_compare); + + for (i = 0; i < main_cmds.cnt; i++) { + int s = similarity(main_cmds.names[i]->name); + if (s > 6) + break; + if (!i) { + fprintf(stderr, "\nDid you mean %s?\n", + main_cmds.cnt < 2 || + similarity(main_cmds.names[1]->name) > 6 ? + "this" : "one of these"); + header_shown = 1; + } + fprintf(stderr, "\t%s\n", main_cmds.names[i]->name); + } + exit(1); } diff --git a/levenshtein.c b/levenshtein.c new file mode 100644 index 0000000..db52f2c --- /dev/null +++ b/levenshtein.c @@ -0,0 +1,47 @@ +#include "cache.h" +#include "levenshtein.h" + +int levenshtein(const char *string1, const char *string2, + int w, int s, int a, int d) +{ + int len1 = strlen(string1), len2 = strlen(string2); + int *row0 = xmalloc(sizeof(int) * (len2 + 1)); + int *row1 = xmalloc(sizeof(int) * (len2 + 1)); + int *row2 = xmalloc(sizeof(int) * (len2 + 1)); + int i, j; + + for (j = 0; j <= len2; j++) + row1[j] = j * a; + for (i = 0; i < len1; i++) { + int *dummy; + + row2[0] = (i + 1) * d; + for (j = 0; j < len2; j++) { + /* substitution */ + row2[j + 1] = row1[j] + s * (string1[i] != string2[j]); + /* swap */ + if (i > 0 && j > 0 && string1[i - 1] == string2[j] && + string1[i] == string2[j - 1] && + row2[j + 1] > row0[j - 1] + w) + row2[j + 1] = row0[j - 1] + w; + /* deletion */ + if (j + 1 < len2 && row2[j + 1] > row1[j + 1] + d) + row2[j + 1] = row1[j + 1] + d; + /* insertion */ + if (row2[j + 1] > row2[j] + a) + row2[j + 1] = row2[j] + a; + } + + dummy = row0; + row0 = row1; + row1 = row2; + row2 = dummy; + } + + i = row1[len2]; + free(row0); + free(row1); + free(row2); + + return i; +} diff --git a/levenshtein.h b/levenshtein.h new file mode 100644 index 0000000..0173abe --- /dev/null +++ b/levenshtein.h @@ -0,0 +1,8 @@ +#ifndef LEVENSHTEIN_H +#define LEVENSHTEIN_H + +int levenshtein(const char *string1, const char *string2, + int swap_penalty, int substition_penalty, + int insertion_penalty, int deletion_penalty); + +#endif -- 1.5.6.rc1.167.gce972 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html