Recently and not so recently, we made sure that log/grep type operations
use textconv filters when a user-facing diff would do the same:

    ef90ab6 (pickaxe: use textconv for -S counting, 2012-10-28)
    b1c2f57 (diff_grep: use textconv buffers for add/deleted files, 2012-10-28)
    0508fe5 (combine-diff: respect textconv attributes, 2011-05-23)

"git grep" currently does not use textconv filters at all, that is,
neither for displaying the match and context nor for the actual grepping.

Introduce a binary mode "--textconv" (in addition to "--text" and "-I")
which makes git grep use any configured textconv filters for grepping and
output purposes.

Signed-off-by: Michael J Gruber <git@xxxxxxxxxxxxxxxxxxxx>
---

Notes:
    I'm somehow stuck in textconv/filespec/... hell, so I'm sending this out
    as a request for help. I'm sure there are people for whom it's a breeze
    to get this right.

    The difficulty is in getting the different cases (blob/sha1 vs. worktree)
    right, and in making the changes minimally invasive.

    It seems that some more refactoring could help: "git show --textconv"
    does not use textconv filters when used on blobs either. (It does for
    diffs, of course.) Most existing helper functions are tailored for diffs.

    Nota bene: --textconv does not affect "diff --stat" either...

builtin/grep.c | 5 ++++- grep.c | 47 +++++++++++++++++++++++++++++------------------ grep.h | 3 ++- 3 files changed, 35 insertions(+), 20 deletions(-) diff --git a/builtin/grep.c b/builtin/grep.c index 8025964..2181c22 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -96,7 +96,7 @@ static void add_work(struct grep_opt *opt, enum grep_source_type type, grep_source_init(&todo[todo_end].source, type, name, path, id); if (opt->binary != GREP_BINARY_TEXT) - grep_source_load_driver(&todo[todo_end].source); + grep_source_load_driver(&todo[todo_end].source, opt); todo[todo_end].done = 0; strbuf_reset(&todo[todo_end].out); todo_end = (todo_end + 1) % ARRAY_SIZE(todo); @@ -659,6 +659,9 @@ int cmd_grep(int argc, const char **argv, const char *prefix) OPT_SET_INT('I', NULL, &opt.binary, N_("don't match patterns in binary files"), GREP_BINARY_NOMATCH), + OPT_SET_INT(0, "textconv", &opt.binary, + N_("process binary files with textconv filters"), + GREP_BINARY_TEXTCONV), { OPTION_INTEGER, 0, "max-depth", &opt.max_depth, N_("depth"), N_("descend at most <depth> levels"), PARSE_OPT_NONEG, NULL, 1 }, diff --git a/grep.c b/grep.c index 4bd1b8b..410b7b8 100644 --- a/grep.c +++ b/grep.c @@ -1,10 +1,12 @@ #include "cache.h" +#include "diff.h" +#include "diffcore.h" #include "grep.h" #include "userdiff.h" #include "xdiff-interface.h" -static int grep_source_load(struct grep_source *gs); -static int grep_source_is_binary(struct grep_source *gs); +static int grep_source_load(struct grep_source *gs, struct grep_opt *opt); +static int grep_source_is_binary(struct grep_source *gs, struct grep_opt *opt); static struct grep_opt grep_defaults; @@ -1174,7 +1176,7 @@ static int match_funcname(struct grep_opt *opt, struct grep_source *gs, char *bo { xdemitconf_t *xecfg = opt->priv; if (xecfg && !xecfg->find_func) { - grep_source_load_driver(gs); + grep_source_load_driver(gs, opt); if (gs->driver->funcname.pattern) { const struct userdiff_funcname *pe = &gs->driver->funcname; 
xdiff_set_find_func(xecfg, pe->pattern, pe->cflags); @@ -1354,14 +1356,15 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle switch (opt->binary) { case GREP_BINARY_DEFAULT: - if (grep_source_is_binary(gs)) + if (grep_source_is_binary(gs, opt)) binary_match_only = 1; break; case GREP_BINARY_NOMATCH: - if (grep_source_is_binary(gs)) + if (grep_source_is_binary(gs, opt)) return 0; /* Assume unmatch */ break; case GREP_BINARY_TEXT: + case GREP_BINARY_TEXTCONV: break; default: die("bug: unknown binary handling mode"); @@ -1372,7 +1375,7 @@ static int grep_source_1(struct grep_opt *opt, struct grep_source *gs, int colle try_lookahead = should_lookahead(opt); - if (grep_source_load(gs) < 0) + if (grep_source_load(gs, opt) < 0) return 0; bol = gs->buf; @@ -1610,12 +1613,17 @@ void grep_source_clear_data(struct grep_source *gs) } } -static int grep_source_load_sha1(struct grep_source *gs) +static int grep_source_load_sha1(struct grep_source *gs, struct grep_opt *opt) { enum object_type type; - grep_read_lock(); - gs->buf = read_sha1_file(gs->identifier, &type, &gs->size); + if (opt->binary == GREP_BINARY_TEXTCONV) { + struct diff_filespec *df = alloc_filespec(gs->name); + gs->size = fill_textconv(gs->driver, df, &gs->buf); + free_filespec(df); + } else { + gs->buf = read_sha1_file(gs->identifier, &type, &gs->size); + } grep_read_unlock(); if (!gs->buf) @@ -1625,7 +1633,7 @@ static int grep_source_load_sha1(struct grep_source *gs) return 0; } -static int grep_source_load_file(struct grep_source *gs) +static int grep_source_load_file(struct grep_source *gs, struct grep_opt *opt) { const char *filename = gs->identifier; struct stat st; @@ -1660,42 +1668,45 @@ static int grep_source_load_file(struct grep_source *gs) return 0; } -static int grep_source_load(struct grep_source *gs) +static int grep_source_load(struct grep_source *gs, struct grep_opt *opt) { if (gs->buf) return 0; switch (gs->type) { case GREP_SOURCE_FILE: - return 
grep_source_load_file(gs); + return grep_source_load_file(gs, opt); case GREP_SOURCE_SHA1: - return grep_source_load_sha1(gs); + return grep_source_load_sha1(gs, opt); case GREP_SOURCE_BUF: return gs->buf ? 0 : -1; } die("BUG: invalid grep_source type"); } -void grep_source_load_driver(struct grep_source *gs) +void grep_source_load_driver(struct grep_source *gs, struct grep_opt *opt) { if (gs->driver) return; - grep_attr_lock(); + grep_attr_lock(); //TODO + printf("Looking up userdiff driver for: %s", gs->path); if (gs->path) gs->driver = userdiff_find_by_path(gs->path); if (!gs->driver) gs->driver = userdiff_find_by_name("default"); + if (opt->binary == GREP_BINARY_TEXTCONV) + gs->driver = userdiff_get_textconv(gs->driver); grep_attr_unlock(); } -static int grep_source_is_binary(struct grep_source *gs) +static int grep_source_is_binary(struct grep_source *gs, struct grep_opt *opt) { - grep_source_load_driver(gs); + grep_source_load_driver(gs, opt); if (gs->driver->binary != -1) return gs->driver->binary; - if (!grep_source_load(gs)) + if (!grep_source_load(gs, opt)) return buffer_is_binary(gs->buf, gs->size); return 0; diff --git a/grep.h b/grep.h index 8fc854f..d272d25 100644 --- a/grep.h +++ b/grep.h @@ -105,6 +105,7 @@ struct grep_opt { #define GREP_BINARY_DEFAULT 0 #define GREP_BINARY_NOMATCH 1 #define GREP_BINARY_TEXT 2 +#define GREP_BINARY_TEXTCONV 3 int binary; int extended; int use_reflog_filter; @@ -173,7 +174,7 @@ void grep_source_init(struct grep_source *gs, enum grep_source_type type, const void *identifier); void grep_source_clear_data(struct grep_source *gs); void grep_source_clear(struct grep_source *gs); -void grep_source_load_driver(struct grep_source *gs); +void grep_source_load_driver(struct grep_source *gs, struct grep_opt *opt); int grep_source(struct grep_opt *opt, struct grep_source *gs); -- 1.8.1.2.718.g9d378fc -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More 
majordomo info at http://vger.kernel.org/majordomo-info.html