On Tue, Oct 09, 2012 at 07:01:44PM +0700, Nguyen Thai Ngoc Duy wrote: > On Tue, Oct 09, 2012 at 04:38:32PM +0700, Nguyen Thai Ngoc Duy wrote: > > #5 0x0815e736 in userdiff_find_by_path (path=0x820e7f0 > > "HEAD:Documentation/.gitattributes") at userdiff.c:278 > > #6 0x081058ca in grep_source_load_driver (gs=0xbfffd978) at grep.c:1504 > > A bandage patch may look like this. But it does not solve the real > problem: > > - we should be able to look up in-tree .gitattributes, not just > ignore like this patch does > > - gs->name seems to be prepared for human consumption, not for > accessing files. grep_file() with opt->relative is true can put > quotes in gs->name, for example. Right. For the second, you would probably want gs->identifier in the case of GREP_SOURCE_FILE. But that identifier information is not available at all for GREP_SOURCE_SHA1, which is what is breaking the first point. > I feel like we should make this function a callback and let git-grep > deal with driver loading itself. I think we just need to have callers of grep_source_init provide us with the actual pathname (or NULL, in the case of GREP_SOURCE_BUF). That is where the information is lost. Like this incomplete and untested patch, which should fix the quoting problem for GREP_SOURCE_FILE, but leave the sha1 bits broken (see the in-code comment). I'm traveling this week, so I doubt I'll have time to look for a few more days. If you want to work on it, please do. diff --git a/builtin/grep.c b/builtin/grep.c index 82530a6..be602dd 100644 --- a/builtin/grep.c +++ b/builtin/grep.c @@ -86,7 +86,7 @@ static void add_work(struct grep_opt *opt, enum grep_source_type type, static int skip_first_line; static void add_work(struct grep_opt *opt, enum grep_source_type type, - const char *name, const void *id) + const char *name, const char *path, const void *id) { grep_lock(); @@ -94,7 +94,7 @@ static void add_work(struct grep_opt *opt, enum grep_source_type type, pthread_cond_wait(&cond_write, &grep_mutex); } - grep_source_init(&todo[todo_end].source, type, name, id); + grep_source_init(&todo[todo_end].source, type, name, path, id); if (opt->binary != GREP_BINARY_TEXT) grep_source_load_driver(&todo[todo_end].source); todo[todo_end].done = 0; @@ -378,14 +378,21 @@ static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, if (opt->relative && opt->prefix_length) { quote_path_relative(filename + tree_name_len, -1, &pathbuf, opt->prefix); + /* XXX Why do we insert here instead of just putting it in + * first? */ strbuf_insert(&pathbuf, 0, filename, tree_name_len); } else { strbuf_addstr(&pathbuf, filename); } + /* XXX We seem to get all kinds of junk via the filename field here, + * including partial filenames, sha1:path, etc. We could parse it + * ourselves, but that is probably insanity. We should ask the + * caller to break it down more for us. For now, just pass NULL. */ + #ifndef NO_PTHREADS if (use_threads) { - add_work(opt, GREP_SOURCE_SHA1, pathbuf.buf, sha1); + add_work(opt, GREP_SOURCE_SHA1, pathbuf.buf, NULL, sha1); strbuf_release(&pathbuf); return 0; } else @@ -394,7 +401,7 @@ static int grep_sha1(struct grep_opt *opt, const unsigned char *sha1, struct grep_source gs; int hit; - grep_source_init(&gs, GREP_SOURCE_SHA1, pathbuf.buf, sha1); + grep_source_init(&gs, GREP_SOURCE_SHA1, pathbuf.buf, NULL, sha1); strbuf_release(&pathbuf); hit = grep_source(opt, &gs); @@ -414,7 +421,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) #ifndef NO_PTHREADS if (use_threads) { - add_work(opt, GREP_SOURCE_FILE, buf.buf, filename); + add_work(opt, GREP_SOURCE_FILE, buf.buf, filename, filename); strbuf_release(&buf); return 0; } else @@ -423,7 +430,7 @@ static int grep_file(struct grep_opt *opt, const char *filename) struct grep_source gs; int hit; - grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename); + grep_source_init(&gs, GREP_SOURCE_FILE, buf.buf, filename, filename); strbuf_release(&buf); hit = grep_source(opt, &gs); diff --git a/grep.c b/grep.c index edc7776..06bc1c6 100644 --- a/grep.c +++ b/grep.c @@ -1373,7 +1373,7 @@ int grep_buffer(struct grep_opt *opt, char *buf, unsigned long size) struct grep_source gs; int r; - grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL); + grep_source_init(&gs, GREP_SOURCE_BUF, NULL, NULL, NULL); gs.buf = buf; gs.size = size; @@ -1384,10 +1384,12 @@ void grep_source_init(struct grep_source *gs, enum grep_source_type type, } void grep_source_init(struct grep_source *gs, enum grep_source_type type, - const char *name, const void *identifier) + const char *name, const char *path, + const void *identifier) { gs->type = type; gs->name = name ? xstrdup(name) : NULL; + gs->path = path ? xstrdup(path) : NULL; gs->buf = NULL; gs->size = 0; gs->driver = NULL; @@ -1409,6 +1411,8 @@ void grep_source_clear(struct grep_source *gs) { free(gs->name); gs->name = NULL; + free(gs->path); + gs->path = NULL; free(gs->identifier); gs->identifier = NULL; grep_source_clear_data(gs); diff --git a/grep.h b/grep.h index c256ac6..c2cf57b 100644 --- a/grep.h +++ b/grep.h @@ -158,11 +158,13 @@ void grep_source_init(struct grep_source *gs, enum grep_source_type type, char *buf; unsigned long size; + char *path; /* for attribute lookups */ struct userdiff_driver *driver; }; void grep_source_init(struct grep_source *gs, enum grep_source_type type, - const char *name, const void *identifier); + const char *name, const char *path, + const void *identifier); void grep_source_clear_data(struct grep_source *gs); void grep_source_clear(struct grep_source *gs); void grep_source_load_driver(struct grep_source *gs); -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html