On Mon, 22 Oct 2007, Linus Torvalds wrote: > > Ok, as some people notices, there were a few bugs in the previous patch. I > didn't free the hashes correctly (stupid) and the Makefile had "hash.o" > instead of "hash.h". Ok, there were still more bugs, and before you get too involved with this last patch (not that I've seen any comments yet), apply this appended patch to actually fix things a bit more first! Yes, I'm a moron. I hadn't even bothered to run the test-suite on it, and that showed several silly problems. One of the problems was that since the rename detection copied the diffspecs around, the "rename_used" count couldn't work right, because things got copied around and the count stayed with one diffspec, but not the other.. In the kernel, we have a rule that says that any data structure that isn't ref-counted is basically a bug, and that was true here too. Instead of copying and splitting the diffspecs, just refcount them and keep track of how many users there are. While the above bug was a somewhat subtle issue from me trying to be clever in avoiding the O(n*m) file copy/rename reuse issue, there were a few issues that were me just being totally braindead: the exact rename detection had lost the code that took file modes into account, so it would generate "renames" from regular files to symlinks, that the generic diff core layer would just split up again. And even more stupidly, I had matched up the src/dst things when finding the rename, which just complicated things (added a totally unnecessary need to keep track of a destination being used more than once) and also broke the basename matching comparison. Duh. So here's an incremental patch on top of the previous failed try. And if somebody is confused (and that might be me) and cannot get things to apply, just holler and I'll send the whole thing again. I might even try to clean up the series a bit and do it in stages. This patch shouldn't change any performance behaviour (well, it might speed things up a bit to not allocate those diffspec structures, but it's unlikely that is even measurable). It just fixes stuff. I'm sure there's more to come.. Linus --- diff.c | 17 ++++++++++++----- diffcore-rename.c | 40 ++++++++++++++++++++++------------------ diffcore.h | 2 ++ 3 files changed, 36 insertions(+), 23 deletions(-) diff --git a/diff.c b/diff.c index e892030..2e74cb3 100644 --- a/diff.c +++ b/diff.c @@ -1440,9 +1440,18 @@ struct diff_filespec *alloc_filespec(const char *path) memset(spec, 0, sizeof(*spec)); spec->path = (char *)(spec + 1); memcpy(spec->path, path, namelen+1); + spec->count = 1; return spec; } +void free_filespec(struct diff_filespec *spec) +{ + if (!--spec->count) { + diff_free_filespec_data(spec); + free(spec); + } +} + void fill_filespec(struct diff_filespec *spec, const unsigned char *sha1, unsigned short mode) { @@ -2431,10 +2440,8 @@ struct diff_filepair *diff_queue(struct diff_queue_struct *queue, void diff_free_filepair(struct diff_filepair *p) { - diff_free_filespec_data(p->one); - diff_free_filespec_data(p->two); - free(p->one); - free(p->two); + free_filespec(p->one); + free_filespec(p->two); free(p); } @@ -2588,7 +2595,7 @@ void diff_debug_filepair(const struct diff_filepair *p, int i) diff_debug_filespec(p->two, i, "two"); fprintf(stderr, "score %d, status %c rename_used %d broken %d\n", p->score, p->status ? p->status : '?', - p->rename_used, p->broken_pair); + p->one->rename_used, p->broken_pair); } void diff_debug_queue(const char *msg, struct diff_queue_struct *q) diff --git a/diffcore-rename.c b/diffcore-rename.c index cc105db..3946932 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -184,7 +184,7 @@ static int estimate_similarity(struct diff_filespec *src, static void record_rename_pair(int dst_index, int src_index, int score) { - struct diff_filespec *one, *two, *src, *dst; + struct diff_filespec *src, *dst; struct diff_filepair *dp; if (rename_dst[dst_index].pair) @@ -192,14 +192,12 @@ static void record_rename_pair(int dst_index, int src_index, int score) src = rename_src[src_index].one; src->rename_used++; - one = alloc_filespec(src->path); - fill_filespec(one, src->sha1, src->mode); + src->count++; dst = rename_dst[dst_index].two; - two = alloc_filespec(dst->path); - fill_filespec(two, dst->sha1, dst->mode); + dst->count++; - dp = diff_queue(NULL, one, two); + dp = diff_queue(NULL, src, dst); dp->renamed_pair = 1; if (!strcmp(src->path, dst->path)) dp->score = rename_src[src_index].score; @@ -232,21 +230,30 @@ static int find_identical_files(struct file_similarity *src, struct file_similarity *dst) { int renames = 0; + + /* + * Walk over all the destinations ... + */ do { - struct diff_filespec *one = src->filespec; + struct diff_filespec *one = dst->filespec; struct file_similarity *p, *best; int i = 100; + /* + * .. to find the best source match + */ best = NULL; - for (p = dst; p; p = p->next) { + for (p = src; p; p = p->next) { struct diff_filespec *two = p->filespec; - /* Already picked as a destination? */ - if (!p->src_dst) - continue; /* False hash collission? */ if (hashcmp(one->sha1, two->sha1)) continue; + /* Non-regular files? If so, the modes must match! */ + if (!S_ISREG(one->mode) || !S_ISREG(two->mode)) { + if (one->mode != two->mode) + continue; + } best = p; if (basename_same(one, two)) break; @@ -256,11 +263,10 @@ static int find_identical_files(struct file_similarity *src, break; } if (best) { - best->src_dst = 0; - record_rename_pair(best->index, src->index, MAX_SCORE); + record_rename_pair(dst->index, best->index, MAX_SCORE); renames++; } - } while ((src = src->next) != NULL); + } while ((dst = dst->next) != NULL); return renames; } @@ -569,10 +575,8 @@ void diffcore_rename(struct diff_options *options) *q = outq; diff_debug_queue("done collapsing", q); - for (i = 0; i < rename_dst_nr; i++) { - diff_free_filespec_data(rename_dst[i].two); - free(rename_dst[i].two); - } + for (i = 0; i < rename_dst_nr; i++) + free_filespec(rename_dst[i].two); free(rename_dst); rename_dst = NULL; diff --git a/diffcore.h b/diffcore.h index ceda932..cc96c20 100644 --- a/diffcore.h +++ b/diffcore.h @@ -29,6 +29,7 @@ struct diff_filespec { void *cnt_data; const char *funcname_pattern_ident; unsigned long size; + int count; /* Reference count */ int xfrm_flags; /* for use by the xfrm */ int rename_used; /* Count of rename users */ unsigned short mode; /* file mode */ @@ -44,6 +45,7 @@ struct diff_filespec { }; extern struct diff_filespec *alloc_filespec(const char *); +extern void free_filespec(struct diff_filespec *); extern void fill_filespec(struct diff_filespec *, const unsigned char *, unsigned short); - To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html