On Tue, Sep 17, 2013 at 04:56:16PM -0400, Jeff King wrote: > On Tue, Sep 17, 2013 at 11:38:07PM +0300, Michael S. Tsirkin wrote: > > > > A problem with both schemes, though, is that they are not > > > backwards-compatible with existing git-patch-id implementations. > > > > Could you clarify? > > We never send patch IDs on the wire - how isn't this compatible? > > I meant that you might be comparing patch-ids generated by different > implementations, or across time. There are no dedicated tools to do so, > but it is very easy to do so with standard tools like "join". > > For example, you can do: > > patch_ids() { > git rev-list "$1" | > git diff-tree --stdin -p | > git patch-id | > sort > } > > patch_ids origin..topic1 >us > patch_ids origin..topic2 >them > > join us them | cut -d' ' -f2-3 > > to get a list of correlated commits between two branches. If the "them" > was on another machine with a different implementation (or is saved from > an earlier time), your patch-ids would not match. > > It may be esoteric enough not to worry about, though. By far the most > common use of patch-ids is going to be in a single "rev-list > --cherry-pick" situation where you are trying to omit commits during > a rebase. > > I am mostly thinking of the problems we had with the "kup" tool, which > expected stability across diffs that would be signed by both kernel.org. > But as far as I know, they do not use patch-id. More details in case you > are curious (including me arguing that we should not care, and it is > kup's problem!) are here: > > http://thread.gmane.org/gmane.comp.version-control.git/192331/focus=192424 > > rerere is mentioned in that thread, but I believe that it does its own > hash, and does not rely on patch-id. > > -Peff OK so far I came up with the following. Needs documentation and tests obviously. But besides that - would something like this be enough to address the issue Junio raised? ---> patch-id: make it more stable Add a new patch-id algorithm making it stable against hunk reodering: - prepend header to each hunk (if not there) - calculate SHA1 hash for each hunk separately - sum all hashes to get patch id Add --order-sensitive to get historical unstable behaviour. Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> --- builtin/patch-id.c | 65 +++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 50 insertions(+), 15 deletions(-) diff --git a/builtin/patch-id.c b/builtin/patch-id.c index 3cfe02d..d1902ff 100644 --- a/builtin/patch-id.c +++ b/builtin/patch-id.c @@ -1,17 +1,14 @@ #include "builtin.h" -static void flush_current_id(int patchlen, unsigned char *id, git_SHA_CTX *c) +static void flush_current_id(int patchlen, unsigned char *id, unsigned char *result) { - unsigned char result[20]; char name[50]; if (!patchlen) return; - git_SHA1_Final(result, c); memcpy(name, sha1_to_hex(id), 41); printf("%s %s\n", sha1_to_hex(result), name); - git_SHA1_Init(c); } static int remove_space(char *line) @@ -56,10 +53,30 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after) return 1; } -static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct strbuf *line_buf) +static void flush_one_hunk(unsigned char *result, git_SHA_CTX *ctx) { - int patchlen = 0, found_next = 0; + unsigned char hash[20]; + unsigned short carry = 0; + int i; + + git_SHA1_Final(hash, ctx); + git_SHA1_Init(ctx); + /* 20-byte sum, with carry */ + for (i = 0; i < 20; ++i) { + carry += result[i] + hash[i]; + result[i] = carry; + carry >>= 8; + } +} +static int get_one_patchid(unsigned char *next_sha1, unsigned char *result, + struct strbuf *line_buf, int stable) +{ + int patchlen = 0, found_next = 0, hunks = 0; int before = -1, after = -1; + git_SHA_CTX ctx, header_ctx; + + git_SHA1_Init(&ctx); + hashclr(result); while (strbuf_getwholeline(line_buf, stdin, '\n') != EOF) { char *line = line_buf->buf; @@ -99,6 +116,18 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st if (!memcmp(line, "@@ -", 4)) { /* Parse next hunk, but ignore line numbers. */ scan_hunk_header(line, &before, &after); + if (stable) { + if (hunks) { + flush_one_hunk(result, &ctx); + memcpy(&ctx, &header_ctx, + sizeof ctx); + } else { + /* Save ctx for next hunk. */ + memcpy(&header_ctx, &ctx, + sizeof ctx); + } + } + hunks++; continue; } @@ -108,6 +137,7 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st /* Else we're parsing another header. */ before = after = -1; + hunks = 0; } /* If we get here, we're inside a hunk. */ @@ -119,27 +149,27 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st /* Compute the sha without whitespace */ len = remove_space(line); patchlen += len; - git_SHA1_Update(ctx, line, len); + git_SHA1_Update(&ctx, line, len); } if (!found_next) hashclr(next_sha1); + flush_one_hunk(result, &ctx); + return patchlen; } -static void generate_id_list(void) +static void generate_id_list(int stable) { - unsigned char sha1[20], n[20]; - git_SHA_CTX ctx; + unsigned char sha1[20], n[20], result[20]; int patchlen; struct strbuf line_buf = STRBUF_INIT; - git_SHA1_Init(&ctx); hashclr(sha1); while (!feof(stdin)) { - patchlen = get_one_patchid(n, &ctx, &line_buf); - flush_current_id(patchlen, sha1, &ctx); + patchlen = get_one_patchid(n, result, &line_buf, stable); + flush_current_id(patchlen, sha1, result); hashcpy(sha1, n); } strbuf_release(&line_buf); @@ -149,9 +179,14 @@ static const char patch_id_usage[] = "git patch-id < patch"; int cmd_patch_id(int argc, const char **argv, const char *prefix) { - if (argc != 1) + int stable; + if (argc == 2 && !strcmp(argv[1], "--order-sensitive")) + stable = 0; + else if (argc == 1) + stable = 1; + else usage(patch_id_usage); - generate_id_list(); + generate_id_list(stable); return 0; } -- MST -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html