On Fri, Sep 20, 2013 at 12:32:26AM +0300, Michael S. Tsirkin wrote: > On Tue, Sep 17, 2013 at 04:56:16PM -0400, Jeff King wrote: > > On Tue, Sep 17, 2013 at 11:38:07PM +0300, Michael S. Tsirkin wrote: > > > > > > A problem with both schemes, though, is that they are not > > > > backwards-compatible with existing git-patch-id implementations. > > > > > > Could you clarify? > > > We never send patch IDs on the wire - how isn't this compatible? > > > > I meant that you might be comparing patch-ids generated by different > > implementations, or across time. There are no dedicated tools to do so, > > but it is very easy to do so with standard tools like "join". > > > > For example, you can do: > > > > patch_ids() { > > git rev-list "$1" | > > git diff-tree --stdin -p | > > git patch-id | > > sort > > } > > > > patch_ids origin..topic1 >us > > patch_ids origin..topic2 >them > > > > join us them | cut -d' ' -f2-3 > > > > to get a list of correlated commits between two branches. If the "them" > > was on another machine with a different implementation (or is saved from > > an earlier time), your patch-ids would not match. > > > > It may be esoteric enough not to worry about, though. By far the most > > common use of patch-ids is going to be in a single "rev-list > > --cherry-pick" situation where you are trying to omit commits during > > a rebase. > > > > I am mostly thinking of the problems we had with the "kup" tool, which > > expected stability across diffs that would be signed by both kernel.org. > > But as far as I know, they do not use patch-id. More details in case you > > are curious (including me arguing that we should not care, and it is > > kup's problem!) are here: > > > > http://thread.gmane.org/gmane.comp.version-control.git/192331/focus=192424 > > > > rerere is mentioned in that thread, but I believe that it does its own > > hash, and does not rely on patch-id. > > > > -Peff > > OK so far I came up with the following. > Needs documentation and tests obviously. > But besides that - > would something like this be enough to address the > issue Junio raised? Ping. Junio could you comment on this please? > ---> > > patch-id: make it more stable > > Add a new patch-id algorithm making it stable against > hunk reodering: > - prepend header to each hunk (if not there) > - calculate SHA1 hash for each hunk separately > - sum all hashes to get patch id > > Add --order-sensitive to get historical unstable behaviour. > > Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > --- > builtin/patch-id.c | 65 +++++++++++++++++++++++++++++++++++++++++------------- > 1 file changed, 50 insertions(+), 15 deletions(-) > > diff --git a/builtin/patch-id.c b/builtin/patch-id.c > index 3cfe02d..d1902ff 100644 > --- a/builtin/patch-id.c > +++ b/builtin/patch-id.c > @@ -1,17 +1,14 @@ > #include "builtin.h" > > -static void flush_current_id(int patchlen, unsigned char *id, git_SHA_CTX *c) > +static void flush_current_id(int patchlen, unsigned char *id, unsigned char *result) > { > - unsigned char result[20]; > char name[50]; > > if (!patchlen) > return; > > - git_SHA1_Final(result, c); > memcpy(name, sha1_to_hex(id), 41); > printf("%s %s\n", sha1_to_hex(result), name); > - git_SHA1_Init(c); > } > > static int remove_space(char *line) > @@ -56,10 +53,30 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after) > return 1; > } > > -static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct strbuf *line_buf) > +static void flush_one_hunk(unsigned char *result, git_SHA_CTX *ctx) > { > - int patchlen = 0, found_next = 0; > + unsigned char hash[20]; > + unsigned short carry = 0; > + int i; > + > + git_SHA1_Final(hash, ctx); > + git_SHA1_Init(ctx); > + /* 20-byte sum, with carry */ > + for (i = 0; i < 20; ++i) { > + carry += result[i] + hash[i]; > + result[i] = carry; > + carry >>= 8; > + } > +} > +static int get_one_patchid(unsigned char *next_sha1, unsigned char *result, > + struct strbuf *line_buf, int stable) > +{ > + int patchlen = 0, found_next = 0, hunks = 0; > int before = -1, after = -1; > + git_SHA_CTX ctx, header_ctx; > + > + git_SHA1_Init(&ctx); > + hashclr(result); > > while (strbuf_getwholeline(line_buf, stdin, '\n') != EOF) { > char *line = line_buf->buf; > @@ -99,6 +116,18 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st > if (!memcmp(line, "@@ -", 4)) { > /* Parse next hunk, but ignore line numbers. */ > scan_hunk_header(line, &before, &after); > + if (stable) { > + if (hunks) { > + flush_one_hunk(result, &ctx); > + memcpy(&ctx, &header_ctx, > + sizeof ctx); > + } else { > + /* Save ctx for next hunk. */ > + memcpy(&header_ctx, &ctx, > + sizeof ctx); > + } > + } > + hunks++; > continue; > } > > @@ -108,6 +137,7 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st > > /* Else we're parsing another header. */ > before = after = -1; > + hunks = 0; > } > > /* If we get here, we're inside a hunk. */ > @@ -119,27 +149,27 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st > /* Compute the sha without whitespace */ > len = remove_space(line); > patchlen += len; > - git_SHA1_Update(ctx, line, len); > + git_SHA1_Update(&ctx, line, len); > } > > if (!found_next) > hashclr(next_sha1); > > + flush_one_hunk(result, &ctx); > + > return patchlen; > } > > -static void generate_id_list(void) > +static void generate_id_list(int stable) > { > - unsigned char sha1[20], n[20]; > - git_SHA_CTX ctx; > + unsigned char sha1[20], n[20], result[20]; > int patchlen; > struct strbuf line_buf = STRBUF_INIT; > > - git_SHA1_Init(&ctx); > hashclr(sha1); > while (!feof(stdin)) { > - patchlen = get_one_patchid(n, &ctx, &line_buf); > - flush_current_id(patchlen, sha1, &ctx); > + patchlen = get_one_patchid(n, result, &line_buf, stable); > + flush_current_id(patchlen, sha1, result); > hashcpy(sha1, n); > } > strbuf_release(&line_buf); > @@ -149,9 +179,14 @@ static const char patch_id_usage[] = "git patch-id < patch"; > > int cmd_patch_id(int argc, const char **argv, const char *prefix) > { > - if (argc != 1) > + int stable; > + if (argc == 2 && !strcmp(argv[1], "--order-sensitive")) > + stable = 0; > + else if (argc == 1) > + stable = 1; > + else > usage(patch_id_usage); > > - generate_id_list(); > + generate_id_list(stable); > return 0; > } > -- > MST > -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html