Re: [PATCH] diff: add a config option to control orderfile

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Sep 17, 2013 at 04:56:16PM -0400, Jeff King wrote:
> On Tue, Sep 17, 2013 at 11:38:07PM +0300, Michael S. Tsirkin wrote:
> 
> > > A problem with both schemes, though, is that they are not
> > > backwards-compatible with existing git-patch-id implementations.
> > 
> > Could you clarify?
> > We never send patch IDs on the wire - how isn't this compatible?
> 
> I meant that you might be comparing patch-ids generated by different
> implementations, or across time. There are no dedicated tools to do so,
> but it is very easy to do so with standard tools like "join".
> 
> For example, you can do:
> 
>   patch_ids() {
>     git rev-list "$1" |
>     git diff-tree --stdin -p |
>     git patch-id |
>     sort
>   }
> 
>   patch_ids origin..topic1 >us
>   patch_ids origin..topic2 >them
> 
>   join us them | cut -d' ' -f2-3
> 
> to get a list of correlated commits between two branches. If the "them"
> was on another machine with a different implementation (or is saved from
> an earlier time), your patch-ids would not match.
> 
> It may be esoteric enough not to worry about, though. By far the most
> common use of patch-ids is going to be in a single "rev-list
> --cherry-pick" situation where you are trying to omit commits during
> a rebase.
> 
> I am mostly thinking of the problems we had with the "kup" tool, which
> expected stability across diffs that would be signed by both kernel.org.
> But as far as I know, they do not use patch-id. More details in case you
> are curious (including me arguing that we should not care, and it is
> kup's problem!) are here:
> 
>   http://thread.gmane.org/gmane.comp.version-control.git/192331/focus=192424
> 
> rerere is mentioned in that thread, but I believe that it does its own
> hash, and does not rely on patch-id.
> 
> -Peff

OK so far I came up with the following.
Needs documentation and tests obviously.
But besides that -
would something like this be enough to address the
issue Junio raised?

--->

patch-id: make it more stable

Add a new patch-id algorithm making it stable against
hunk reodering:
	- prepend header to each hunk (if not there)
	- calculate SHA1 hash for each hunk separately
	- sum all hashes to get patch id

Add --order-sensitive to get historical unstable behaviour.

Signed-off-by: Michael S. Tsirkin <mst@xxxxxxxxxx>
---
 builtin/patch-id.c | 65 +++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 50 insertions(+), 15 deletions(-)

diff --git a/builtin/patch-id.c b/builtin/patch-id.c
index 3cfe02d..d1902ff 100644
--- a/builtin/patch-id.c
+++ b/builtin/patch-id.c
@@ -1,17 +1,14 @@
 #include "builtin.h"
 
-static void flush_current_id(int patchlen, unsigned char *id, git_SHA_CTX *c)
+static void flush_current_id(int patchlen, unsigned char *id, unsigned char *result)
 {
-	unsigned char result[20];
 	char name[50];
 
 	if (!patchlen)
 		return;
 
-	git_SHA1_Final(result, c);
 	memcpy(name, sha1_to_hex(id), 41);
 	printf("%s %s\n", sha1_to_hex(result), name);
-	git_SHA1_Init(c);
 }
 
 static int remove_space(char *line)
@@ -56,10 +53,30 @@ static int scan_hunk_header(const char *p, int *p_before, int *p_after)
 	return 1;
 }
 
-static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct strbuf *line_buf)
+static void flush_one_hunk(unsigned char *result, git_SHA_CTX *ctx)
 {
-	int patchlen = 0, found_next = 0;
+	unsigned char hash[20];
+	unsigned short carry = 0;
+	int i;
+
+	git_SHA1_Final(hash, ctx);
+	git_SHA1_Init(ctx);
+	/* 20-byte sum, with carry */
+	for (i = 0; i < 20; ++i) {
+		carry += result[i] + hash[i];
+		result[i] = carry;
+		carry >>= 8;
+	}
+}
+static int get_one_patchid(unsigned char *next_sha1, unsigned char *result,
+			   struct strbuf *line_buf, int stable)
+{
+	int patchlen = 0, found_next = 0, hunks = 0;
 	int before = -1, after = -1;
+	git_SHA_CTX ctx, header_ctx;
+
+	git_SHA1_Init(&ctx);
+	hashclr(result);
 
 	while (strbuf_getwholeline(line_buf, stdin, '\n') != EOF) {
 		char *line = line_buf->buf;
@@ -99,6 +116,18 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
 			if (!memcmp(line, "@@ -", 4)) {
 				/* Parse next hunk, but ignore line numbers.  */
 				scan_hunk_header(line, &before, &after);
+				if (stable) {
+					if (hunks) {
+						flush_one_hunk(result, &ctx);
+						memcpy(&ctx, &header_ctx,
+						       sizeof ctx);
+					} else {
+						/* Save ctx for next hunk.  */
+						memcpy(&header_ctx, &ctx,
+						       sizeof ctx);
+					}
+				}
+				hunks++;
 				continue;
 			}
 
@@ -108,6 +137,7 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
 
 			/* Else we're parsing another header.  */
 			before = after = -1;
+			hunks = 0;
 		}
 
 		/* If we get here, we're inside a hunk.  */
@@ -119,27 +149,27 @@ static int get_one_patchid(unsigned char *next_sha1, git_SHA_CTX *ctx, struct st
 		/* Compute the sha without whitespace */
 		len = remove_space(line);
 		patchlen += len;
-		git_SHA1_Update(ctx, line, len);
+		git_SHA1_Update(&ctx, line, len);
 	}
 
 	if (!found_next)
 		hashclr(next_sha1);
 
+	flush_one_hunk(result, &ctx);
+
 	return patchlen;
 }
 
-static void generate_id_list(void)
+static void generate_id_list(int stable)
 {
-	unsigned char sha1[20], n[20];
-	git_SHA_CTX ctx;
+	unsigned char sha1[20], n[20], result[20];
 	int patchlen;
 	struct strbuf line_buf = STRBUF_INIT;
 
-	git_SHA1_Init(&ctx);
 	hashclr(sha1);
 	while (!feof(stdin)) {
-		patchlen = get_one_patchid(n, &ctx, &line_buf);
-		flush_current_id(patchlen, sha1, &ctx);
+		patchlen = get_one_patchid(n, result, &line_buf, stable);
+		flush_current_id(patchlen, sha1, result);
 		hashcpy(sha1, n);
 	}
 	strbuf_release(&line_buf);
@@ -149,9 +179,14 @@ static const char patch_id_usage[] = "git patch-id < patch";
 
 int cmd_patch_id(int argc, const char **argv, const char *prefix)
 {
-	if (argc != 1)
+	int stable;
+	if (argc == 2 && !strcmp(argv[1], "--order-sensitive"))
+		stable = 0;
+	else if (argc == 1)
+		stable = 1;
+	else
 		usage(patch_id_usage);
 
-	generate_id_list();
+	generate_id_list(stable);
 	return 0;
 }
-- 
MST

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [Linux Kernel Development]     [Gcc Help]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [V4L]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]     [Fedora Users]