This is the sixteenth series of hash function transition patches. Note that the patches khash patches use a different name (kh_oid_map_t) than the SHA-1 type (kh_sha1_t). The _oid names are already used by the oidset code, which uses a set approach, not a map approach. This series has slightly more patches than before, but it also does a little more conversion than before. Changes from v1: * Add support for object_id khash maps. * Use this support in the pack bitmap code. * Improve comments in notes code. * Fix mistranslation of fast-import code. * Provide more descriptive values and comments in fast-import code. * Fix miscapitalization in commit message. * Add code to look up a hash algorithm by length. * Use this code, along with René's patch, to restructure the archive patches. * Fix a preexisting off-by-one issue on error in builtin/difftool. * Adopt Ævar's suggested changes to Gitweb. René Scharfe (1): get-tar-commit-id: parse comment record brian m. carlson (34): t/lib-submodule-update: use appropriate length constant khash: move oid hash table definition pack-bitmap: make bitmap header handling hash agnostic pack-bitmap: convert struct stored_bitmap to object_id pack-bitmap: replace sha1_to_hex pack-bitmap: switch hard-coded constants to the_hash_algo pack-bitmap: switch hash tables to use struct object_id submodule: avoid hard-coded constants notes-merge: switch to use the_hash_algo notes: make hash size independent notes: replace sha1_to_hex object-store: rename and expand packed_git's sha1 member builtin/name-rev: make hash-size independent fast-import: make hash-size independent fast-import: replace sha1_to_hex builtin/am: make hash size independent builtin/pull: make hash-size independent http-push: convert to use the_hash_algo http-backend: allow 64-character hex names http-push: remove remaining uses of sha1_to_hex http-walker: replace sha1_to_hex http: replace hard-coded constant with the_hash_algo http: compute hash of downloaded objects using the_hash_algo http: replace sha1_to_hex remote-curl: make hash size independent hash: add a function to lookup hash algorithm by length builtin/get-tar-commit-id: make hash size independent archive: convert struct archiver_args to object_id refspec: make hash size independent builtin/difftool: use parse_oid_hex dir: make untracked cache extension hash size independent read-cache: read data in a hash-independent way Git.pm: make hash size independent gitweb: make hash size independent archive-tar.c | 7 +-- archive-zip.c | 10 ++-- archive.c | 8 +-- archive.h | 2 +- builtin/am.c | 9 ++-- builtin/difftool.c | 10 ++-- builtin/get-tar-commit-id.c | 14 +++++- builtin/name-rev.c | 14 +++--- builtin/pack-objects.c | 6 +-- builtin/pack-redundant.c | 2 +- builtin/pull.c | 11 +++-- dir.c | 28 +++++------ fast-import.c | 66 +++++++++++++++---------- gitweb/gitweb.perl | 97 +++++++++++++++++++++++++------------ hash.h | 2 + http-backend.c | 3 ++ http-push.c | 29 +++++------ http-walker.c | 18 +++---- http.c | 33 +++++++------ http.h | 2 +- khash.h | 18 +++++++ merge-recursive.c | 2 +- notes-merge.c | 6 +-- notes.c | 44 +++++++++-------- object-store.h | 2 +- oidset.h | 12 ----- pack-bitmap-write.c | 8 +-- pack-bitmap.c | 76 ++++++++++++++--------------- pack-bitmap.h | 4 +- packfile.c | 6 +-- perl/Git.pm | 2 +- read-cache.c | 74 +++++++++++----------------- refspec.c | 2 +- remote-curl.c | 11 +++-- sha1-file.c | 8 +++ submodule.c | 2 +- t/lib-submodule-update.sh | 3 +- 37 files changed, 362 insertions(+), 289 deletions(-) Diff-intervalle contre v1 : -: ---------- > 1: a8262704bf khash: move oid hash table definition 1: 78b7a887d1 = 2: 6ea91e43bb pack-bitmap: make bitmap header handling hash agnostic 2: 307dd4d7f3 = 3: 2c1e7d56b4 pack-bitmap: convert struct stored_bitmap to object_id 3: 7b31ed912b = 4: cc74a66e9c pack-bitmap: replace sha1_to_hex 4: 90a2cbba90 = 5: a6d0161ade pack-bitmap: switch hard-coded constants to the_hash_algo -: ---------- > 6: 55db506411 pack-bitmap: switch hash tables to use struct object_id 5: 869587b01d = 7: e8ed86d773 submodule: avoid hard-coded constants 6: ce253521c0 = 8: 3f7e1da6d2 notes-merge: switch to use the_hash_algo 7: 36da40abe0 ! 9: f367ddef94 notes: make hash size independent @@ -82,7 +82,7 @@ -/* hex SHA1 + 19 * '/' + NUL */ -#define FANOUT_PATH_MAX GIT_SHA1_HEXSZ + FANOUT_PATH_SEPARATORS + 1 -+/* hex oid + one slash between each pair + NUL */ ++/* hex oid + '/' between each pair of hex digits + NUL */ +#define FANOUT_PATH_MAX GIT_MAX_HEXSZ + FANOUT_PATH_SEPARATORS_MAX + 1 static void construct_path_with_fanout(const unsigned char *sha1, 8: 91829a63e3 ! 10: 8e3508e891 notes: replace sha1_to_hex @@ -12,7 +12,7 @@ --- a/notes.c +++ b/notes.c @@ - /* hex oid + one slash between each pair + NUL */ + /* hex oid + '/' between each pair of hex digits + NUL */ #define FANOUT_PATH_MAX GIT_MAX_HEXSZ + FANOUT_PATH_SEPARATORS_MAX + 1 -static void construct_path_with_fanout(const unsigned char *sha1, 9: 0b049ec2b0 = 11: a3d1f218dd object-store: rename and expand packed_git's sha1 member 10: 3b3a389040 = 12: 7d9a9a5c12 builtin/name-rev: make hash-size independent 11: f6cf848d3e ! 13: fe457d42f0 fast-import: make hash-size independent @@ -11,6 +11,44 @@ diff --git a/fast-import.c b/fast-import.c --- a/fast-import.c +++ b/fast-import.c +@@ + */ + #define NO_DELTA S_ISUID + ++/* ++ * The amount of additional space required in order to write an object into the ++ * current pack. This is the hash lengths at the end of the pack, plus the ++ * length of one object ID. ++ */ ++#define PACK_SIZE_THRESHOLD (the_hash_algo->rawsz * 3) ++ + struct object_entry { + struct pack_idx_entry idx; + struct object_entry *next; +@@ + git_deflate_end(&s); + + /* Determine if we should auto-checkpoint. */ +- if ((max_packsize && (pack_size + 60 + s.total_out) > max_packsize) +- || (pack_size + 60 + s.total_out) < pack_size) { ++ if ((max_packsize ++ && (pack_size + PACK_SIZE_THRESHOLD + s.total_out) > max_packsize) ++ || (pack_size + PACK_SIZE_THRESHOLD + s.total_out) < pack_size) { + + /* This new object needs to *not* have the current pack_id. */ + e->pack_id = pack_id + 1; +@@ + int status = Z_OK; + + /* Determine if we should auto-checkpoint. */ +- if ((max_packsize && (pack_size + 60 + len) > max_packsize) +- || (pack_size + 60 + len) < pack_size) ++ if ((max_packsize ++ && (pack_size + PACK_SIZE_THRESHOLD + len) > max_packsize) ++ || (pack_size + PACK_SIZE_THRESHOLD + len) < pack_size) + cycle_packfile(); + + hashfile_checkpoint(pack_file, &checkpoint); @@ c += e->name->str_len + 1; hashcpy(e->versions[0].oid.hash, (unsigned char *)c); @@ -34,7 +72,8 @@ uintmax_t num_notes = 0; struct object_id oid; - char realpath[60]; -+ char realpath[GIT_MAX_RAWSZ * 3]; ++ /* hex oid + '/' between each pair of hex digits + NUL */ ++ char realpath[GIT_MAX_HEXSZ + ((GIT_MAX_HEXSZ / 2) - 1) + 1]; + const unsigned hexsz = the_hash_algo->hexsz; if (!root->tree) @@ -71,7 +110,7 @@ commit_type, &size, &commit_oid); - if (!buf || size < 46) -+ if (!buf || size < the_hash_algo->hexsz) ++ if (!buf || size < the_hash_algo->hexsz + 6) die("Not a valid commit: %s", p); free(buf); } else @@ -89,7 +128,7 @@ commit_type, &size, &n->oid); - if (!buf || size < 46) -+ if (!buf || size < the_hash_algo->hexsz) ++ if (!buf || size < the_hash_algo->hexsz + 6) die("Not a valid commit: %s", from); free(buf); } else 12: 366df3eeb4 = 14: 66999cae86 fast-import: replace sha1_to_hex 13: 3d3b79cbed = 15: 8dd1749b3d builtin/am: make hash size independent 14: 3a3f8ddd55 < -: ---------- builtin/pull: make hash-size independent -: ---------- > 16: 253a42571e builtin/pull: make hash-size independent 15: fc22aed0ad = 17: 16c417edda http-push: convert to use the_hash_algo 16: a2da549b64 = 18: 6d867f375d http-backend: allow 64-character hex names 17: c7481c69d8 = 19: 9e53e3be47 http-push: remove remaining uses of sha1_to_hex 18: dbb3840e5c ! 20: 0124870940 http-walker: replace sha1_to_hex @@ -3,7 +3,7 @@ http-walker: replace sha1_to_hex Since sha1_to_hex is limited to SHA-1, replace the uses of it in this - file with hasH_to_hex. Rename several variables accordingly to reflect + file with hash_to_hex. Rename several variables accordingly to reflect that they are no longer limited to SHA-1. Signed-off-by: brian m. carlson <sandals@xxxxxxxxxxxxxxxxxxxx> 19: 9b0a16a9d9 = 21: d9107144c0 http: replace hard-coded constant with the_hash_algo 20: bf433661da = 22: f8d7da7253 http: compute hash of downloaded objects using the_hash_algo 21: a66eb80a2f = 23: f0e47aa063 http: replace sha1_to_hex 22: 423b42feca = 24: 26f115ab3f remote-curl: make hash size independent 23: 4b15d67a24 < -: ---------- archive-tar: make hash size independent -: ---------- > 25: b22c25095c hash: add a function to lookup hash algorithm by length -: ---------- > 26: 89d15c7609 get-tar-commit-id: parse comment record -: ---------- > 27: 5ccbfd2ff1 builtin/get-tar-commit-id: make hash size independent 24: ee52d16b11 ! 28: ede9b2c9b9 archive: convert struct archiver_args to object_id @@ -3,8 +3,8 @@ archive: convert struct archiver_args to object_id Change the commit_sha1 member to be called "commit_oid" and change it to - be a pointer to struct object_id. Additionally, update two uses of - GIT_SHA1_HEXSZ to use the_hash_algo instead. + be a pointer to struct object_id. Additionally, update some uses of + GIT_SHA1_HEXSZ and hard-coded values to use the_hash_algo instead. Signed-off-by: brian m. carlson <sandals@xxxxxxxxxxxxxxxxxxxx> @@ -15,20 +15,21 @@ static void write_global_extended_header(struct archiver_args *args) { -- const unsigned char *hash = args->commit_sha1; +- const unsigned char *sha1 = args->commit_sha1; + const struct object_id *oid = args->commit_oid; struct strbuf ext_header = STRBUF_INIT; struct ustar_header header; unsigned int mode; -- if (hash) +- if (sha1) + if (oid) strbuf_append_ext_header(&ext_header, "comment", -- hash_to_hex(hash), +- sha1_to_hex(sha1), 40); + oid_to_hex(oid), - the_hash_algo->hexsz); ++ the_hash_algo->hexsz); if (args->time > USTAR_MAX_MTIME) { strbuf_append_ext_header_uint(&ext_header, "mtime", + args->time); diff --git a/archive-zip.c b/archive-zip.c --- a/archive-zip.c 25: 47ddaca720 = 29: 4334a5d833 refspec: make hash size independent 26: 8f2437f0ef ! 30: c74bb05533 builtin/difftool: use parse_oid_hex @@ -5,6 +5,11 @@ Instead of using get_oid_hex and adding constants to the result, use parse_oid_hex to make this code independent of the hash size. + Additionally, correct a typo that would cause us to print one too few + characters on error, since we will already have incremented the pointer + to point to the beginning of the object ID before we get to printing the + error message. + Signed-off-by: brian m. carlson <sandals@xxxxxxxxxxxxxxxxxxxx> diff --git a/builtin/difftool.c b/builtin/difftool.c @@ -15,15 +20,17 @@ if (*p != ' ') return error("expected ' ', got '%c'", *p); - if (get_oid_hex(++p, oid1)) -+ if (parse_oid_hex(++p, oid1, (const char **)&p)) - return error("expected object ID, got '%s'", p + 1); +- return error("expected object ID, got '%s'", p + 1); - p += GIT_SHA1_HEXSZ; ++ if (parse_oid_hex(++p, oid1, (const char **)&p)) ++ return error("expected object ID, got '%s'", p); if (*p != ' ') return error("expected ' ', got '%c'", *p); - if (get_oid_hex(++p, oid2)) -+ if (parse_oid_hex(++p, oid2, (const char **)&p)) - return error("expected object ID, got '%s'", p + 1); +- return error("expected object ID, got '%s'", p + 1); - p += GIT_SHA1_HEXSZ; ++ if (parse_oid_hex(++p, oid2, (const char **)&p)) ++ return error("expected object ID, got '%s'", p); if (*p != ' ') return error("expected ' ', got '%c'", *p); *status = *++p; 27: bf2f8ae68b = 31: d7618969e2 dir: make untracked cache extension hash size independent 28: 0465f487fd = 32: faa9e37821 read-cache: read data in a hash-independent way 29: 7396961044 = 33: 0086840da3 Git.pm: make hash size independent 30: 8777c5e1f3 ! 34: c91abe5eb7 gitweb: make hash size independent @@ -8,11 +8,15 @@ hex characters, and use this variable anywhere we would have previously hard-coded a 40 in a regex. + Add some helper functions which allow us to write tighter regexes that + match exactly the number of hex characters we're expecting. + Similarly, switch the code that looks for deleted diffinfo information to look for either 40 or 64 zeros, and update one piece of code to use this function. Finally, when formatting a log line, allow an abbreviated describe output to contain up to 64 characters. + Helped-by: Ævar Arnfjörð Bjarmason <avarab@xxxxxxxxx> Signed-off-by: brian m. carlson <sandals@xxxxxxxxxxxxxxxxxxxx> diff --git a/gitweb/gitweb.perl b/gitweb/gitweb.perl @@ -22,8 +26,37 @@ # ====================================================================== # input validation and dispatch ++# Various hash size-related values. ++my $sha1_len = 40; ++my $sha256_extra_len = 24; ++my $sha256_len = $sha1_len + $sha256_extra_len; ++ ++# A regex matching $len hex characters. $len may be a range (e.g. 7,64). ++sub oid_nlen_regex { ++ my $len = shift; ++ my $hchr = qr/[0-9a-fA-F]/; ++ return qr/(?:(?:$hchr){$len})/; ++} ++ ++# A regex matching two sets of $nlen hex characters, prefixed by the literal ++# string $prefix and with the literal string $infix between them. ++sub oid_nlen_prefix_infix_regex { ++ my $nlen = shift; ++ my $prefix = shift; ++ my $infix = shift; ++ ++ my $rx = oid_nlen_regex($nlen); ++ ++ return qr/^\Q$prefix\E$rx\Q$infix\E$rx$/; ++} ++ +# A regex matching a valid object ID. -+our $oid_regex = qr/(?:[0-9a-fA-F]{40}(?:[0-9a-fA-F]{24})?)/; ++our $oid_regex; ++{ ++ my $x = oid_nlen_regex($sha1_len); ++ my $y = oid_nlen_regex($sha256_extra_len); ++ $oid_regex = qr/(?:$x(?:$y)?)/; ++} + # input parameters can be collected from a variety of sources (presently, CGI # and PATH_INFO), so we define an %input_params hash that collects them all @@ -37,16 +70,26 @@ return 1; } # it must be correct pathname +@@ + sub format_log_line_html { + my $line = shift; + ++ # Potentially abbreviated OID. ++ my $regex = oid_nlen_regex("7,64"); ++ + $line = esc_html($line, -nbsp=>1); + $line =~ s{ + \b @@ (?<!-) # see strbuf_check_tag_ref(). Tags can't start with - [A-Za-z0-9.-]+ (?!\.) # refs can't end with ".", see check_refname_format() - -g[0-9a-fA-F]{7,40} -+ -g[0-9a-fA-F]{7,64} ++ -g$regex | # Just a normal looking Git SHA1 - [0-9a-fA-F]{7,40} -+ [0-9a-fA-F]{7,64} ++ $regex ) \b }{ @@ -55,7 +98,8 @@ } # match <hash> - if ($line =~ m/^index [0-9a-fA-F]{40},[0-9a-fA-F]{40}/) { -+ if ($line =~ m/^index $oid_regex,$oid_regex/) { ++ if ($line =~ oid_nlen_prefix_infix_regex($sha1_len, "index ", ",") | ++ $line =~ oid_nlen_prefix_infix_regex($sha256_len, "index ", ",")) { # can match only for combined diff $line = 'index '; for (my $i = 0; $i < $diffinfo->{'nparents'}; $i++) { @@ -64,7 +108,8 @@ } - } elsif ($line =~ m/^index [0-9a-fA-F]{40}..[0-9a-fA-F]{40}/) { -+ } elsif ($line =~ m/^index $oid_regex..$oid_regex/) { ++ } elsif ($line =~ oid_nlen_prefix_infix_regex($sha1_len, "index ", "..") | ++ $line =~ oid_nlen_prefix_infix_regex($sha256_len, "index ", "..")) { # can match only for ordinary diff my ($from_link, $to_link); if ($from->{'href'}) {