On Fri, Jan 17, 2025 at 03:55:53PM +0000, Koakuma wrote: > This diff does fix the issue in `cmd_unpack_objects`, however... > > > I'm curious if it's enough. After we write to this unaligned buffer, > > naturally the next thing we'll do is read from it, and the reading > > routines will do the same cast (see unpack_all() in unpack-objects). > > It crashes in `unpack_all`, just as you guessed: > > #0 unpack_all () at builtin/unpack-objects.c:583 > 583 nr_objects = ntohl(hdr->hdr_entries); > > So I suppose the reading part needs to be adjusted as well? I guess that's not too surprising. Probably an application of get_be32() would solve it. But I do wonder if it would be simpler just to make sure the buffer is aligned. You mentioned that you tried that before and it worked. How did you do it? With a pragma/attribute, or with a union (as below)? The union thing should be portable, I'd think, but unfortunately has a lot of fallout through the code because the name of "buffer" changes (we could also declare the storage separately and make "buffer" a pointer to it, but we'd have to be careful about calls to sizeof()). diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c index 2197d6d933..65db435b46 100644 --- a/builtin/unpack-objects.c +++ b/builtin/unpack-objects.c @@ -23,7 +23,10 @@ static int dry_run, quiet, recover, has_errors, strict; static const char unpack_usage[] = "git unpack-objects [-n] [-q] [-r] [--strict]"; /* We always read in 4kB chunks. */ -static unsigned char buffer[4096]; +static union { + struct pack_header hdr; + unsigned char bytes[4096]; +} buffer; static unsigned int offset, len; static off_t consumed_bytes; static off_t max_input_size; @@ -65,24 +68,24 @@ static void add_object_buffer(struct object *object, char *buffer, unsigned long static void *fill(int min) { if (min <= len) - return buffer + offset; - if (min > sizeof(buffer)) + return buffer.bytes + offset; + if (min > sizeof(buffer.bytes)) die("cannot fill %d bytes", min); if (offset) { - the_hash_algo->update_fn(&ctx, buffer, offset); - memmove(buffer, buffer + offset, len); + the_hash_algo->update_fn(&ctx, buffer.bytes, offset); + memmove(buffer.bytes, buffer.bytes + offset, len); offset = 0; } do { - ssize_t ret = xread(0, buffer + len, sizeof(buffer) - len); + ssize_t ret = xread(0, buffer.bytes + len, sizeof(buffer.bytes) - len); if (ret <= 0) { if (!ret) die("early EOF"); die_errno("read error on input"); } len += ret; } while (len < min); - return buffer; + return buffer.bytes; } static void use(int bytes) @@ -645,18 +648,16 @@ int cmd_unpack_objects(int argc, continue; } if (starts_with(arg, "--pack_header=")) { - struct pack_header *hdr; char *c; - hdr = (struct pack_header *)buffer; - hdr->hdr_signature = htonl(PACK_SIGNATURE); - hdr->hdr_version = htonl(strtoul(arg + 14, &c, 10)); + buffer.hdr.hdr_signature = htonl(PACK_SIGNATURE); + buffer.hdr.hdr_version = htonl(strtoul(arg + 14, &c, 10)); if (*c != ',') die("bad %s", arg); - hdr->hdr_entries = htonl(strtoul(c + 1, &c, 10)); + buffer.hdr.hdr_entries = htonl(strtoul(c + 1, &c, 10)); if (*c) die("bad %s", arg); - len = sizeof(*hdr); + len = sizeof(buffer.hdr); continue; } if (skip_prefix(arg, "--max-input-size=", &arg)) { @@ -671,7 +672,7 @@ int cmd_unpack_objects(int argc, } the_hash_algo->init_fn(&ctx); unpack_all(); - the_hash_algo->update_fn(&ctx, buffer, offset); + the_hash_algo->update_fn(&ctx, buffer.bytes, offset); the_hash_algo->init_fn(&tmp_ctx); the_hash_algo->clone_fn(&tmp_ctx, &ctx); the_hash_algo->final_oid_fn(&oid, &tmp_ctx); @@ -686,7 +687,7 @@ int cmd_unpack_objects(int argc, use(the_hash_algo->rawsz); /* Write the last part of the buffer to stdout */ - write_in_full(1, buffer + offset, len); + write_in_full(1, buffer.bytes + offset, len); /* All done */ return has_errors;