I just stumbled upon this while checking a few mailing lists.
I haven't found any mails about it in the archives yet, so I assume that
no mail has been written about it yet.
The problem is described here:
http://sourceforge.net/projects/sevenzip/forums/forum/45798/topic/5322604
Basically, while this is not a problem for GNU tar, the correct checksum
should be computed using unsigned values.
The attached trivial test case shows the difference.
A patch making the change shown in the test case is also attached.
#include <stdio.h>
#include <string.h>
/*
 * Sum the bytes of the NUL-terminated string at `buffer`.
 *
 * sign == 0: each byte is added as an unsigned char (0..255).
 * sign != 0: each byte is added as a signed char, so bytes >= 0x80
 *            contribute negative values (the unsigned sum wraps).
 *
 * The signedness is spelled out with explicit casts because whether plain
 * `char` is signed is implementation-defined; relying on it would make the
 * two modes indistinguishable on platforms where `char` is unsigned.
 */
static unsigned int ustar_header_chksum(const void *buffer, int sign)
{
	const char *p = buffer;
	const char *end = p + strlen(p); /* computed once, not per iteration */
	unsigned int chksum = 0;

	while (p < end) {
		if (sign)
			chksum += (signed char)*p;
		else
			chksum += (unsigned char)*p;
		p++;
	}
	return chksum;
}
/*
 * Print the checksum of a non-ASCII sample string twice, one value per
 * line: first with sign = 0, then with sign = 1.
 */
int main(int argc, char **argv)
{
	const char *sample = "żółte źrebię";
	unsigned int sum_sign_off = ustar_header_chksum(sample, 0);
	unsigned int sum_sign_on = ustar_header_chksum(sample, 1);

	printf("%u\n", sum_sign_off);
	printf("%u\n", sum_sign_on);
	return 0;
}
--- archive-tar.c 2012-04-26 21:25:49.000000000 +0200
+++ archive-tar.c 2012-06-13 16:43:59.220945967 +0200
@@ -104,11 +104,11 @@ static unsigned int ustar_header_chksum(
char *p = (char *)header;
unsigned int chksum = 0;
while (p < header->chksum)
- chksum += *p++;
+ chksum += (unsigned char)*p++;
chksum += sizeof(header->chksum) * ' ';
p += sizeof(header->chksum);
while (p < (char *)header + sizeof(struct ustar_header))
- chksum += *p++;
+ chksum += (unsigned char)*p++;
return chksum;
}