PATCH - Optimization to isomd5sum media check

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi-

I'm submitting a patch that provides an optimization to the isomd5sum
media check functionality.  Presently, the algorithm calculates a
single md5sum after reading the entire disc, and reports a "PASS" or
"FAIL" at that time.  When there is corrupted data on the disc, a user
will not know until the entire disc has been read--which can take
quite a while.  It would be optimal for the algorithm to exit the
checking and report a failure as soon after reading the corrupted
section as possible.

I've accomplished this by adding two extra fields to the application
data header of the ISO image.  The ISO 9660 spec allows for a
~512-character string, of which were 224 characters are used.  I've
added two fields, "FRAGMENT SUMS" and "FRAGMENT COUNT", after which
326 characters are used.

The "FRAGMENT COUNT" is an integer representing the number of
fragments in which to break the entire ISO.  The "FRAGMENT SUMS" is an
ascii string of concatenated leading characters of md5sums of
subsequent fragments.

I've #define'd the length of FRAGMENT SUMS to 60 characters, as this
number is evenly divisible by (2, 3, 4, 5, 6, 10, 12, 15, 20, 30). 
I've also #define'd FRAGMENT COUNT to be 20, which means that ISO will
be broken into 20 fragments, each fragment represented by a
3-character string (the first 3 characters of the fragment's md5sum)
in FRAGMENT SUMS.

Looking at an example, the application data header used to look like:
ISO MD5SUM = 1fca2222a947399d5b4a4226629be254;SKIPSECTORS =
15;RHLISOSTATUS=1;THIS IS NOT THE SAME AS RUNNING MD5SUM ON THIS ISO!!

It now looks like:
ISO MD5SUM = 1fca2222a947399d5b4a4226629be254;SKIPSECTORS =
15;RHLISOSTATUS=0;FRAGMENT SUMS =
67bd3263cccfda2d851a493b9a238e8877c45bca78e276e94437dd9f26e4;FRAGMENT
COUNT= 20;THIS IS NOT THE SAME AS RUNNING MD5SUM ON THIS ISO!!

The string [67bd3263cccfda2d851a493b9a238e8877c45bca78e276e94437dd9f26e4]
is actually a concatenation of:
md5(frag(0))[0-2] = 67b
md5(frag(1))[0-2] = d32
md5(frag(2))[0-2] = 63c
md5(frag(3))[0-2] = ccf
md5(frag(4))[0-2] = da2
md5(frag(5))[0-2] = d85
md5(frag(6))[0-2] = 1a4
md5(frag(7))[0-2] = 93b
md5(frag(8))[0-2] = 9a2
md5(frag(9))[0-2] = 38e
md5(frag(10))[0-2] = 887
md5(frag(11))[0-2] = 7c4
md5(frag(12))[0-2] = 5bc
md5(frag(13))[0-2] = a78
md5(frag(14))[0-2] = e27
md5(frag(15))[0-2] = 6e9
md5(frag(16))[0-2] = 443
md5(frag(17))[0-2] = 7dd
md5(frag(18))[0-2] = 9f2
md5(frag(19))[0-2] = 6e4

Thus, if any one of these prescribed strings does not match what is
calculated, the checking exits immediately, as a corruption has been
detected.  In the cases where the error is near the beginning of the
disc, the time to identify the problem is greatly reduced.

Note that there is a chance of collision.  When the disc is broken
into 20 fragments, each fragment is represented by a 3-character
string, yielding a collision 1 in 16^3=4096 cases.  Such a collision
only means that the user will not benefit from the optimization, as
the entire disc will be checked by the last operation just like it has
always been done and the error will be detected at that time.  One can
reduce the chances of collision by decreasing the number of fragments,
or expanding the length of the FRAGMENT SUMS string.

This optimization occurs at virtually no performance cost.  Media
checking is overwhelmingly I/O bound, reading from disc.  The
additional calculations of smaller sums adds little  additional
processing time, as most of that time was being spent waiting on new
data from disc.

Please consider for inclusion in Anaconda, and I'll be glad to answer
any questions or address concerns.  I think the patch is pretty
simple, and isolated to two files.

Thanks!
Dustin Kirkland
diff -Naur anaconda-10.1.0.2/isomd5sum/libcheckisomd5.c anaconda-10.1.0.2.dustin/isomd5sum/libcheckisomd5.c
--- anaconda-10.1.0.2/isomd5sum/libcheckisomd5.c	2005-03-25 13:42:57.325502288 -0500
+++ anaconda-10.1.0.2.dustin/isomd5sum/libcheckisomd5.c	2005-03-25 14:16:33.118055240 -0500
@@ -15,16 +15,20 @@
 #define APPDATA_OFFSET 883
 #define SIZE_OFFSET 84
 
+/* Length in characters of string used for fragment md5sum checking  */
+#define FRAGMENT_SUM_LENGTH 60
+
 #define MAX(x, y)  ((x > y) ? x : y)
 #define MIN(x, y)  ((x < y) ? x : y)
 
 /* finds primary volume descriptor and returns info from it */
 /* mediasum must be a preallocated buffer at least 33 bytes long */
-static int parsepvd(int isofd, char *mediasum, int *skipsectors, long long *isosize, int *supported) {
+/* fragmentsums must be a preallocated buffer at least FRAGMENT_SUM_LENGTH+1 bytes long */
+static int parsepvd(int isofd, char *mediasum, int *skipsectors, long long *isosize, int *supported, char *fragmentsums, long long *fragmentcount) {
     unsigned char buf[2048];
     unsigned char buf2[512];
     unsigned char tmpbuf[512];
-    int skipfnd, md5fnd, supportedfnd;
+    int skipfnd, md5fnd, supportedfnd, fragsumfnd, fragcntfnd;
     unsigned int loc;
     long long offset;
     unsigned char *p;
@@ -54,6 +58,8 @@
 
     md5fnd = 0;
     skipfnd = 0;
+    fragsumfnd = 0;
+    fragcntfnd = 0;
     supportedfnd = 0;
     loc = 0;
     while (loc < 512) {
@@ -92,14 +98,46 @@
 	} else if (!strncmp(buf2 + loc, "RHLISOSTATUS=1", 14)) {
 	    *supported = 1;
 	    supportedfnd = 1;
+	    for (p=buf2+loc; *p != ';' && loc < 512; p++, loc++);
 	} else if (!strncmp(buf2 + loc, "RHLISOSTATUS=0", 14)) {
 	    *supported = 0;
 	    supportedfnd = 1;
-	} else {
+	    for (p=buf2+loc; *p != ';' && loc < 512; p++, loc++);
+	} else if (!strncmp(buf2 + loc, "FRAGMENT SUMS = ", 16)) {
+            /* make sure we dont walk off end */
+            if ((loc + FRAGMENT_SUM_LENGTH) > 511)
+                return -1;
+
+            memcpy(fragmentsums, buf2 + loc + 16, FRAGMENT_SUM_LENGTH);
+            fragmentsums[FRAGMENT_SUM_LENGTH] = '\0';
+            fragsumfnd = 1;
+            loc += FRAGMENT_SUM_LENGTH + 16;
+            for (p=buf2+loc; *p != ';' && loc < 512; p++, loc++);
+        } else if (!strncmp(buf2 + loc, "FRAGMENT COUNT = ", 17)) {
+            char *errptr;
+            /* make sure we dont walk off end */
+            if ((loc + 17) > 511)
+                return -1;
+
+            loc = loc + 17;
+            for (p=tmpbuf; buf2[loc] != ';' && loc < 512; p++, loc++)
+                *p = buf2[loc];
+
+            *p = '\0';
+
+            *fragmentcount = strtol(tmpbuf, &errptr, 10);
+            if (errptr && *errptr) {
+                return -1;
+            } else {
+                fragcntfnd = 1;
+            }
+
+            for (p=buf2+loc; *p != ';' && loc < 512; p++, loc++);
+        } else {
 	    loc++;
 	}
 
-	if ((skipfnd & md5fnd) & supportedfnd)
+	if ((skipfnd & md5fnd & fragsumfnd & fragcntfnd) & supportedfnd)
  	    break;
     }
 	    
@@ -119,19 +157,26 @@
 /* both strings must be pre-allocated at least 33 chars in length        */
 static int checkmd5sum(int isofd, char *mediasum, char *computedsum, int quiet) {
     int nread;
-    int i;
+    int i, j;
     int appdata_start_offset, appdata_end_offset;
     int nattempt;
     int skipsectors;
     int supported;
+    int current_fragment = 0;
+    int previous_fragment = 0;
     unsigned int bufsize = 32768;
     unsigned char md5sum[16];
+    unsigned char fragmd5sum[16];
     unsigned int len;
     unsigned char *buf;
     long long isosize, offset, pvd_offset, apoff;
+    char fragmentsums[FRAGMENT_SUM_LENGTH];
+    char thisfragsum[FRAGMENT_SUM_LENGTH];
+    long long fragmentcount = 0;
     MD5_CTX md5ctx;
+    MD5_CTX fragmd5ctx;
 
-    if ((pvd_offset = parsepvd(isofd, mediasum, &skipsectors, &isosize, &supported)) < 0)
+    if ((pvd_offset = parsepvd(isofd, mediasum, &skipsectors, &isosize, &supported, fragmentsums, &fragmentcount)) < 0)
 	return -1;
 
     /*    printf("Mediasum = %s\n",mediasum); */
@@ -140,6 +185,9 @@
     lseek(isofd, 0L, SEEK_SET);
 
     MD5_Init(&md5ctx);
+    if (fragmentcount) 
+        MD5_Init(&fragmd5ctx);
+
 
     offset = 0;
     apoff = pvd_offset + APPDATA_OFFSET;
@@ -178,7 +226,33 @@
 	    memset(buf+appdata_start_offset, ' ', len);
 	}
 
+	/* update overall md5ctx */
 	MD5_Update(&md5ctx, buf, nread);
+        if (fragmentcount) {
+  	    /* update the fragment md5ctx */
+            current_fragment = offset * (fragmentcount+1) / (isosize - skipsectors*2048);
+            MD5_Update(&fragmd5ctx, buf, nread);
+            /* if we're onto the next fragment, calculate the previous sum and check */
+            if ( current_fragment != previous_fragment ) {
+                MD5_Final(fragmd5sum, &fragmd5ctx);
+                *computedsum = '\0';
+                j = (current_fragment-1)*FRAGMENT_SUM_LENGTH/fragmentcount;
+                for (i=0; i<FRAGMENT_SUM_LENGTH/fragmentcount; i++) {
+                    char tmpstr[2];
+                    snprintf(tmpstr, 2, "%01x", fragmd5sum[i]);
+                    strncat(computedsum, tmpstr, 2);
+                    thisfragsum[i] = fragmentsums[j++];
+                }
+                thisfragsum[j] = '\0';
+                /*  printf("\nFragment [%i]: %s ?= %s\n", previous_fragment, computedsum, thisfragsum);   */
+                previous_fragment = current_fragment;
+                /* Exit immediatiately if current fragment sum is incorrect */
+                if (strcmp(thisfragsum, computedsum) != 0) {
+                    free(buf);
+                    return 0;
+                }
+            }
+        }
 	offset = offset + nread;
 	
 	if (!quiet) {
@@ -201,7 +275,7 @@
     for (i=0; i<16; i++) {
 	char tmpstr[4];
 	snprintf (tmpstr, 4, "%02x", md5sum[i]);
-	strcat(computedsum, tmpstr);
+	strncat(computedsum, tmpstr, 2);
     }
 
     /*    printf("mediasum, computedsum = %s %s\n", mediasum, computedsum); */
@@ -210,7 +284,7 @@
 	return 0;
     else
 	return 1;
-    }
+}
 
 
 #if 0
@@ -233,8 +307,10 @@
 static int doMediaCheck(int isofd, char *mediasum, char *computedsum, long long *isosize, int *supported, int quiet) {
     int rc;
     int skipsectors;
+    long long fragmentcount = 0;
+    char fragmentsums[FRAGMENT_SUM_LENGTH+1];
 
-    if (parsepvd(isofd, mediasum, &skipsectors, isosize, supported) < 0) {
+    if (parsepvd(isofd, mediasum, &skipsectors, isosize, supported, fragmentsums, &fragmentcount) < 0) {
 	fprintf(stderr, "Unable to read the disc checksum from the "
 			 "primary volume descriptor.\nThis probably "
 			 "means the disc was created without adding the "
@@ -290,6 +366,8 @@
     int isofd;
     char mediasum[64];
     long long isosize;
+    char fragmentsums[FRAGMENT_SUM_LENGTH+1];
+    long long fragmentcount = 0;
     int supported;
     int skipsectors;
 
@@ -300,7 +378,7 @@
 	exit(1);
     }
 
-    if (parsepvd(isofd, mediasum, &skipsectors, &isosize, &supported) < 0) {
+    if (parsepvd(isofd, mediasum, &skipsectors, &isosize, &supported, fragmentsums, &fragmentcount) < 0) {
 	fprintf(stderr, "%s: Could not get pvd data", file);
 	fprintf(stderr, "\nUnable to read the disc checksum from the "
 			 "primary volume descriptor.\nThis probably "
@@ -312,4 +390,8 @@
     close(isofd);
     
     printf("%s:   %s\n", file, mediasum);
+    if ( (strlen(fragmentsums) > 0) && (fragmentcount > 0) ) {
+        printf("Fragment sums: %s\n", fragmentsums);
+        printf("Fragment count: %lld\n", fragmentcount); 
+    }
 }
diff -Naur anaconda-10.1.0.2/isomd5sum/libimplantisomd5.c anaconda-10.1.0.2.dustin/isomd5sum/libimplantisomd5.c
--- anaconda-10.1.0.2/isomd5sum/libimplantisomd5.c	2005-03-25 09:34:34.000000000 -0500
+++ anaconda-10.1.0.2.dustin/isomd5sum/libimplantisomd5.c	2005-03-25 11:45:43.000000000 -0500
@@ -15,6 +15,12 @@
 #define APPDATA_OFFSET 883
 #define SIZE_OFFSET 84
 
+/* Length in characters of string used for fragment md5sum checking  */
+#define FRAGMENT_SUM_LENGTH 60
+/* FRAGMENT_COUNT must be an integral divisor or FRAGMENT_SUM_LENGTH */
+/* 60 => 2, 3, 4, 5, 6, 10, 12, 15, 20, or 30 */
+#define FRAGMENT_COUNT 20
+
 /* number of sectors to ignore at end of iso when computing sum */
 #define SKIPSECTORS 15
 
@@ -87,15 +93,22 @@
     int nread;
     int dirty;
     int pvd_offset;
+    int current_fragment = 0;
+    int previous_fragment = 0;
+    int nattempt;
     long long isosize, total;
     unsigned char md5sum[16];
+    unsigned char fragmd5sum[16];
     unsigned int loc;
-    unsigned char buf[2048];
+    unsigned int bufsize = 32768;
+    unsigned char *buf;
     unsigned char orig_appdata[512];
     unsigned char new_appdata[512];
     unsigned char mediasum[33];
     char md5str[40];
+    char fragstr[FRAGMENT_SUM_LENGTH+1];
     MD5_CTX md5ctx;
+    MD5_CTX fragmd5ctx;
 
     isofd = open(fname, O_RDWR);
 
@@ -138,18 +151,39 @@
     lseek(isofd, 0L, SEEK_SET);
 
     MD5_Init(&md5ctx);
+    MD5_Init(&fragmd5ctx);
+    *fragstr = '\0';
+    buf = malloc(bufsize * sizeof(unsigned char));
 
     total = 0;
     /* read up to 15 sectors from end, due to problems reading last few */
     /* sectors on burned CDs                                            */
     while (total < isosize - SKIPSECTORS*2048) {
-	nread = read(isofd, buf, 2048);
+        nattempt = MIN(isosize - SKIPSECTORS*2048 - total, bufsize);
+	nread = read(isofd, buf, nattempt);
+
 	if (nread <= 0)
 	    break;
 
 	MD5_Update(&md5ctx, buf, nread);
+        MD5_Update(&fragmd5ctx, buf, nread);
+
+        /* if we're onto the next fragment, calculate the previous sum and write */
+        current_fragment = total * (FRAGMENT_COUNT+1) / (isosize - SKIPSECTORS*2048);
+        if ( current_fragment != previous_fragment ) {
+            MD5_Final(fragmd5sum, &fragmd5ctx);
+            for (i=0; i<FRAGMENT_SUM_LENGTH/FRAGMENT_COUNT; i++) {
+                char tmpstr[2];
+                snprintf(tmpstr, 2, "%01x", fragmd5sum[i]);
+                strncat(fragstr, tmpstr, 2);
+            }
+            /*  printf("\nFragment [%i]: %s\n", previous_fragment, fragstr);  */
+            previous_fragment = current_fragment;
+        }
+
 	total = total + nread;
     }
+    free(buf);
 
     MD5_Final(md5sum, &md5ctx);
 
@@ -157,12 +191,15 @@
     for (i=0; i<16; i++) {
 	char tmpstr[4];
 	snprintf (tmpstr, 4, "%02x", md5sum[i]);
-	strcat(md5str, tmpstr);
+	strncat(md5str, tmpstr, 2);
     }
 
     if (!quiet) {
 	printf("Inserting md5sum into iso image...\n");
 	printf("md5 = %s\n", md5str);
+	printf("Inserting fragment md5sums into iso image...\n");
+	printf("fragmd5 = %s\n", fragstr);
+	printf("frags = %d\n", FRAGMENT_COUNT);
     }
     /*    memcpy(new_appdata, orig_appdata, 512); */
     memset(new_appdata, ' ', 512);
@@ -171,9 +208,12 @@
     loc = writeAppData(new_appdata, "ISO MD5SUM = ", loc);
     loc = writeAppData(new_appdata, md5str, loc);
     loc = writeAppData(new_appdata, ";", loc);
-    snprintf(buf, sizeof(buf), "SKIPSECTORS = %d", SKIPSECTORS);
+
+    buf = malloc(512 * sizeof(unsigned char));
+    snprintf(buf, 512, "SKIPSECTORS = %d", SKIPSECTORS);
     loc = writeAppData(new_appdata, buf, loc);
     loc = writeAppData(new_appdata, ";", loc);
+    free(buf);
 
     if (supported) {
 	if (!quiet)
@@ -187,6 +227,16 @@
 	
     loc = writeAppData(new_appdata, ";", loc);
 
+    loc = writeAppData(new_appdata, "FRAGMENT SUMS = ", loc);
+    loc = writeAppData(new_appdata, fragstr, loc);
+    loc = writeAppData(new_appdata, ";", loc);
+
+    buf = malloc(512 * sizeof(unsigned char));
+    snprintf(buf, 512, "FRAGMENT COUNT = %d", FRAGMENT_COUNT);
+    loc = writeAppData(new_appdata, buf, loc);
+    loc = writeAppData(new_appdata, ";", loc);
+    free(buf);
+
     loc = writeAppData(new_appdata, "THIS IS NOT THE SAME AS RUNNING MD5SUM ON THIS ISO!!", loc);
     
     i = lseek(isofd, pvd_offset + APPDATA_OFFSET, SEEK_SET);

[Index of Archives]     [Kickstart]     [Fedora Users]     [Fedora Legacy List]     [Fedora Maintainers]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [Yosemite Photos]     [KDE Users]     [Fedora Tools]
  Powered by Linux