Some distributors ship CD and DVD files with SHA1 hash sums instead of MD5 hash sums, so let's extend the kvm_utils functions to evaluate and compare SHA1 hashes: * hash_file(): Calculates a hash sum for a file; supports SHA1 as well as MD5 and replaces md5sum_file(). * unmap_url_cache(): Reimplementation of a function present in autotest utils that downloads a file and caches it. I'm keeping it here because I want more testing before I move all the needed function definitions to the autotest API. Also, extend the VM.create() method to support the SHA1 check. 2nd try, addressing Uri's comments. Signed-off-by: Lucas Meneghel Rodrigues <lmr@xxxxxxxxxx> --- client/tests/kvm/calc_md5sum_1m.py | 2 +- client/tests/kvm/kvm_utils.py | 105 ++++++++++++++++++++++++++++++++---- client/tests/kvm/kvm_vm.py | 20 +++++--- 3 files changed, 108 insertions(+), 19 deletions(-) diff --git a/client/tests/kvm/calc_md5sum_1m.py b/client/tests/kvm/calc_md5sum_1m.py index 2325673..153a1e0 100755 --- a/client/tests/kvm/calc_md5sum_1m.py +++ b/client/tests/kvm/calc_md5sum_1m.py @@ -18,4 +18,4 @@ else: if not os.access(fname, os.F_OK) or not os.access(fname, os.R_OK): print 'bad file name or permissions' else: - print kvm_utils.md5sum_file(fname, 1024*1024) + print kvm_utils.hash_file(fname, 1024*1024, method="md5") diff --git a/client/tests/kvm/kvm_utils.py b/client/tests/kvm/kvm_utils.py index 53b664a..f72984a 100644 --- a/client/tests/kvm/kvm_utils.py +++ b/client/tests/kvm/kvm_utils.py @@ -4,8 +4,8 @@ KVM test utility functions. @copyright: 2008-2009 Red Hat Inc. 
""" -import md5, thread, subprocess, time, string, random, socket, os, signal, pty -import select, re, logging, commands, cPickle +import md5, sha, thread, subprocess, time, string, random, socket, os, signal +import select, re, logging, commands, cPickle, pty from autotest_lib.client.bin import utils from autotest_lib.client.common_lib import error import kvm_subprocess @@ -760,23 +760,35 @@ def wait_for(func, timeout, first=0.0, step=1.0, text=None): return None -def md5sum_file(filename, size=None): +def hash_file(filename, size=None, method="md5"): """ - Calculate the md5sum of filename. + Calculate the hash of filename. If size is not None, limit to first size bytes. Throw exception if something is wrong with filename. Can be also implemented with bash one-liner (assuming size%1024==0): - dd if=filename bs=1024 count=size/1024 | md5sum - + dd if=filename bs=1024 count=size/1024 | sha1sum - - @param filename: Path of the file that will have its md5sum calculated. - @param returns: md5sum of the file. + @param filename: Path of the file that will have its hash calculated. + @param method: Method used to calculate the hash. Supported methods: + * md5 + * sha1 + @returns: Hash of the file, if something goes wrong, return None. 
""" chunksize = 4096 fsize = os.path.getsize(filename) - if not size or size>fsize: + + if not size or size > fsize: size = fsize f = open(filename, 'rb') - o = md5.new() + + if method == "md5": + hash = md5.new() + elif method == "sha1": + hash = sha.new() + else: + logging.error("Unknown hash type %s, returning None" % method) + return None + while size > 0: if chunksize > size: chunksize = size @@ -784,7 +796,78 @@ def md5sum_file(filename, size=None): if len(data) == 0: logging.debug("Nothing left to read but size=%d" % size) break - o.update(data) + hash.update(data) size -= len(data) f.close() - return o.hexdigest() + return hash.hexdigest() + + +def get_hash_from_file(hash_path, dvd_basename): + """ + Get the a hash from a given DVD image from a hash file + (Hash files are usually named MD5SUM or SHA1SUM and are located inside the + download directories of the DVDs) + + @param hash_path: Local path to a hash file. + @param cd_image: Basename of a CD image + """ + hash_file = open(hash_path, 'r') + for line in hash_file.readlines(): + if dvd_basename in line: + return line.split()[0] + + +def unmap_url_cache(cachedir, url, expected_hash, method="md5"): + """ + Downloads a file from a URL to a cache directory. If the file is already + at the expected position and has the expected hash, let's not download it + again. + + @param cachedir: Directory that might hold a copy of the file we want to + download. + @param url: URL for the file we want to download. + @param expected_hash: Hash string that we expect the file downloaded to + have. + @param method: Method used to calculate the hash string (md5, sha1). 
+ """ + # Let's convert cachedir to a canonical path, if it's not already + cachedir = os.path.realpath(cachedir) + if not os.path.isdir(cachedir): + try: + os.makedirs(cachedir) + except: + raise ValueError('Could not create cache directory %s' % cachedir) + file_from_url = os.path.basename(url) + file_local_path = os.path.join(cachedir, file_from_url) + + file_hash = None + failure_counter = 0 + while not file_hash == expected_hash: + if os.path.isfile(file_local_path): + if method == "md5": + file_hash = hash_file(file_local_path, method="md5") + elif method == "sha1": + file_hash = hash_file(file_local_path, method="sha1") + + if file_hash == expected_hash: + # File is already at the expected position and ready to go + src = file_from_url + else: + # Let's download the package again, it's corrupted... + logging.error("Seems that file %s is corrupted, trying to " + "download it again" % file_from_url) + src = url + failure_counter += 1 + else: + # File is not there, let's download it + src = url + if failure_counter > 1: + raise EnvironmentError("Consistently failed to download the " + "package %s. Aborting further download " + "attempts. 
This might mean either the " + "network connection has problems or the " + "expected hash string that was determined " + "for this file is wrong" % file_from_url) + file_path = utils.unmap_url(cachedir, src, cachedir) + + return file_path diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index a8d96ca..9aada61 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -339,20 +339,26 @@ class VM: if params.get("md5sum_1m"): logging.debug("Comparing expected MD5 sum with MD5 sum of " "first MB of ISO file...") - actual_md5sum = kvm_utils.md5sum_file(iso, 1048576) - expected_md5sum = params.get("md5sum_1m") + actual_hash = kvm_utils.hash_file(iso, 1048576, method="md5") + expected_hash = params.get("md5sum_1m") compare = True elif params.get("md5sum"): logging.debug("Comparing expected MD5 sum with MD5 sum of ISO " "file...") - actual_md5sum = kvm_utils.md5sum_file(iso) - expected_md5sum = params.get("md5sum") + actual_hash = kvm_utils.md5sum_file(iso, method="md5") + expected_hash = params.get("md5sum") + compare = True + elif params.get("sha1sum"): + logging.debug("Comparing expected SHA1 sum with SHA1 sum of " + "ISO file...") + actual_hash = kvm_utils.md5sum_file(iso, method="sha1") + expected_hash = params.get("md5sum") compare = True if compare: - if actual_md5sum == expected_md5sum: - logging.debug("MD5 sums match") + if actual_hash == expected_hash: + logging.debug("Hashes match") else: - logging.error("Actual MD5 sum differs from expected one") + logging.error("Actual hash differs from expected one") return False # Make sure the following code is not executed by more than one thread -- 1.6.2.5 -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html