On 07/14/2011 11:03 PM, Michael K. Johnson wrote:
Thanks again to David for the review. Here's what I've done this
evening. Tested working installs and a bunch of failure cases,
including bad URLs, unsupported archive types, and corrupted
archives.
I like this patch, but have some comments as well. See below.
#
# archive.py: An anaconda backend to install from a system archive
#
# The intent is to be able to install an archive (or set of archives)
# similarly to a livecd install, except that there is no need
# to move files around afterward to handle multiple filesystems,
# or to resize a filesystem. This archive could be located at a
# network location or be on install media. The archive is assumed
# to contain all package-managed content.
#
# Copyright (C) 2011 Michael K Johnson. All rights reserved.
# Copyright (C) 2007 Red Hat, Inc. All rights reserved.
Lose the 'All rights reserved.' here. If it's in our file, that's
wrong. I've received talkings-to about this. Bottom line is we need to
not include the 'All rights reserved.' statement.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
# Author(s): Michael K Johnson <a1237@xxxxxxxxx>
#
import os
import signal
import stat
import subprocess
import sys
from urlgrabber.grabber import URLGrabber, URLGrabError
import storage
from constants import *
import gettext
_ = lambda x: gettext.ldgettext("anaconda", x)
import backend
import isys
import iutil
import network
import packages
import logging
log = logging.getLogger("anaconda")
class ProgressCallback(object):
    """Relays unpack progress to the installer's progress widget."""

    def __init__(self, progress):
        self.progress = progress

    def update(self, current, total=None):
        """Show bytes unpacked so far; total may be None (unknown)."""
        if total is None:
            shownTotal = 'unknown'
        else:
            self.progress.set_fraction(current / float(total))
            shownTotal = '%d' % (total / 1024)
        self.progress.set_label('Unpacked %d of %s KiB' % (
            current / 1024, shownTotal))
        self.progress.processEvents()
class ChunkedData(object):
    """Iterate over a file-like object in fixed-size chunks, reporting
    each chunk's length to sourceObj.update() for progress tracking.

    fobj      -- object with a read(size) method
    sourceObj -- object with an update(length) method
    """

    # bytes per read; large enough to keep the extractor pipe busy
    # without buffering much in memory
    CHUNK_SIZE = 1024 * 1024

    def __init__(self, fobj, sourceObj):
        self.fobj = fobj
        self.sourceObj = sourceObj

    def __iter__(self):
        return self

    def next(self):
        """Return the next chunk, or raise StopIteration at EOF."""
        data = self.fobj.read(self.CHUNK_SIZE)
        if not data:
            raise StopIteration
        self.sourceObj.update(len(data))
        return data

    # Python 3 spells the iterator protocol method __next__; aliasing it
    # makes the class work under both interpreters
    __next__ = next
class MissingUtility(RuntimeError):
    """Raised when the helper program required for an archive or
    compression format is not installed.

    utilityName -- the program that could not be found
    formatName  -- the format that needed it
    """

    def __init__(self, utilityName, formatName, *args, **kw):
        RuntimeError.__init__(self, *args, **kw)
        self.utilityName = utilityName
        self.formatName = formatName
class FailedUtility(RuntimeError):
    """Raised when a decompress/unarchive subprocess exits non-zero.

    message -- human-readable description of the failure
    """

    def __init__(self, message, *args, **kw):
        RuntimeError.__init__(self, *args, **kw)
        self.args = args
        self.message = message
class InvalidRepository(RuntimeError):
    """Raised when the archive repository layout cannot be used
    (e.g. multiple .desc files, or multiple archives without one).

    message -- human-readable description of the problem
    """

    def __init__(self, message, *args, **kw):
        RuntimeError.__init__(self, *args, **kw)
        self.args = args
        self.message = message
class MissingNetwork(RuntimeError):
    """Raised when a network install method is selected but the
    network could not be enabled."""
def archiveFormat(filename):
    """Classify an archive by filename extension.

    Returns (archiveType, compressionType) -- e.g. ('tar', 'gz') --
    where compressionType is '' for an uncompressed archive, or False
    if the extension is not recognized.  Note .tar.Z maps to the 'gz'
    decompressor, which also handles compress(1) data.
    """
    # this should be updated with whatever archive and compression
    # formats are supported in the future
    formatkeys = (
        (('.tar.gz', '.tgz', '.tar.Z'), ('tar', 'gz')),
        (('.tar.bz2', '.tbz2'), ('tar', 'bz2')),
        # BUGFIX: was 'txz' (no dot), which matched any name merely
        # ending in the letters "txz"
        (('.tar.xz', '.txz'), ('tar', 'xz')),
        (('.tar',), ('tar', '')),
        (('.cpio.gz', '.cpio.Z'), ('cpio', 'gz')),
        (('.cpio.bz2',), ('cpio', 'bz2')),
        (('.cpio.xz',), ('cpio', 'xz')),
        (('.cpio',), ('cpio', '')),
    )
    for extensions, formats in formatkeys:
        for extension in extensions:
            if filename.endswith(extension):
                return formats
    return False
How about using the 'magic' module (yum install python-magic) here
instead of keying on specific file name endings? The magic module can
give you the description that file(1) would print out or the mime type.
I think the mime type would be useful here and would prevent us from
having users file bugs when they have an image file named *.TAR.GZ and
this code doesn't work, for example.
Example for magic module:
import magic
m = magic.open(magic.MAGIC_MIME)
m.load()
type = m.file("/path/to/image/file")
'type' will be something like:
'application/x-gzip; charset=binary'
'application/x-bzip2; charset=binary'
'application/x-xz; charset=binary'
'application/x-tar; charset=binary'
'application/octet-stream; charset=binary' <- cpio gives me this, meh?
This will require separating the type checks for the compressed image
and the datastream, but I think this approach will be more flexible down
the road.
class AbstractSource(object):
    """Base class for archive sources.

    Subclasses must set self.sources (a list of (filename, size) pairs,
    size possibly None) and implement openfile(filename) returning a
    readable file-like object.  Iterating a source yields
    (archiveType, compressionType, ChunkedData) triples.
    """

    def __init__(self):
        self.curLen = 0        # bytes delivered to the extractor so far
        self.totalLen = None   # total bytes across all sources, if known
        self.progress = None   # ProgressCallback, set via setProgress()

    def setProgress(self, progress):
        self.progress = progress

    def setTotalLen(self):
        # only a complete set of sizes yields a meaningful total
        if None not in set(x[1] for x in self.sources):
            self.totalLen = sum(x[1] for x in self.sources)

    def update(self, length):
        self.curLen += length
        self.progress.update(self.curLen, self.totalLen)

    def processDescription(self, dirname, description):
        """Populate self.sources from a .desc file-like object."""
        self.sources = []
        for line in description.readlines():
            # tab-delimited:
            # filename (relative to directory .desc is in),
            # (optional) size in bytes (in decimal)
            line = line.strip()
            if not line:
                # BUGFIX: a blank/trailing-newline line used to create a
                # bogus (dirname + '/', None) source entry
                continue
            filename, size = (line.split('\t') + [None])[0:2]
            filename = '/'.join((dirname, filename))
            if isinstance(size, str):
                size = int(size)
            self.sources.append((filename, size))

    def __iter__(self):
        return self

    def next(self):
        """Open the next archive; raise StopIteration when exhausted."""
        if not self.sources:
            raise StopIteration
        filename, size = self.sources.pop(0)
        archiveType, compressionType = archiveFormat(filename)
        dataSource = self.openfile(filename)
        return archiveType, compressionType, ChunkedData(dataSource, self)

    # Python 3 iterator protocol compatibility
    __next__ = next
class URLSource(AbstractSource):
    """Archive source fetched over the network via urlgrabber.

    A URL ending in .desc is parsed as a description file listing the
    archives; any other URL is treated as a single archive.
    """

    def __init__(self, url):
        AbstractSource.__init__(self)
        if url.endswith('.desc'):
            self.processDescription(os.path.dirname(url),
                                    URLGrabber().urlopen(url))
        else:
            self.sources = [(url, None)]
        # We need sizes in order to give progress during the install.
        # If the desc file is missing, or does not contain sizes, then
        # we'll get the headers twice. Small price for simplicity, and
        # if you don't like that, create a .desc file...
        for idx in range(len(self.sources)):
            sourceUrl, sourceSize = self.sources[idx]
            if sourceSize is None:
                handle = URLGrabber().urlopen(sourceUrl)
                length = handle.hdr.getheader('Content-Length')
                if length is not None:
                    self.sources[idx] = (sourceUrl, int(length))
                del handle
        self.setTotalLen()

    def openfile(self, url):
        return URLGrabber().urlopen(url)
class DirectorySource(AbstractSource):
    """Archive source backed by a local directory tree (cdrom or NFS).

    The tree may contain either a single archive, or several archives
    described by exactly one .desc file.

    Raises InvalidRepository when the layout is ambiguous or empty.
    """

    def __init__(self, directory):
        AbstractSource.__init__(self)
        descriptions = []
        archives = []
        for dirname, dirs, files in os.walk(directory):
            descriptions.extend('/'.join((dirname, x))
                for x in files if x.endswith('.desc'))
            archives.extend('/'.join((dirname, x))
                for x in files if archiveFormat(x))
        if len(descriptions) > 1:
            raise InvalidRepository(_('Only one .desc file allowed (%s)')
                % ' '.join(descriptions))
        if len(archives) > 1 and not descriptions:
            raise InvalidRepository(
                _('More than one archive requires .desc file (%s)')
                % ' '.join(archives))
        if descriptions:
            d = descriptions[0]
            self.processDescription(os.path.dirname(d), open(d))
        else:
            if not archives:
                # BUGFIX: previously fell through to archives[0] and
                # raised an unhandled IndexError on an empty tree
                raise InvalidRepository(
                    _('No archive found in %s') % directory)
            source = archives[0]
            size = os.stat(source).st_size
            self.sources = [(source, size)]
        # fill in any sizes the .desc file did not provide
        for i in range(len(self.sources)):
            if self.sources[i][1] is None:
                size = os.stat(self.sources[i][0]).st_size
                self.sources[i] = (self.sources[i][0], size)
        self.setTotalLen()

    def openfile(self, filename):
        return open(filename)
# http://www.chiark.greenend.org.uk/ucgi/~cjwatson/blosxom/2009-07-02-python-sigpipe.html
def subprocess_setup():
    """preexec_fn for child processes: restore default SIGPIPE handling
    (Python ignores SIGPIPE) so tar/cpio die cleanly when gzip is done."""
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)
class ArchiveExtractor(object):
    """Unpack a single archive stream into a target root directory.

    Data fed to write() is piped through a decompressor process into an
    unarchiver (tar or cpio) whose working directory is the root.
    Subprocess output is appended to logs under <root>/root/.
    """

    # argv for each supported archiver; both read the stream from stdin
    extractMap = {
        'tar': ['tar', 'vvvixSf', '-'],
        'cpio': ['cpio', '-ivumd']
    }
    # argv for each supported compression ('' means none, hence cat)
    decompressMap = {
        '': ['cat'],
        'Z': ['gzip', '-dc'],
        'gz': ['gunzip', '-dc'],
        'bz2': ['bunzip2', '-dc'],
        'xz': ['xz', '-dc'],
    }

    def __init__(self, root, compression, archiveFormat):
        self.root = root
        self.compression = compression
        self.archiveFormat = archiveFormat
        self.decompressor = self.decompressMap[compression]
        self.extractor = self.extractMap[archiveFormat]
        # fail early if a required helper program is missing
        for argv, fmt in ((self.decompressor, compression),
                          (self.extractor, archiveFormat)):
            if not iutil.find_program_in_path(argv[0]):
                raise MissingUtility(argv[0], fmt)

    def open(self):
        """Open the logs and start the decompress | unarchive pipeline."""
        targetRoot = self.root
        self.outlog = open(targetRoot + '/root/archiveInstall.out.log', 'a')
        self.errlog = open(targetRoot + '/root/archiveInstall.err.log', 'a')
        self.decompress = subprocess.Popen(
            self.decompressor,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=self.errlog,
            close_fds=True,
            preexec_fn=subprocess_setup,
            cwd=targetRoot)
        self.unarchive = subprocess.Popen(
            self.extractor,
            stdin=self.decompress.stdout,
            stdout=self.outlog,
            stderr=self.errlog,
            close_fds=True,
            preexec_fn=subprocess_setup,
            cwd=targetRoot)
        # Drop our reference so the unarchiver is the pipe's only reader;
        # see http://www.enricozini.org/2009/debian/python-pipes/
        self.decompress.stdout.close()

    def write(self, data):
        """Feed a chunk of raw archive data into the pipeline."""
        self.decompress.stdin.write(data)

    def flush(self):
        self.decompress.stdin.flush()

    def close(self):
        """Finish the stream and reap both subprocesses.

        Raises FailedUtility if either process exited non-zero.
        """
        self.flush()
        self.decompress.stdin.close()
        status = self.unarchive.wait()
        if status:
            raise FailedUtility(_("Failed to unpack archive"))
        status = self.decompress.wait()
        if status:
            raise FailedUtility(_("Failed to decompress archive"))
        self.outlog.close()
        self.errlog.close()
        self.unarchive = None
        self.decompress = None
For extraction, is there any reason we can't use Python's 'tarfile' module?
Likewise, for cpio I see there is the 'cpioarchive' module (yum install
python-cpio), but it does not seem as functional as the cpio program.
I think we should favor Python modules over running external programs
when possible, even if the module is really just doing the same. The
advantage here is that there will be other users of the Python module
and someone else maintaining the module. In instances where there is a
module but it doesn't quite work for us, I think we should either (a)
work with the module author to incorporate the changes we need or
failing that (b) take to invoking the external tool directly, be it a
library or console program.
def archiveSource(directory=None, url=None):
    """Factory: build the appropriate archive source.

    A non-empty url wins and yields a URLSource; otherwise directory is
    used to build a DirectorySource.
    """
    if url:
        source = URLSource(url)
    else:
        source = DirectorySource(directory)
    return source
class ArchiveBackend(backend.AnacondaBackend):
    """Anaconda backend that installs by unpacking system archives onto
    the target filesystems instead of installing packages.

    The archive source may be local media (cdrom:), an NFS mount (nfs:),
    or a http/ftp URL; see doBackendSetup().
    """

    def __init__(self, anaconda):
        backend.AnacondaBackend.__init__(self, anaconda)
        # archive installs are image-based: no upgrades, no package UI
        self.supportsUpgrades = False
        self.supportsPackageSelection = False
        self.archiveSource = None

    def doBackendSetup(self, anaconda):
        """Resolve anaconda.methodstr (prompting on error) into an
        archive source.  Returns DISPATCH_BACK when the user backs out.
        """
        if anaconda.dir == DISPATCH_BACK:
            return DISPATCH_BACK
        intf = anaconda.intf
        m = anaconda.methodstr
        while True:
            if m is None:
                # long blanks make entry window longer...
                m = intf.entryWindow(_("Please enter archive URL"), 60*" ")
            try:
                if m.startswith('cdrom:'):
                    method, location = m.split(':', 1)
                    if not location:
                        location = '/mnt/source'
                    self._getArchiveSource(topdirectory=location)
                else:
                    try:
                        if m.startswith('nfs:'):
                            if not network.hasActiveNetDev():
                                if not intf.enableNetwork():
                                    raise MissingNetwork
                            (opts, server, path) = iutil.parseNfsUrl(m)
                            isys.mount(server+':'+path, '/mnt/source', 'nfs',
                                       options=opts)
                            self._getArchiveSource(directory='/mnt/source')
                        elif m.startswith('http:') or m.startswith('ftp:'):
                            if not network.hasActiveNetDev():
                                if not intf.enableNetwork():
                                    raise MissingNetwork
                            self._getArchiveSource(url=m)
                    except MissingNetwork:
                        # Keep this error in sync with yuminstall.py
                        # for translation purposes
                        rc = intf.messageWindow(_("No Network Available"),
                            _("Some of your software repositories require "
                            "networking, but there was an error enabling the "
                            "network on your system."),
                            type="custom", custom_icon="error",
                            custom_buttons = [_("_Exit installer"), _("_Back")])
                        if rc == 0:
                            sys.exit(1)
                        elif rc == 1:
                            return DISPATCH_BACK
            except (InvalidRepository, SystemError) as e:
                # SystemError on failure to mount NFS
                rc = intf.messageWindow(_("Invalid Repository"),
                    e.message, type="custom", custom_icon="error",
                    custom_buttons = [_("_Exit installer"), _("_Retry")])
                if rc == 0:
                    sys.exit(1)
                m = intf.entryWindow(_("Please enter archive URL"), m)
                continue
            return

    def _getArchiveSource(self, topdirectory=None, directory=None, url=None):
        """Create and store the archive source; topdirectory denotes
        media with archives under <topdirectory>/archives/."""
        if topdirectory:
            directory = topdirectory + '/archives/'
        s = archiveSource(directory=directory, url=url)
        self.archiveSource = s

    def doInstall(self, anaconda):
        """Stream every archive through its extractor onto
        self.instPath, updating install progress as data is unpacked."""
        log.info("Preparing to install archive")
        intf = anaconda.intf
        progress = intf.instProgress
        progress.set_label(_("Unpacking archive to hard drive."))
        progress.processEvents()
        progressCallback = ProgressCallback(intf.instProgress)
        self.archiveSource.setProgress(progressCallback)
        try:
            for archiveType, compressionType, inputData in self.archiveSource:
                # BUGFIX: was named 'e', which the except clauses below
                # shadowed
                extractor = ArchiveExtractor(self.instPath, compressionType,
                                             archiveType)
                extractor.open()
                for dataChunk in inputData:
                    extractor.write(dataChunk)
                extractor.flush()
                try:
                    extractor.close()
                except FailedUtility as e:
                    intf.messageWindow(_("Subprocess Failed"),
                        e.message, type="custom", custom_icon="error",
                        custom_buttons = [_("_Exit installer"),])
                    sys.exit(1)
        except MissingUtility as e:
            intf.messageWindow(_("Unsupported Format"),
                _("The %s format requires the %s utility, "
                "which is not present") % (
                e.formatName, e.utilityName),
                type="custom", custom_icon="error",
                custom_buttons = [_("_Exit installer"),])
            sys.exit(1)
        intf.setInstallProgressClass(None)

    def doPostInstall(self, anaconda):
        """Post-install fixups: graphical boot setup, fstab/mtab,
        initrd rebuild."""
        packages.rpmSetupGraphicalSystem(anaconda)
        # now write out the "real" fstab and mtab
        anaconda.storage.write(anaconda.rootPath)
        # rebuild the initrd(s) for this hardware
        self._rebuildInitrds(anaconda)
        backend.AnacondaBackend.doPostInstall(self, anaconda)

    def _rebuildInitrds(self, anaconda):
        # the unpacked initrds were built for another machine's hardware
        vers = self.kernelVersionList(anaconda.rootPath)
        for (n, arch, tag) in vers:
            packages.recreateInitrd(n, anaconda.rootPath)

    def kernelVersionList(self, rootPath = "/"):
        """Return (version, arch, tag) tuples for installed kernels."""
        return packages.rpmKernelVersionList(rootPath)
Aside from my comments above, the rest of the patch I like. Thanks for
the patch!
--
David Cantrell <dcantrell@xxxxxxxxxx>
Supervisor, Installer Engineering Team
Red Hat, Inc. | Westford, MA | EST5EDT
_______________________________________________
Anaconda-devel-list mailing list
Anaconda-devel-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/anaconda-devel-list