archive-installer backend: initial review requested

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Let me start with a review of the backend.

I've run through installing both a single archive and multiple
archives of different kinds (tar and cpio) over NFS, HTTP, and FTP.
I have not tested the latest version by burning physical media
with a tarball on it, but that does not exercise any additional
code that has been changed since last time I tested it, so that
should be OK at least for review purposes.

Note that this code supports xz for decompression; that clearly
requires that xz be included on the media (as it is not in F15,
as I discovered in testing).  I'm leaving it there on the theory
that it can be added if anyone cares.

I don't know whether there is a best practice for handling the kinds
of "should not happen" errors this code covers, beyond raising
generic exceptions.  If any of these error cases is important enough
to handle in some other way, please let me know, and give me a hint
about what I should do differently.  :)

Thanks!
#
# archive.py: An anaconda backend to install from a system archive
#
# The intent is to be able to install an archive (or set of archives)
# similarly to a livecd install, except that there is no need
# to move files around afterward to handle multiple filesystems,
# or to resize a filesystem.  This archive could be located at a
# network location or be on install media.  The archive is assumed
# to contain all package-managed content.
#
# Copyright (C) 2011 Michael K Johnson.  All rights reserved.
# Copyright (C) 2007 Red Hat, Inc.  All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Author(s): Michael K Johnson <a1237@xxxxxxxxx>
#

import os
import signal
import stat
import subprocess
from urlgrabber.grabber import URLGrabber, URLGrabError

import storage

from constants import *

import gettext
_ = lambda x: gettext.ldgettext("anaconda", x)

import backend
import isys
import iutil

import network
import packages

import logging
log = logging.getLogger("anaconda")

class ProgressCallback(object):
    """Adapt byte-count progress reports to the anaconda install
    progress widget: a fraction bar (when the total is known) plus a
    running KiB label."""

    def __init__(self, progress):
        self.progress = progress

    def update(self, current, total=None):
        # A known total lets us show a completion fraction; without
        # one, only the running byte count is meaningful.
        if total is None:
            totalText = 'unknown'
        else:
            self.progress.set_fraction(current / float(total))
            totalText = '%d' % (total / 1024)
        self.progress.set_label('Unpacked %d of %s KiB' % (
                                          current / 1024, totalText))
        self.progress.processEvents()


class ChunkedData(object):
    def __init__(self, fobj, sourceObj):
        self.fobj = fobj
        self.sourceObj = sourceObj
    def __iter__(self):
        return self
    def next(self):
        data = self.fobj.read(1024*1024)
        if not data:
            raise StopIteration

        self.sourceObj.update(len(data))

        return data


def archiveFormat(filename):
    """Return (archiveType, compressionType) for a recognized archive
    filename, or False if the name matches no known format.

    compressionType is '' for an uncompressed archive; '.Z' files are
    reported as 'gz' because gunzip handles them.
    """
    # This table should be updated with whatever archive and
    # compression formats are supported in the future.  Compressed
    # suffixes are listed before the bare archive suffixes.
    knownSuffixes = (
        ('.tar.gz', ('tar', 'gz')),
        ('.tar.bz2', ('tar', 'bz2')),
        ('.tar.xz', ('tar', 'xz')),
        ('.tar.Z', ('tar', 'gz')),  # gunzip handles .Z
        ('.tar', ('tar', '')),
        ('.cpio.gz', ('cpio', 'gz')),
        ('.cpio.bz2', ('cpio', 'bz2')),
        ('.cpio.xz', ('cpio', 'xz')),
        ('.cpio.Z', ('cpio', 'gz')),
        ('.cpio', ('cpio', '')),
    )
    for suffix, result in knownSuffixes:
        if filename.endswith(suffix):
            return result
    return False


class AbstractSource(object):
    """Base class for archive sources.

    Subclasses must populate self.sources -- a list of
    (filename, size) tuples, size in bytes or None when unknown --
    and implement openfile(filename) returning a file-like object.
    Iterating a source yields one
    (archiveType, compressionType, ChunkedData) triple per archive.
    """

    def __init__(self):
        self.curLen = 0       # bytes delivered so far, across all archives
        self.totalLen = None  # total bytes, set only if every size is known
        self.progress = None  # ProgressCallback, attached via setProgress

    def setProgress(self, progress):
        """Attach the progress reporter used by update()."""
        self.progress = progress

    def setTotalLen(self):
        """Record the total size, but only when every source has a
        known size; a partial total would give a misleading fraction."""
        if None not in set(x[1] for x in self.sources):
            self.totalLen = sum(x[1] for x in self.sources)

    def update(self, length):
        """Report that length more bytes have been consumed."""
        self.curLen += length
        self.progress.update(self.curLen, self.totalLen)

    def processDescription(self, dirname, description):
        """Parse a .desc file into self.sources.

        Each line is tab-delimited:
          filename (relative to the directory the .desc is in),
          (optional) size in bytes (in decimal)
        """
        self.sources = []
        for line in description.readlines():
            line = line.strip()
            if not line:
                # tolerate blank lines (e.g. a trailing newline); they
                # used to produce a bogus ('', None) source entry
                continue
            filename, size = (line.split('\t') + [None])[0:2]
            filename = '/'.join((dirname, filename))
            if isinstance(size, str):
                size = int(size)
            self.sources.append((filename, size))

    def __iter__(self):
        return self

    def next(self):
        if not self.sources:
            raise StopIteration

        filename, size = self.sources.pop(0)
        fmt = archiveFormat(filename)
        if not fmt:
            # archiveFormat returns False for unknown names; fail with
            # a clear message rather than a TypeError when unpacking
            raise RuntimeError('unrecognized archive format: %s' % filename)
        archiveType, compressionType = fmt
        dataSource = self.openfile(filename)
        return archiveType, compressionType, ChunkedData(dataSource, self)


class URLSource(AbstractSource):
    """Archive source fetched over HTTP/FTP via urlgrabber.

    url names either a .desc file describing one or more archives, or
    a single archive directly.
    """

    def __init__(self, url):
        AbstractSource.__init__(self)

        if url.endswith('.desc'):
            description = URLGrabber().urlopen(url)
            try:
                self.processDescription(os.path.dirname(url), description)
            finally:
                # close the handle rather than just dropping it
                description.close()
        else:
            self.sources = [(url, None)]

        # We need sizes in order to give progress during the install.
        # If the desc file is missing, or does not contain sizes, then
        # we'll get the headers twice.  Small price for simplicity, and
        # if you don't like that, create a .desc file...
        for i in range(len(self.sources)):
            filename, size = self.sources[i]
            if size is None:
                h = URLGrabber().urlopen(filename)
                try:
                    l = h.hdr.getheader('Content-Length')
                finally:
                    # close the probe handle so the connection is not
                    # left open (previously only 'del h')
                    h.close()
                if l is not None:
                    self.sources[i] = (filename, int(l))

        self.setTotalLen()

    def openfile(self, url):
        return URLGrabber().urlopen(url)


class DirectorySource(AbstractSource):
    """Archive source read from a local directory tree (e.g. mounted
    install media or NFS).

    The tree may contain either a single archive, or several archives
    described by exactly one .desc file.
    """

    def __init__(self, directory):
        AbstractSource.__init__(self)

        descriptions = []
        archives = []
        for dirname, dirs, files in os.walk(directory):
            descriptions.extend('/'.join((dirname, x))
                                for x in files if x.endswith('.desc'))
            archives.extend('/'.join((dirname, x))
                            for x in files if archiveFormat(x))

        if len(descriptions) > 1:
            raise RuntimeError('Only one .desc file allowed (%s)' % ' '.join(descriptions))

        if len(archives) > 1 and not descriptions:
            raise RuntimeError('More than one archive requires .desc file (%s)' % ' '.join(archives))

        if descriptions:
            d = descriptions[0]
            desc = open(d)
            try:
                self.processDescription(os.path.dirname(d), desc)
            finally:
                # previously the .desc file handle was never closed
                desc.close()
        elif archives:
            source = archives[0]
            size = os.stat(source).st_size
            self.sources = [(source, size)]
        else:
            # previously a bare IndexError; make the failure explicit
            raise RuntimeError('No archive found under %s' % directory)

        # fill in any sizes the .desc file left out
        for i in range(len(self.sources)):
            filename, size = self.sources[i]
            if size is None:
                self.sources[i] = (filename, os.stat(filename).st_size)

        self.setTotalLen()

    def openfile(self, filename):
        # archives are binary data; open in binary mode rather than
        # the previous (platform-dependent) text mode
        return open(filename, 'rb')


# http://www.chiark.greenend.org.uk/ucgi/~cjwatson/blosxom/2009-07-02-python-sigpipe.html
def subprocess_setup():
    """Restore default SIGPIPE handling in a child process.

    Used as the Popen preexec_fn for the decompress/extract children
    (see ArchiveExtractor.open).  Python starts with SIGPIPE ignored,
    and that disposition is inherited across exec; see the URL in the
    comment above this function for the full explanation.
    """
    # tar/cpio need to get SIGPIPE when gzip is done
    signal.signal(signal.SIGPIPE, signal.SIG_DFL)

class ArchiveExtractor(object):
    """Stream archive data through a decompressor into tar/cpio,
    extracting into a target root.

    Usage: open(), then write() raw (possibly compressed) archive
    chunks, then close().  Extraction output is appended to
    /root/archiveInstall.{out,err}.log under the target root for
    post-install debugging.
    """

    # command line for each supported archive format; both read the
    # archive stream from stdin
    extractMap = {
        'tar': ['tar', 'vvvixSf', '-'],
        'cpio': ['cpio', '-ivumd']
    }
    # stdin->stdout filter for each compression type; '' (no
    # compression) is a plain cat pass-through
    decompressMap = {
        '': ['cat'],
        'Z': ['gzip', '-dc'],
        'gz': ['gunzip', '-dc'],
        'bz2': ['bunzip2', '-dc'],
        'xz': ['xz', '-dc'],
    }

    def __init__(self, root, compression, archiveFormat):
        self.root = root                    # target root directory
        self.compression = compression      # key into decompressMap
        self.archiveFormat = archiveFormat  # key into extractMap

    def open(self):
        """Start the decompress | unarchive pipeline, extracting into
        self.root."""
        root = self.root
        self.outlog = open(root + '/root/archiveInstall.out.log', 'a')
        self.errlog = open(root + '/root/archiveInstall.err.log', 'a')
        self.decompress = subprocess.Popen(
            self.decompressMap[self.compression],
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            stderr=self.errlog,
            close_fds=True,
            preexec_fn=subprocess_setup,
            cwd=root)
        self.unarchive = subprocess.Popen(
            self.extractMap[self.archiveFormat],
            stdin=self.decompress.stdout,
            stdout=self.outlog,
            stderr=self.errlog,
            close_fds=True,
            preexec_fn=subprocess_setup,
            cwd=root)
        # http://www.enricozini.org/2009/debian/python-pipes/
        self.decompress.stdout.close()

    def write(self, data):
        """Feed a chunk of raw archive data to the pipeline."""
        self.decompress.stdin.write(data)

    def flush(self):
        self.decompress.stdin.flush()

    def close(self):
        """Finish the pipeline and raise RuntimeError if either
        process failed.

        The log files are closed and both children reaped even when
        extraction fails, so a failed archive no longer leaks file
        descriptors or leaves a zombie decompressor behind.
        """
        try:
            self.flush()
            self.decompress.stdin.close()
            # wait for both processes before raising, so neither is
            # left unreaped when the other fails
            unarchiveEc = self.unarchive.wait()
            decompressEc = self.decompress.wait()
            if unarchiveEc:
                raise RuntimeError("Failed to unpack archive")
            if decompressEc:
                raise RuntimeError("Failed to decompress archive")
        finally:
            self.outlog.close()
            self.errlog.close()
            self.unarchive = None
            self.decompress = None


def archiveSource(directory=None, url=None):
    """Return the appropriate archive source for the install method:
    a URLSource when url is given, otherwise a DirectorySource for
    directory.  (The previous unreachable 'return source' dead code
    has been removed.)
    """
    if url:
        return URLSource(url)
    return DirectorySource(directory)


class ArchiveBackend(backend.AnacondaBackend):
    """Anaconda backend that installs by unpacking one or more system
    archives onto the target filesystems, rather than by installing
    packages."""

    def __init__(self, anaconda):
        backend.AnacondaBackend.__init__(self, anaconda)
        # an archive is a complete system image: upgrades and
        # per-package selection do not apply
        self.supportsUpgrades = False
        self.supportsPackageSelection = False
        self.archiveSource = None

    def _requireNetwork(self):
        # bring networking up if it is not already active
        if network.hasActiveNetDev():
            return
        if not self.anaconda.intf.enableNetwork():
            raise RuntimeError("could not enable network")

    def doBackendSetup(self, anaconda):
        if anaconda.dir == DISPATCH_BACK:
            return DISPATCH_BACK

        m = anaconda.methodstr
        if m.startswith('cdrom:'):
            location = self.anaconda.methodstr.split(':', 1)[1]
            # default to the standard media mount point
            self._getArchiveSource(topdirectory=location or '/mnt/source')
        elif m.startswith('nfs:'):
            self._requireNetwork()
            (opts, server, path) = iutil.parseNfsUrl(m)
            isys.mount(server + ':' + path, '/mnt/source', 'nfs',
                       options=opts)
            self._getArchiveSource(directory='/mnt/source')
        elif m.startswith('http:') or m.startswith('ftp:'):
            self._requireNetwork()
            self._getArchiveSource(url=m)

    def _getArchiveSource(self, topdirectory=None, directory=None, url=None):
        # media top directories keep their archives under archives/
        if topdirectory:
            directory = topdirectory + '/archives/'
        self.archiveSource = archiveSource(directory=directory, url=url)

    def doInstall(self, anaconda):
        log.info("Preparing to install archive")

        progress = anaconda.intf.instProgress
        progress.set_label(_("Unpacking archive to hard drive."))
        progress.processEvents()
        self.archiveSource.setProgress(
            ProgressCallback(anaconda.intf.instProgress))

        # unpack each archive in turn, streaming its chunks through an
        # extraction pipeline rooted at the install path
        for archiveType, compressionType, chunks in self.archiveSource:
            extractor = ArchiveExtractor(self.instPath, compressionType,
                                         archiveType)
            extractor.open()
            for chunk in chunks:
                extractor.write(chunk)
                extractor.flush()
            extractor.close()

        anaconda.intf.setInstallProgressClass(None)

    def doPostInstall(self, anaconda):
        packages.rpmSetupGraphicalSystem(anaconda)

        # now write out the "real" fstab and mtab
        anaconda.storage.write(anaconda.rootPath)

        # rebuild the initrd(s) for this hardware
        self._rebuildInitrds(anaconda)

        backend.AnacondaBackend.doPostInstall(self, anaconda)

    def _rebuildInitrds(self, anaconda):
        # regenerate an initrd for every installed kernel
        for (version, arch, tag) in self.kernelVersionList(anaconda.rootPath):
            packages.recreateInitrd(version, anaconda.rootPath)

    def kernelVersionList(self, rootPath = "/"):
        return packages.rpmKernelVersionList(rootPath)
_______________________________________________
Anaconda-devel-list mailing list
Anaconda-devel-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/anaconda-devel-list

[Index of Archives]     [Kickstart]     [Fedora Users]     [Fedora Legacy List]     [Fedora Maintainers]     [Fedora Desktop]     [Fedora SELinux]     [Big List of Linux Books]     [Yosemite News]     [Yosemite Photos]     [KDE Users]     [Fedora Tools]
  Powered by Linux