After spending some time examining http-replicator, I think that it's a much better foundation for further development of InstantMirror features than mod_python is. Running as a standalone daemon lets it handle concurrent requests more naturally. As a bonus, http-replicator supports upstream FTP servers as well as HTTP, and deals with byte-range requests properly (more properly than InstantMirror does, at least).

Unlike InstantMirror, the current http-replicator implements a traditional HTTP proxy rather than a transparent proxy. It was pretty easy to hack in a new --mirror option that lets it support either mode. With the attached patch (applied atop http-replicator_4.0alpha1), http-replicator acts as a drop-in replacement for InstantMirror:

    ./http-replicator --port 80 --root /mirrors --mirror http://download.fedora.redhat.com --nohost --daemon mirror.log

If this works for someone besides me, I'll submit the patch to the http-replicator maintainer.

--Ed
Index: http-replicator_4.0alpha1/Request.py =================================================================== --- http-replicator_4.0alpha1.orig/Request.py +++ http-replicator_4.0alpha1/Request.py @@ -79,20 +79,21 @@ class HttpRequest: self.__recvbuf = self.__recvbuf[ bytes: ] assert not self.__recvbuf, 'client sends junk data after message header' - if self.__url.startswith( 'http://' ): - host = self.__url[ 7: ] + url = Params.MIRRORPREFIX + self.__url + if url.startswith( 'http://' ): + host = url[ 7: ] port = 80 if self.__cmd == 'GET': self.Protocol = Protocol.HttpProtocol else: self.Protocol = Protocol.BlindProtocol - elif self.__url.startswith( 'ftp://' ): + elif url.startswith( 'ftp://' ): assert self.__cmd == 'GET', '%s request unsupported for ftp' % self.__cmd self.Protocol = Protocol.FtpProtocol - host = self.__url[ 6: ] + host = url[ 6: ] port = 21 else: - raise AssertionError, 'invalid url: %s' % self.__url + raise AssertionError, 'invalid url: %s' % url if '/' in host: host, path = host.split( '/', 1 ) else: Index: http-replicator_4.0alpha1/Protocol.py =================================================================== --- http-replicator_4.0alpha1.orig/Protocol.py +++ http-replicator_4.0alpha1/Protocol.py @@ -60,7 +60,7 @@ class HttpProtocol( Cache.File ): def __init__( self, request ): - Cache.File.__init__( self, '%s:%i/%s' % request.url() ) + Cache.File.__init__( self, Params.NOHOST and request.url()[2] or ( '%s:%i/%s' % request.url() ) ) if Params.STATIC and self.full(): print 'Static mode; serving file directly from cache' @@ -215,7 +215,7 @@ class FtpProtocol( Cache.File ): def __init__( self, request ): - Cache.File.__init__( self, '%s:%i/%s' % request.url() ) + Cache.File.__init__( self, Params.NOHOST and request.url()[2] or ( '%s:%i/%s' % request.url() ) ) if Params.STATIC and self.full(): self.__socket = None Index: http-replicator_4.0alpha1/Params.py =================================================================== --- 
http-replicator_4.0alpha1.orig/Params.py +++ http-replicator_4.0alpha1/Params.py @@ -5,10 +5,12 @@ _args = iter( sys.argv ) PROG = _args.next() PORT = 8080 ROOT = os.getcwd() + os.sep +MIRRORPREFIX = '' VERBOSE = 0 TIMEOUT = 15 FAMILY = socket.AF_INET FLAT = False +NOHOST = False STATIC = False ONLINE = True LIMIT = False @@ -23,10 +25,12 @@ options: -h --help show this help message and exit -p --port PORT listen on this port for incoming connections, default %(PORT)i -r --root DIR set cache root directory, default current: %(ROOT)s + --mirror URL mirror this URL prefix directly instead of acting as a proxy -v --verbose show http headers and other info -t --timeout SEC break connection after so many seconds of inactivity, default %(TIMEOUT)i -6 --ipv6 try ipv6 addresses if available --flat flat mode; cache all files in root directory (dangerous!) + --nohost do not use host:port as top level of directory hierarchy --static static mode; assume files never change --offline offline mode; never connect to server --limit RATE limit download rate at a fixed K/s @@ -51,6 +55,10 @@ for _arg in _args: sys.exit( 'Error: %s requires a directory argument' % _arg ) except: sys.exit( 'Error: invalid cache directory %s' % ROOT ) + elif _arg == '--mirror': + MIRRORPREFIX = _args.next() + if not MIRRORPREFIX.startswith( 'http://' ) and not MIRRORPREFIX.startswith( 'ftp://' ): + sys.exit( 'Error: %s requires an argument starting with http:// or ftp://' % _arg ) elif _arg in ( '-v', '--verbose' ): VERBOSE += 1 elif _arg in ( '-t', '--timeout' ): @@ -63,6 +71,8 @@ for _arg in _args: FAMILY = socket.AF_UNSPEC elif _arg == '--flat': FLAT = True + elif _arg == '--nohost': + NOHOST = True elif _arg == '--static': STATIC = True elif _arg == '--offline':
-- fedora-devel-list mailing list fedora-devel-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/fedora-devel-list