PATCH: arcboot cache

Keith M Wesolowski <wesolows@xxxxxxxxxxxx> · Mon, 22 Nov 2004 22:40:12 -0800

Let's make arcboot 20x faster, shall we?  Tested on ip22, ip32.

diff -urN arcboot-0.3.8.4/e2fslib/bitops.h arcboot-working/e2fslib/bitops.h

--- arcboot-0.3.8.4/e2fslib/bitops.h	2002-02-03 14:53:42.000000000 -0800
+++ arcboot-working/e2fslib/bitops.h	2004-11-22 12:56:19.000000000 -0800
@@ -410,6 +410,9 @@
 #endif /* !_EXT2_HAVE_ASM_SWAB */
 
 #if !defined(_EXT2_HAVE_ASM_FINDBIT_)
+
+extern int ffs(int);
+
 _INLINE_ int ext2fs_find_first_bit_set(void * addr, unsigned size)
 {
 	char	*cp = (unsigned char *) addr;
diff -urN arcboot-0.3.8.4/ext2load/arcboot.h arcboot-working/ext2load/arcboot.h
--- arcboot-0.3.8.4/ext2load/arcboot.h	2003-04-26 12:51:15.000000000 -0700
+++ arcboot-working/ext2load/arcboot.h	2004-11-21 23:34:36.000000000 -0800
@@ -21,5 +21,6 @@
 CHAR** ReadConfFile(char **partition, const char *filename, char* config);
 
 /* ext2io.c */
+extern int arc_do_progress;
 void print_ext2fs_error(long status);
 #endif /* _ARCBOOT_H */
diff -urN arcboot-0.3.8.4/ext2load/ext2io.c arcboot-working/ext2load/ext2io.c
--- arcboot-0.3.8.4/ext2load/ext2io.c	2004-03-01 03:24:04.000000000 -0800
+++ arcboot-working/ext2load/ext2io.c	2004-11-22 21:03:25.000000000 -0800
@@ -18,15 +18,115 @@
 #include <ext2fs.h>
 #include <arc.h>
 
+/*
+ * All About the Cache
+ *
+ * Without this cache, reading is horribly slow - it can take 30-60 seconds
+ * (or even more) to read a kernel.  While this is a bootloader and we only
+ * do it once, that's still a very long time for the user to sit there with
+ * nothing happening (a progress indicator has also been added).  The
+ * read workload looks like this: reads of the inode and indirection blocks
+ * interleaved with single-block reads of what are essentially contiguous
+ * regions of data blocks.  As a concrete example, we might see:
+ *
+ * 100, 200, 100, 201, 100, 202, 100, 101, 203, 100, 101, 204, ...
+ *
+ * Therefore we could simply cache the last 4 or so blocks and get an
+ * immediate 50-67% speedup with minimal waste.  However, it's possible to
+ * do better - in fact, a lot better.  The ARCS calls are so expensive that
+ * it's worthwhile also to try doing readahead.  Unless the filesystem is
+ * horribly fragmented, this will up our hit ratio to at least 85% or so
+ * with just 16 cache blocks (in fact if the fs is 0% fragmented, we could
+ * see 99% hits on the indirection blocks and about 92% on the data blocks,
+ * or about 96% overall! - even 80% would be adequate however).
+ *
+ * We really have two caches: a traditional LRU single-block cache, and a
+ * readahead multiblock scatter/gather cache.  They are however unified to
+ * speed lookup.  CACHE_SIZE is the total number of cacheable blocks, and
+ * CACHE_SG_MAX is the maximum size of a s/g request.  The overall
+ * implementation is based on the one in unix_io, but has a lot of changes
+ * to accomodate readahead.
+ *
+ * Lookup is straightforward: the cache is fully associative, so we do a
+ * linear search for the requested block number (it is only possible to
+ * search for one block at a time).  Alloc requests are handled differently.
+ * We start with the age of the oldest block and work newer until we have
+ * enough blocks to satisfy the sg request.  These blocks have their bufs
+ * point into the per-cache arc_sg_buf and the number of successfully allocated
+ * blocks is then returned after invalidating each allocated cache block and
+ * recording the block it will reference.  A later call to fill_sg_blocks
+ * will perform a single read to fill the entire cache "line."
+ *
+ * When any sg cache block is reused, the sg cached data is first copied into
+ * the per-cache buffer for all sg cache blocks, and then all buffer pointers
+ * in the sg cache blocks are reset.  Note that we only do this for the
+ * cache blocks we aren't going to immediately reuse.
+ *
+ * We don't have any reliable replacement for time(2), so instead we just use
+ * a monotonically increasing counter incremented by any function that looks
+ * into the cache.  We do risk overflow, but if we do 2**32 cache lookups
+ * the machine has probably failed to do anything useful anyway.
+ *
+ * Important: there are two large simplifying assumptions here:
+ * (1) The filesystem is universally read-only.  There are no other processes
+ *     which can write to this filesystem on this or any remote system.
+ * (2) We are single-threaded.
+ *
+ * As such, we do not have code here to handle locking, coherency, or aliases.
+ * This is fine for a bootloader but dangerous in other situations.  If
+ * ARC_IO_ALLOW_WRITE is enabled (it's off by default), then on any write will
+ * the cache will act as write-through, and the entire cache will be
+ * invalidated.  This is the most naive correct implementation.  If writing
+ * becomes an important task, this will need to be revisited; the unix_io
+ * writeback cache is a good starting point.
+ */
+
+#define CACHE_SIZE	16
+#define CACHE_SG_MAX	12
+
+#define CACHE_IS_SG(_cache)	((_cache)->buf != (_cache)->alloc_buf)
+
+struct arc_cache {
+	char		*buf;
+	char		*alloc_buf;
+	unsigned long	block;
+	int		last_use;
+	int		in_use:1;
+};
+
+static struct arc_cache *sg_cblocks[CACHE_SG_MAX];
+static unsigned long virtual_time;
 
 struct arc_private_data {
-	int magic;
-	OPENMODE mode;
-	ULONG fileID;
+	int			magic;
+	OPENMODE		mode;
+	ULONG			fileID;
+	struct arc_cache	cache[CACHE_SIZE];
+	char			*arc_sg_buf;
+	unsigned long		total_read;
+	unsigned long		seek_pos;
+	int			seek_pos_valid:1;
 };
 
-static errcode_t arc_open(const char *name, int flags,
-			  io_channel * channel);
+static void arc_progress(struct arc_private_data *, unsigned long);
+
+static errcode_t alloc_cache(io_channel, struct arc_private_data *);
+static void free_cache(io_channel, struct arc_private_data *);
+static void reset_one_cache(io_channel, struct arc_private_data *,
+    struct arc_cache *, int);
+static void reset_sg_cache(io_channel, struct arc_private_data *, int);
+static struct arc_cache *find_cached_block(io_channel,
+    struct arc_private_data *, unsigned long);
+static int alloc_sg_blocks(io_channel, struct arc_private_data *,
+    unsigned long, int);
+static errcode_t fill_sg_blocks(io_channel, struct arc_private_data *, int);
+
+static errcode_t raw_read_blk(io_channel, struct arc_private_data *,
+    unsigned long, int, char *);
+static void mul64(unsigned long, int, LARGEINTEGER *);
+static errcode_t arc_seek(io_channel, unsigned long);
+
+static errcode_t arc_open(const char *name, int flags, io_channel * channel);
 static errcode_t arc_close(io_channel channel);
 static errcode_t arc_set_blksize(io_channel channel, int blksize);
 static errcode_t arc_read_blk
@@ -47,6 +147,299 @@
 };
 io_manager arc_io_manager = &struct_arc_manager;
 
+int arc_do_progress = 0;
+
+static int hits, misses;
+
+static void
+arc_progress(struct arc_private_data *priv, unsigned long count)
+{
+	int hitrate_w = (hits * 1000) / (hits + misses) / 10;
+	int hitrate_f = (hits * 1000) / (hits + misses) % 10;
+
+	priv->total_read += count;
+	printf("\r%lx      (cache: %u.%u%%)", priv->total_read, hitrate_w,
+	    hitrate_f);
+
+#ifdef DEBUG
+	if ((hits + misses) % 100 == 0)
+		printf("hits: %u misses %u\n\r", hits, misses);
+#endif
+}
+
+/*
+ * Allocates memory for a single file's cache.
+ */
+static errcode_t
+alloc_cache(io_channel channel, struct arc_private_data *priv)
+{
+	errcode_t		status;
+	struct arc_cache	*cache;
+	int			i;
+
+	for(i = 0, cache = priv->cache; i < CACHE_SIZE; i++, cache++) {
+		memset(cache, 0, sizeof (struct arc_cache));
+		if ((status = ext2fs_get_mem(channel->block_size,
+		    (void **) &cache->alloc_buf)) != 0)
+			return (status);
+		cache->buf = cache->alloc_buf;
+	}
+
+	return (ext2fs_get_mem(channel->block_size * CACHE_SG_MAX,
+	    (void **) &priv->arc_sg_buf));
+}
+
+/*
+ * Frees all memory associated with a single file's cache.
+ */
+static void
+free_cache(io_channel channel, struct arc_private_data *priv)
+{
+	struct arc_cache	*cache;
+	int			i;
+
+	for (i = 0, cache = priv->cache; i < CACHE_SIZE; i++, cache++) {
+		if (cache->alloc_buf)
+			ext2fs_free_mem((void **) &cache->alloc_buf);
+		memset(cache, 0, sizeof (struct arc_cache));
+	}
+
+	ext2fs_free_mem((void **) &priv->arc_sg_buf);
+}
+
+/*
+ * Resets a cache block.  If the cache block is a valid sg block, the contents
+ * will be copied from the sg buffer into the private buffer.  For all blocks,
+ * the private buffer will be current.  If discard is set, the block will
+ * also be invalidated.
+ */
+static void
+reset_one_cache(io_channel channel, struct arc_private_data *priv,
+    struct arc_cache *cache, int discard)
+{
+	if (CACHE_IS_SG(cache) && discard == 0 && cache->in_use != 0)
+		memcpy(cache->alloc_buf, cache->buf, channel->block_size);
+
+	if (discard != 0)
+		cache->in_use = 0;
+
+	cache->buf = cache->alloc_buf;
+}
+
+/*
+ * Resets all sg cache blocks.  If a block is in the first
+ * alloc_count entries in sg_cblocks (meaning it has been allocated for
+ * immediate reuse) then also discards the contents.
+ */
+static void
+reset_sg_cache(io_channel channel, struct arc_private_data *priv,
+    int alloc_count)
+{
+	struct arc_cache	*cache;
+	int			i, j, discard;
+
+	for (i = 0, cache = priv->cache; i < CACHE_SIZE; i++, cache++) {
+		if (CACHE_IS_SG(cache)) {
+			discard = 0;
+			for (j = 0; j < alloc_count; j++) {
+				if (sg_cblocks[j] == cache) {
+					discard = 1;
+					break;
+				}
+			}
+			reset_one_cache(channel, priv, cache, discard);
+		}
+	}
+}
+
+/*
+ * Read count blocks starting at block directly from channel into buf, which
+ * must be of size >= channel->block_size * count.  No attempt is made to
+ * use or update any caches; however, if the last ARC read left the file
+ * pointer at the requested block, we avoid seeking.
+ */
+static errcode_t
+raw_read_blk(io_channel channel, struct arc_private_data *priv,
+    unsigned long block, int count, char *buf)
+{
+	errcode_t	status;
+	size_t		length = 0;
+
+	if (priv->seek_pos_valid == 0 || priv->seek_pos != block) {
+		status = arc_seek(channel, block);
+		priv->seek_pos = block + count;
+	} else {
+		status = 0;
+	}
+	/* If something fails, priv->seek_pos is bogus. */
+	priv->seek_pos_valid = 0;
+	if (status == 0) {
+		length = (count < 0) ? -count : count * channel->block_size;
+		ULONG nread = 0;
+
+		status = ArcRead(priv->fileID, buf, length, &nread);
+		if ((nread > 0) && (nread < length)) {
+			status = EXT2_ET_SHORT_READ;
+			memset(((char *) buf) + nread, 0, length - nread);
+		}
+		if (status != 0 && channel->read_error != NULL) {
+			status = (channel->read_error)
+			    (channel, block, count, buf, length, nread, status);
+		}
+	} else {
+		status = EXT2_ET_BAD_BLOCK_NUM;
+	}
+
+	if (status == 0) {
+		priv->seek_pos_valid = 1;
+		if (arc_do_progress != 0)
+			arc_progress(priv, (unsigned long) length);
+	}
+
+	return (status);
+}
+
+/*
+ * For the file associated with channel and priv, find block in the cache.
+ * In the case of a miss, return NULL.  The last access "time" will be
+ * updated to refresh the LRU.  Note that this is much different from the
+ * unix_io.c version of the same function; because our allocation step is
+ * far more complex to cover readahead, it is dealt with in alloc_sg_blocks.
+ */
+static struct arc_cache *
+find_cached_block(io_channel channel, struct arc_private_data *priv,
+    unsigned long block)
+{
+	struct arc_cache	*cache;
+	int			i;
+
+	++virtual_time;
+
+	for (i = 0, cache = priv->cache; i < CACHE_SIZE; i++, cache++)
+		if (cache->block == block) {
+			cache->last_use = virtual_time;
+			++hits;
+			return (cache);
+		}
+
+	++misses;
+	return (NULL);
+}
+
+/*
+ * Allocate a set of cache blocks whose buffers are contiguous.  The cache
+ * blocks are found in sg_cblocks.  The number of allocated blocks is the
+ * return value; a return value of 0 indicates an error.  The cache blocks
+ * are not filled here; use fill_sg_blocks for that.
+ */
+static int
+alloc_sg_blocks(io_channel channel, struct arc_private_data *priv,
+    unsigned long block, int count)
+{
+	struct arc_cache	*cache, *oldest_cache;
+	int			i, unused_count, age_mark;
+
+	if (count > CACHE_SG_MAX)
+		count = CACHE_SG_MAX;
+
+	++virtual_time;
+	oldest_cache = NULL;
+	unused_count = 0;
+
+	/* First use unused blocks, if any are available. */
+	for (i = 0, cache = priv->cache; i < CACHE_SIZE && unused_count < count;
+	    i++, cache++) {
+		if (cache->in_use == 0) {
+			sg_cblocks[unused_count++] = cache;
+			continue;
+		}
+		if (!oldest_cache || cache->last_use < oldest_cache->last_use)
+			oldest_cache = cache;
+	}
+
+	/* If we don't have enough blocks yet, evict the LRUs. */
+	if (unused_count < count) {
+		for (age_mark = oldest_cache->last_use;
+		    unused_count < count && age_mark <= virtual_time;
+		    age_mark++) {
+			for (i = 0, cache = priv->cache;
+			    i < CACHE_SIZE && unused_count < count;
+			    i++, cache++) {
+				if (cache->in_use == 0)
+					continue;
+				if (cache->last_use == age_mark)
+					sg_cblocks[unused_count++] = cache;
+			}
+		}
+	}
+
+	/*
+	 * At this point it's impossible not to have count blocks.  However,
+	 * even if we somehow don't, it's not fatal - perhaps someone
+	 * decided to use some future lru timestamp to lock an entry or
+	 * something.  In this case, we just continue on, and make sure the
+	 * caller knows we didn't allocate as much as was requested.
+	 */
+
+	/*
+	 * Now we set up the cache blocks.  Their buffers need to be
+	 * set to the sg buffer and they must be marked invalid (we will
+	 * mark them valid once fill_sg_blocks fills them).
+	 */
+	reset_sg_cache(channel, priv, count);
+
+	for (i = 0; i < count; i++) {
+		cache = sg_cblocks[i];
+		cache->in_use = 0;
+		cache->block = block + i;
+		cache->buf = priv->arc_sg_buf + i * channel->block_size;
+	}
+
+	return (count);
+}
+
+/*
+ * Fill the first count cache blocks in sg_cblocks with contiguous data from
+ * the file.  The block numbers are already stored in the cache metadata
+ * by a mandatory previous call to alloc_sg_blocks.  This can fail if there
+ * is an i/o error.
+ */
+static errcode_t
+fill_sg_blocks(io_channel channel, struct arc_private_data *priv, int count)
+{
+	errcode_t	status;
+	int		i;
+
+	status = raw_read_blk(channel, priv, sg_cblocks[0]->block, count,
+	    priv->arc_sg_buf);
+
+	/*
+	 * XXX Handle short read here: it may be that we've reached EOF and
+	 * can mark some of the blocks valid.
+	 */
+	if (status == 0) {
+		for (i = 0; i < count; i++) {
+			sg_cblocks[i]->in_use = 1;
+			sg_cblocks[i]->last_use = virtual_time;
+		}
+	}
+
+	return (status);
+}
+
+/*
+ * Mark the entire contents of the cache invalid, and reset any sg blocks
+ * to private buffers.
+ */
+static void
+cache_invalidate(io_channel channel, struct arc_private_data *priv)
+{
+	struct arc_cache	*cache;
+	int			i;
+
+	for (i = 0, cache = priv->cache; i < CACHE_SIZE; i++, cache++)
+		reset_one_cache(channel, priv, cache, 1);
+}
 
 static errcode_t
 arc_open(const char *name, int flags, io_channel * pchannel)
@@ -84,6 +477,8 @@
 			    ext2fs_get_mem(sizeof(struct arc_private_data),
 					   (void **) &priv);
 			if (status == 0) {
+				memset(priv, 0,
+				    sizeof(struct arc_private_data));
 				channel->private_data = priv;
 				priv->magic = EXT2_ET_BAD_MAGIC;
 				priv->mode =
@@ -99,6 +494,9 @@
 		}
 	}
 
+	if (status == 0)
+		status = alloc_cache(channel, priv);
+
 	if (status == 0) {
 		*pchannel = channel;
 	} else if (channel != NULL) {
@@ -123,12 +521,14 @@
 
 	if (--channel->refcount == 0) {
 		status = ArcClose(priv->fileID);
+		free_cache(channel, priv);
 		if (channel->name != NULL)
 			ext2fs_free_mem((void **) &channel->name);
 		if (channel->private_data != NULL)
 			ext2fs_free_mem((void **) &channel->private_data);
 		ext2fs_free_mem((void **) &channel);
 	}
+
 	return status;
 }
 
@@ -136,18 +536,23 @@
 static errcode_t arc_set_blksize(io_channel channel, int blksize)
 {
 	struct arc_private_data *priv;
+	errcode_t		status;
 
 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
 	priv = (struct arc_private_data *) channel->private_data;
 	EXT2_CHECK_MAGIC(priv, EXT2_ET_BAD_MAGIC);
 
-	if (channel->block_size != blksize)
+	if (channel->block_size != blksize) {
 		channel->block_size = blksize;
+		free_cache(channel, priv);
+		if ((status = alloc_cache(channel, priv)) != 0)
+			return (status);
+	}
 	return 0;
 }
 
-
-void mul64(unsigned long block, int blocksize, LARGEINTEGER * result)
+static void
+mul64(unsigned long block, int blocksize, LARGEINTEGER *result)
 {
 	ULONG m1l = block & 0x0FFFF, m1h = (block >> 16) & 0x0FFFF;
 	ULONG m2l = blocksize & 0x0FFFF, m2h = (blocksize >> 16) & 0x0FFFF;
@@ -162,8 +567,8 @@
 	result->HighPart += (i1 >> 16) & 0x0FFFF;
 }
 
-
-static errcode_t arc_seek(io_channel channel, unsigned long block)
+static errcode_t
+arc_seek(io_channel channel, unsigned long block)
 {
 	struct arc_private_data *priv;
 	LARGEINTEGER position;
@@ -173,42 +578,103 @@
 	return ArcSeek(priv->fileID, &position, SeekAbsolute);
 }
 
-
+/*
+ * Perform a cacheable read.  First, the cache will be checked for an
+ * existing copy of the blocks.  If present, they are copied into buf.
+ * Otherwise, we set up and execute a readahead, then copy the results into
+ * buf.  The unix_io way is a little nicer; since it doesn't have readahead
+ * it knows that buf is always big enough in multicount scenarios and thus
+ * dispenses with the extra memcpy.  There is an opportunity to improve this.
+ */
 static errcode_t
 arc_read_blk(io_channel channel, unsigned long block, int count, void *buf)
 {
 	struct arc_private_data *priv;
-	errcode_t status;
+	errcode_t		status = 0;
+	struct arc_cache	*cache;
+	char			*cbuf = (char *) buf;
+	int			cb_alloc;
 
 	EXT2_CHECK_MAGIC(channel, EXT2_ET_MAGIC_IO_CHANNEL);
 	priv = (struct arc_private_data *) channel->private_data;
 	EXT2_CHECK_MAGIC(priv, EXT2_ET_BAD_MAGIC);
 
-	status = arc_seek(channel, block);
-	if (status == 0) {
-		size_t length =
-		    (count < 0) ? -count : count * channel->block_size;
-		ULONG nread = 0;
+#ifdef DEBUG
+	printf("req %lu id %lu count %u\n\r", block, priv->fileID, count);
+#endif
+
+	/* Odd-sized reads can't be cached. */
+	if (count < 0)
+		status = raw_read_blk(channel, priv, block, count, cbuf);
+
+	while (count > 0) {
+		if ((cache = find_cached_block(channel, priv, block)) == NULL)
+			break;
+#ifdef DEBUG
+		printf("Cache hit on block %lu\n\r", block);
+#endif
+		memcpy(cbuf, cache->buf, channel->block_size);
+		count--;
+		block++;
+		cbuf += channel->block_size;
+	}
 
-		status = ArcRead(priv->fileID, buf, length, &nread);
-		if ((nread > 0) && (nread < length)) {
-			status = EXT2_ET_SHORT_READ;
-			memset(((char *) buf) + nread, 0, length - nread);
+	/*
+	 * Cache miss.  Although it could be that there's just a hole
+	 * in the cache, it's far more likely and easier to handle
+	 * that we've reached the end of a readahead blockset.  Thus
+	 * we just stop looking in the cache for the rest until after
+	 * we do a readahead.  We could try to put in some
+	 * heuristics here to avoid trashing the cache unnecessarily
+	 * for reads we expect are not part of a sequential set.
+	 */
+	while (count > 0) {
+#ifdef DEBUG
+		printf("Cache miss on block %lu (readahead %u)\n\r",
+		    block, CACHE_SG_MAX);
+#endif
+		if ((cb_alloc = alloc_sg_blocks(channel, priv, block,
+		    CACHE_SG_MAX)) == 0) {
+#ifdef DEBUG
+			printf("%s\n\r", "Cache error: can't alloc any blocks");
+#endif
+			/* Cache is broken, so do the raw read. */
+			cache_invalidate(channel, priv);
+			status = raw_read_blk(channel, priv, block, count,
+			    cbuf);
+			break;
 		}
-		if ((status != ESUCCESS) && (channel->read_error != NULL)) {
-			status = (channel->read_error)
-			    (channel, block, count, buf, length, nread, status);
+
+		if ((status = fill_sg_blocks(channel, priv, cb_alloc)) != 0) {
+#ifdef DEBUG
+			printf("Cache error (status %lu at block %lu(%u)\n\r",
+			    (unsigned long) status, block, count);
+#endif
+			/* Cache is broken, so do the raw read. */
+			cache_invalidate(channel, priv);
+			status = raw_read_blk(channel, priv, block, count,
+			    cbuf);
+			break;
 		}
-	} else {
-		status = EXT2_ET_BAD_BLOCK_NUM;
+
+		if (cb_alloc >= count) {
+			memcpy(cbuf, priv->arc_sg_buf,
+			    count * channel->block_size);
+			return (0);
+		}
+
+		memcpy(cbuf, priv->arc_sg_buf, cb_alloc * channel->block_size);
+		count -= cb_alloc;
+		block += cb_alloc;
+		cbuf += cb_alloc * channel->block_size;
 	}
-	return status;
-}
 
+	return (status);
+}
 
 static errcode_t
-    arc_write_blk
-    (io_channel channel, unsigned long block, int count, const void *buf) {
+arc_write_blk (io_channel channel, unsigned long block, int count,
+    const void *buf) {
 	struct arc_private_data *priv;
 	errcode_t status;
 
@@ -218,6 +684,8 @@
 
 	status = arc_seek(channel, block);
 #ifdef ARC_IO_ALLOW_WRITE
+	cache_invalidate(channel, priv);
+	priv->seek_pos_valid = 0;
 	if (status == 0) {
 		size_t length =
 		    (count < 0) ? -count : count * channel->block_size;
@@ -287,7 +755,7 @@
 
 void print_ext2fs_error(long error)
 {
-	char* msg;
+	const char* msg;
 
 	msg = ext2fs_strerror(error);
 	if(msg)
diff -urN arcboot-0.3.8.4/ext2load/loader.c arcboot-working/ext2load/loader.c
--- arcboot-0.3.8.4/ext2load/loader.c	2004-09-25 13:53:52.000000000 -0700
+++ arcboot-working/ext2load/loader.c	2004-11-22 21:05:43.000000000 -0800
@@ -18,6 +18,7 @@
 #include <ext2fs.h>
 
 #include <asm/addrspace.h>
+#include <asm/mipsregs.h>
 #include "arcboot.h"
 
 #include <subarch.h>
@@ -242,10 +243,13 @@
 				    Fatal("Cannot seek to program segment\n\r");
 			    }
 
+			    arc_do_progress = 1;
 			    status = ext2fs_file_read(file,
 						  (void *) (KSEG0ADDR(
 						            segment->p_vaddr)),
 						  segment->p_filesz, NULL);
+			    printf("\n\n\r");	/* Clear progress */
+			    arc_do_progress = 0;
 			    if (status != 0) {
 				print_ext2fs_error(status);
 				Fatal("Cannot read program segment\n\r");
@@ -289,10 +293,12 @@
 				    Fatal("Cannot seek to program segment\n\r");
 			    }
 
+			    arc_do_progress = 1;
 			    status = ext2fs_file_read(file,
 						  (void *) (KSEG0ADDR(
 						            segment->p_vaddr)),
 						  segment->p_filesz, NULL);
+			    arc_do_progress = 0;
 			    if (status != 0) {
 				print_ext2fs_error(status);
 				Fatal("Cannot read program segment\n\r");


-- 
Keith M Wesolowski
"Site launched. Many things not yet working." --Hector Urtubia