Ross Vandegrift wrote: > > > > Are there any HOWTOs that anybody knows about on how to do a RAID-5 with > > > LVM and a Reiser-FS filesystem? Are there any reasons not to do this? If > > > I can dynamically resize (hopefully using the raidreconf with no > > > problems) the array, does LVM get me anything? > > There's currently an issue with 2.4 kernels that causes read starvation while > heavy writes are going on. As a consequence, if your array is rebuilding while > you have a journal replay going on, your boot will stall at replaying the > journal for a very long time. Yeah, also bitmap block reading, especially if you formatted a large volume... This is a fix against 2.4.17 (still not in the kernel). Let us know if you have any problems (or if you want a patch against another kernel). Edward. diff -u -r linux/fs/reiserfs/journal.c linux-patched/fs/reiserfs/journal.c --- linux/fs/reiserfs/journal.c Fri Dec 21 17:42:03 2001 +++ linux-patched/fs/reiserfs/journal.c Fri Feb 1 14:27:51 2002 @@ -71,7 +71,9 @@ static DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_done) ; DECLARE_TASK_QUEUE(reiserfs_commit_thread_tq) ; -#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit structs at 4k */ +#define JOURNAL_TRANS_HALF 1018 /* must be correct to keep the desc and commit + structs at 4k */ +#define BUFNR 64 /*read ahead */ /* cnode stat bits. Move these into reiserfs_fs.h */ @@ -1593,6 +1595,41 @@ ** ** On exit, it sets things up so the first transaction will work correctly. 
*/ +struct buffer_head * reiserfs_breada (kdev_t dev, int block, int bufsize, + unsigned int max_block) +{ + struct buffer_head * bhlist[BUFNR]; + unsigned int blocks = BUFNR; + struct buffer_head * bh; + int i, j; + + bh = getblk (dev, block, bufsize); + if (buffer_uptodate (bh)) + return (bh); + + if (block + BUFNR > max_block) { + blocks = max_block - block; + } + bhlist[0] = bh; + j = 1; + for (i = 1; i < blocks; i++) { + bh = getblk (dev, block + i, bufsize); + if (buffer_uptodate (bh)) { + brelse (bh); + break; + } + else bhlist[j++] = bh; + } + ll_rw_block (READ, j, bhlist); + for(i = 1; i < j; i++) + brelse (bhlist[i]); + bh = bhlist[0]; + wait_on_buffer (bh); + if (buffer_uptodate (bh)) + return bh; + brelse (bh); + return NULL; +} static int journal_read(struct super_block *p_s_sb) { struct reiserfs_journal_desc *desc ; unsigned long last_flush_trans_id = 0 ; @@ -1663,7 +1700,8 @@ ** all the valid transactions, and pick out the oldest. */ while(continue_replay && cur_dblock < (reiserfs_get_journal_block(p_s_sb) + JOURNAL_BLOCK_COUNT)) { - d_bh = bread(p_s_sb->s_dev, cur_dblock, p_s_sb->s_blocksize) ; + d_bh = reiserfs_breada(p_s_sb->s_dev, cur_dblock, p_s_sb->s_blocksize, + reiserfs_get_journal_block(p_s_sb) + JOURNAL_BLOCK_COUNT) ; ret = journal_transaction_is_valid(p_s_sb, d_bh, &oldest_invalid_trans_id, &newest_mount_id) ; if (ret == 1) { desc = (struct reiserfs_journal_desc *)d_bh->b_data ; diff -u -r linux/fs/reiserfs/super.c linux-patched/fs/reiserfs/super.c --- linux/fs/reiserfs/super.c Fri Dec 21 17:42:03 2001 +++ linux-patched/fs/reiserfs/super.c Fri Feb 1 14:42:26 2002 @@ -558,28 +558,30 @@ static int read_bitmaps (struct super_block * s) { - int i, bmp, dl ; - struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s); + int i, bmp; - SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * sb_bmap_nr(rs), GFP_NOFS, s); + SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * SB_BMAP_NR(s), GFP_NOFS, s); if 
(SB_AP_BITMAP (s) == 0) - return 1; - memset (SB_AP_BITMAP (s), 0, sizeof (struct buffer_head *) * sb_bmap_nr(rs)); - - /* reiserfs leaves the first 64k unused so that any partition - labeling scheme currently used will have enough space. Then we - need one block for the super. -Hans */ - bmp = (REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1; /* first of bitmap blocks */ - SB_AP_BITMAP (s)[0] = reiserfs_bread (s, bmp, s->s_blocksize); - if(!SB_AP_BITMAP(s)[0]) - return 1; - for (i = 1, bmp = dl = s->s_blocksize * 8; i < sb_bmap_nr(rs); i ++) { - SB_AP_BITMAP (s)[i] = reiserfs_bread (s, bmp, s->s_blocksize); - if (!SB_AP_BITMAP (s)[i]) - return 1; - bmp += dl; + return 1; + for (i = 0, bmp = REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1; + i < SB_BMAP_NR(s); i++, bmp = s->s_blocksize * 8 * i) { + SB_AP_BITMAP (s)[i] = getblk (s->s_dev, bmp, s->s_blocksize); + if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) + ll_rw_block(READ, 1, SB_AP_BITMAP(s) + i); } - + for (i = 0; i < SB_BMAP_NR(s); i++) { + wait_on_buffer(SB_AP_BITMAP (s)[i]); + if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) { + reiserfs_warning("sh-2029: reiserfs read_bitmaps: " + "bitmap block (#%lu) reading failed\n", + SB_AP_BITMAP(s)[i]->b_blocknr); + for (i = 0; i < SB_BMAP_NR(s); i++) + brelse(SB_AP_BITMAP(s)[i]); + reiserfs_kfree(SB_AP_BITMAP(s), sizeof(struct buffer_head *) * SB_BMAP_NR(s), s); + SB_AP_BITMAP(s) = NULL; + return 1; + } + } return 0; } > This isn't just a problem with RAID/ReiserFS, > it's more generalized than that. Also, I do believe there is a patch somewhere > that fixes the issue. 
> > Ross Vandegrift > ross@willow.seitz.com > - > To unsubscribe from this list: send the line "unsubscribe linux-raid" in > the body of a message to majordomo@vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html - To unsubscribe from this list: send the line "unsubscribe linux-raid" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html