Re: Raid-5, LVM, Reiser FS

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Ross Vandegrift wrote:
> 
> > > Are there any HOWTOs that anybody knows about on how to do a RAID-5 with
> > > LVM and a Reiser-FS filesystem? Are there any reasons not to do this? If
> > > I can dynamically resize (hopefully using the raidreconf with no
> > > problems) the array does LVM get me anything?
> 
> There's currently an issue with 2.4 kernels that causes read starvation while
> heavy writes are going on.  As a consequence, if your array is rebuilding while
> you have a journal replay going on, your boot will stall at replaying the
> journal for a very long time.  

Yeah, the same goes for reading the bitmap blocks, especially if you formatted a large volume...
This is a fix against 2.4.17 (still not in the kernel). Let us know if you have any
problems (or if you want a patch against another kernel).
Edward. 
 
diff -u -r linux/fs/reiserfs/journal.c linux-patched/fs/reiserfs/journal.c
--- linux/fs/reiserfs/journal.c Fri Dec 21 17:42:03 2001
+++ linux-patched/fs/reiserfs/journal.c Fri Feb  1 14:27:51 2002
@@ -71,7 +71,9 @@
 static DECLARE_WAIT_QUEUE_HEAD(reiserfs_commit_thread_done) ;
 DECLARE_TASK_QUEUE(reiserfs_commit_thread_tq) ;
 
-#define JOURNAL_TRANS_HALF 1018   /* must be correct to keep the desc and commit structs at 4k */
+#define JOURNAL_TRANS_HALF 1018   /* must be correct to keep the desc and commit
+                                    structs at 4k */
+#define BUFNR 64 /*read ahead */
 
 /* cnode stat bits.  Move these into reiserfs_fs.h */
 
@@ -1593,6 +1595,41 @@
 **
 ** On exit, it sets things up so the first transaction will work correctly.
 */
+struct buffer_head * reiserfs_breada (kdev_t dev, int block, int bufsize,
+                           unsigned int max_block)
+{
+       struct buffer_head * bhlist[BUFNR];
+       unsigned int blocks = BUFNR;
+       struct buffer_head * bh;
+       int i, j;
+       
+       bh = getblk (dev, block, bufsize);
+       if (buffer_uptodate (bh))
+               return (bh);   
+               
+       if (block + BUFNR > max_block) {
+               blocks = max_block - block;
+       }
+       bhlist[0] = bh;
+       j = 1;
+       for (i = 1; i < blocks; i++) {
+               bh = getblk (dev, block + i, bufsize);
+               if (buffer_uptodate (bh)) {
+                       brelse (bh);
+                       break;
+               }
+               else bhlist[j++] = bh;
+       }
+       ll_rw_block (READ, j, bhlist);
+       for(i = 1; i < j; i++) 
+               brelse (bhlist[i]);
+       bh = bhlist[0];
+       wait_on_buffer (bh);
+       if (buffer_uptodate (bh))
+               return bh;
+       brelse (bh);
+       return NULL;
+}
 static int journal_read(struct super_block *p_s_sb) {
   struct reiserfs_journal_desc *desc ;
   unsigned long last_flush_trans_id = 0 ;
@@ -1663,7 +1700,8 @@
   ** all the valid transactions, and pick out the oldest.
   */
   while(continue_replay && cur_dblock < (reiserfs_get_journal_block(p_s_sb) + JOURNAL_BLOCK_COUNT))
{
-    d_bh = bread(p_s_sb->s_dev, cur_dblock, p_s_sb->s_blocksize) ;
+         d_bh = reiserfs_breada(p_s_sb->s_dev, cur_dblock, p_s_sb->s_blocksize,
+                                reiserfs_get_journal_block(p_s_sb) + JOURNAL_BLOCK_COUNT) ;
     ret = journal_transaction_is_valid(p_s_sb, d_bh, &oldest_invalid_trans_id, &newest_mount_id) ;
     if (ret == 1) {
       desc = (struct reiserfs_journal_desc *)d_bh->b_data ;
diff -u -r linux/fs/reiserfs/super.c linux-patched/fs/reiserfs/super.c
--- linux/fs/reiserfs/super.c   Fri Dec 21 17:42:03 2001
+++ linux-patched/fs/reiserfs/super.c   Fri Feb  1 14:42:26 2002
@@ -558,28 +558,30 @@
 
 static int read_bitmaps (struct super_block * s)
 {
-    int i, bmp, dl ;
-    struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK(s);
+    int i, bmp;
 
-    SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * sb_bmap_nr(rs), GFP_NOFS,
s);
+    SB_AP_BITMAP (s) = reiserfs_kmalloc (sizeof (struct buffer_head *) * SB_BMAP_NR(s), GFP_NOFS,
s);
     if (SB_AP_BITMAP (s) == 0)
-       return 1;
-    memset (SB_AP_BITMAP (s), 0, sizeof (struct buffer_head *) * sb_bmap_nr(rs));
-
-    /* reiserfs leaves the first 64k unused so that any partition
-       labeling scheme currently used will have enough space. Then we
-       need one block for the super.  -Hans */
-    bmp = (REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize) + 1;        /* first of bitmap blocks */
-    SB_AP_BITMAP (s)[0] = reiserfs_bread (s, bmp, s->s_blocksize);
-    if(!SB_AP_BITMAP(s)[0])
-       return 1;
-    for (i = 1, bmp = dl = s->s_blocksize * 8; i < sb_bmap_nr(rs); i ++) {
-       SB_AP_BITMAP (s)[i] = reiserfs_bread (s, bmp, s->s_blocksize);
-       if (!SB_AP_BITMAP (s)[i])
-           return 1;
-       bmp += dl;
+      return 1;
+    for (i = 0, bmp = REISERFS_DISK_OFFSET_IN_BYTES / s->s_blocksize + 1;
+        i < SB_BMAP_NR(s); i++, bmp = s->s_blocksize * 8 * i) {
+      SB_AP_BITMAP (s)[i] = getblk (s->s_dev, bmp, s->s_blocksize);
+      if (!buffer_uptodate(SB_AP_BITMAP(s)[i]))
+       ll_rw_block(READ, 1, SB_AP_BITMAP(s) + i); 
     }
-
+    for (i = 0; i < SB_BMAP_NR(s); i++) {
+      wait_on_buffer(SB_AP_BITMAP (s)[i]);
+      if (!buffer_uptodate(SB_AP_BITMAP(s)[i])) {
+       reiserfs_warning("sh-2029: reiserfs read_bitmaps: "
+                        "bitmap block (#%lu) reading failed\n",
+                        SB_AP_BITMAP(s)[i]->b_blocknr);
+       for (i = 0; i < SB_BMAP_NR(s); i++)
+         brelse(SB_AP_BITMAP(s)[i]);
+       reiserfs_kfree(SB_AP_BITMAP(s), sizeof(struct buffer_head *) * SB_BMAP_NR(s), s);
+       SB_AP_BITMAP(s) = NULL;
+       return 1;
+      }
+    }   
     return 0;
 }
 
> This isn't just a problem with RAID/ReiserFS;
> it's more generalized than that.  Also, I do believe there is a patch somewhere
> that fixes the issue.
> 
> Ross Vandegrift
> ross@willow.seitz.com
> -
> To unsubscribe from this list: send the line "unsubscribe linux-raid" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
-
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux