RE: [md PATCH 05/22] md/raid5: don't complete make_request on barrier until writes are scheduled

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I tried these patches and I still don’t see barriers working. Do I have to enable the device mapper to see the effect?

 

 

-------- log -----

~ # 

~ # mount -o barrier=1 /dev/md0 /mnt/tmpmnt/

kjournald starting.  Commit interval 5 seconds

EXT3 FS on md0, internal journal

EXT3-fs: mounted filesystem with writeback data mode.

~ # touch /mnt/tmpmnt/test

~ # sync

JBD: barrier-based sync failed on md0 - disabling barriers

~ #

 

 

From: linux-raid-owner@xxxxxxxxxxxxxxx [mailto:linux-raid-owner@xxxxxxxxxxxxxxx] On Behalf Of NeilBrown
Sent: Thursday, December 03, 2009 10:48 PM
To: linux-raid@xxxxxxxxxxxxxxx
Subject: [md PATCH 05/22] md/raid5: don't complete make_request on barrier until writes are scheduled

 

This message has been archived. View the original item <http://sdcmailvault.ad.amcc.com/EnterpriseVault/ViewMessage.asp?VaultId=1E9560FDB597EB744B7F046F24F9462D91110000sdcmailvault.ad.amcc.com&SavesetId=821000000000000~200912040648020000~0~73C17918B4764F9EBF4C8875EC1437B> 

The post-barrier-flush is sent by md as soon as make_request on the
barrier write completes.  For raid5, the data might not be in the
per-device queues yet.  So for barrier requests, wait for any
pre-reading to be done so that the request will be in the per-device
queues.

We use the 'preread_active' count to check that nothing is still in
the preread phase, and delay the decrement of this count until after
write requests have been submitted to the underlying devices.

Signed-off-by: NeilBrown <neilb@xxxxxxx>
---
 drivers/md/raid5.c |   51 +++++++++++++++++++++++++++++++++++++++------------
 1 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index ecf89c8..8c772b2 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2947,6 +2947,7 @@ static void handle_stripe5(struct stripe_head *sh)
     struct r5dev *dev;
     mdk_rdev_t *blocked_rdev = NULL;
     int prexor;
+    int dec_preread_active = 0;
 
     memset(&s, 0, sizeof(s));
     pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d check:%d "
@@ -3096,12 +3097,8 @@ static void handle_stripe5(struct stripe_head *sh)
                     set_bit(STRIPE_INSYNC, &sh->state);
             }
         }
-        if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-            atomic_dec(&conf->preread_active_stripes);
-            if (atomic_read(&conf->preread_active_stripes) <
-                IO_THRESHOLD)
-                md_wakeup_thread(conf->mddev->thread);
-        }
+        if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+            dec_preread_active = 1;
     }
 
     /* Now to consider new write requests and what else, if anything
@@ -3208,6 +3205,16 @@ static void handle_stripe5(struct stripe_head *sh)
 
     ops_run_io(sh, &s);
 
+    if (dec_preread_active) {
+        /* We delay this until after ops_run_io so that if make_request
+         * is waiting on a barrier, it won't continue until the writes
+         * have actually been submitted.
+         */
+        atomic_dec(&conf->preread_active_stripes);
+        if (atomic_read(&conf->preread_active_stripes) <
+            IO_THRESHOLD)
+            md_wakeup_thread(conf->mddev->thread);
+    }
     return_io(return_bi);
 }
 
@@ -3221,6 +3228,7 @@ static void handle_stripe6(struct stripe_head *sh)
     struct r6_state r6s;
     struct r5dev *dev, *pdev, *qdev;
     mdk_rdev_t *blocked_rdev = NULL;
+    int dec_preread_active = 0;
 
     pr_debug("handling stripe %llu, state=%#lx cnt=%d, "
         "pd_idx=%d, qd_idx=%d, check:%d, reconstruct:%d",
@@ -3380,12 +3388,8 @@ static void handle_stripe6(struct stripe_head *sh)
                     set_bit(STRIPE_INSYNC, &sh->state);
             }
         }
-        if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
-            atomic_dec(&conf->preread_active_stripes);
-            if (atomic_read(&conf->preread_active_stripes) <
-                IO_THRESHOLD)
-                md_wakeup_thread(conf->mddev->thread);
-        }
+        if (test_and_clear_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+            dec_preread_active = 1;
     }
 
     /* Now to consider new write requests and what else, if anything
@@ -3494,6 +3498,18 @@ static void handle_stripe6(struct stripe_head *sh)
 
     ops_run_io(sh, &s);
 
+
+    if (dec_preread_active) {
+        /* We delay this until after ops_run_io so that if make_request
+         * is waiting on a barrier, it won't continue until the writes
+         * have actually been submitted.
+         */
+        atomic_dec(&conf->preread_active_stripes);
+        if (atomic_read(&conf->preread_active_stripes) <
+            IO_THRESHOLD)
+            md_wakeup_thread(conf->mddev->thread);
+    }
+
     return_io(return_bi);
 }
 
@@ -3996,6 +4012,9 @@ static int make_request(struct request_queue *q, struct bio * bi)
             finish_wait(&conf->wait_for_overlap, &w);
             set_bit(STRIPE_HANDLE, &sh->state);
             clear_bit(STRIPE_DELAYED, &sh->state);
+            if (mddev->barrier &&
+                !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
+                atomic_inc(&conf->preread_active_stripes);
             release_stripe(sh);
         } else {
             /* cannot get stripe for read-ahead, just give-up */
@@ -4015,6 +4034,14 @@ static int make_request(struct request_queue *q, struct bio * bi)
 
         bio_endio(bi, 0);
     }
+
+    if (mddev->barrier) {
+        /* We need to wait for the stripes to all be handled.
+         * So: wait for preread_active_stripes to drop to 0.
+         */
+        wait_event(mddev->thread->wqueue,
+               atomic_read(&conf->preread_active_stripes) == 0);
+    }
     return 0;
 }
 


--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

<<winmail.dat>>


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux