[patch 1/8] raid5: add a per-stripe lock

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a per-stripe lock to protect stripe-specific data such as dev->read,
dev->written, dev->toread and dev->towrite. The purpose is to reduce lock
contention on conf->device_lock.

Signed-off-by: Shaohua Li <shli@xxxxxxxxxxxx>
---
 drivers/md/raid5.c |   17 +++++++++++++++++
 drivers/md/raid5.h |    1 +
 2 files changed, 18 insertions(+)

Index: linux/drivers/md/raid5.c
===================================================================
--- linux.orig/drivers/md/raid5.c	2012-06-01 13:38:54.705210229 +0800
+++ linux/drivers/md/raid5.c	2012-06-01 13:43:05.594056130 +0800
@@ -749,6 +749,7 @@ static void ops_complete_biofill(void *s
 
 	/* clear completed biofills */
 	spin_lock_irq(&conf->device_lock);
+	spin_lock_irq(&sh->stripe_lock);
 	for (i = sh->disks; i--; ) {
 		struct r5dev *dev = &sh->dev[i];
 
@@ -774,6 +775,7 @@ static void ops_complete_biofill(void *s
 			}
 		}
 	}
+	spin_unlock_irq(&sh->stripe_lock);
 	spin_unlock_irq(&conf->device_lock);
 	clear_bit(STRIPE_BIOFILL_RUN, &sh->state);
 
@@ -798,8 +800,10 @@ static void ops_run_biofill(struct strip
 		if (test_bit(R5_Wantfill, &dev->flags)) {
 			struct bio *rbi;
 			spin_lock_irq(&conf->device_lock);
+			spin_lock_irq(&sh->stripe_lock);
 			dev->read = rbi = dev->toread;
 			dev->toread = NULL;
+			spin_unlock_irq(&sh->stripe_lock);
 			spin_unlock_irq(&conf->device_lock);
 			while (rbi && rbi->bi_sector <
 				dev->sector + STRIPE_SECTORS) {
@@ -1137,10 +1141,12 @@ ops_run_biodrain(struct stripe_head *sh,
 			struct bio *wbi;
 
 			spin_lock_irq(&sh->raid_conf->device_lock);
+			spin_lock_irq(&sh->stripe_lock);
 			chosen = dev->towrite;
 			dev->towrite = NULL;
 			BUG_ON(dev->written);
 			wbi = dev->written = chosen;
+			spin_unlock_irq(&sh->stripe_lock);
 			spin_unlock_irq(&sh->raid_conf->device_lock);
 
 			while (wbi && wbi->bi_sector <
@@ -1446,6 +1452,8 @@ static int grow_one_stripe(struct r5conf
 	init_waitqueue_head(&sh->ops.wait_for_ops);
 	#endif
 
+	spin_lock_init(&sh->stripe_lock);
+
 	if (grow_buffers(sh)) {
 		shrink_buffers(sh);
 		kmem_cache_free(conf->slab_cache, sh);
@@ -2327,6 +2335,7 @@ static int add_stripe_bio(struct stripe_
 
 
 	spin_lock_irq(&conf->device_lock);
+	spin_lock_irq(&sh->stripe_lock);
 	if (forwrite) {
 		bip = &sh->dev[dd_idx].towrite;
 		if (*bip == NULL && sh->dev[dd_idx].written == NULL)
@@ -2360,6 +2369,7 @@ static int add_stripe_bio(struct stripe_
 		if (sector >= sh->dev[dd_idx].sector + STRIPE_SECTORS)
 			set_bit(R5_OVERWRITE, &sh->dev[dd_idx].flags);
 	}
+	spin_unlock_irq(&sh->stripe_lock);
 	spin_unlock_irq(&conf->device_lock);
 
 	pr_debug("added bi b#%llu to stripe s#%llu, disk %d.\n",
@@ -2376,6 +2386,7 @@ static int add_stripe_bio(struct stripe_
 
  overlap:
 	set_bit(R5_Overlap, &sh->dev[dd_idx].flags);
+	spin_unlock_irq(&sh->stripe_lock);
 	spin_unlock_irq(&conf->device_lock);
 	return 0;
 }
@@ -2427,6 +2438,7 @@ handle_failed_stripe(struct r5conf *conf
 			}
 		}
 		spin_lock_irq(&conf->device_lock);
+		spin_lock_irq(&sh->stripe_lock);
 		/* fail all writes first */
 		bi = sh->dev[i].towrite;
 		sh->dev[i].towrite = NULL;
@@ -2488,6 +2500,7 @@ handle_failed_stripe(struct r5conf *conf
 				bi = nextbi;
 			}
 		}
+		spin_unlock_irq(&sh->stripe_lock);
 		spin_unlock_irq(&conf->device_lock);
 		if (bitmap_end)
 			bitmap_endwrite(conf->mddev->bitmap, sh->sector,
@@ -2695,6 +2708,7 @@ static void handle_stripe_clean_event(st
 				int bitmap_end = 0;
 				pr_debug("Return write for disc %d\n", i);
 				spin_lock_irq(&conf->device_lock);
+				spin_lock_irq(&sh->stripe_lock);
 				wbi = dev->written;
 				dev->written = NULL;
 				while (wbi && wbi->bi_sector <
@@ -2709,6 +2723,7 @@ static void handle_stripe_clean_event(st
 				}
 				if (dev->towrite == NULL)
 					bitmap_end = 1;
+				spin_unlock_irq(&sh->stripe_lock);
 				spin_unlock_irq(&conf->device_lock);
 				if (bitmap_end)
 					bitmap_endwrite(conf->mddev->bitmap,
@@ -3168,6 +3183,7 @@ static void analyse_stripe(struct stripe
 	/* Now to look around and see what can be done */
 	rcu_read_lock();
 	spin_lock_irq(&conf->device_lock);
+	spin_lock_irq(&sh->stripe_lock);
 	for (i=disks; i--; ) {
 		struct md_rdev *rdev;
 		sector_t first_bad;
@@ -3313,6 +3329,7 @@ static void analyse_stripe(struct stripe
 				do_recovery = 1;
 		}
 	}
+	spin_unlock_irq(&sh->stripe_lock);
 	spin_unlock_irq(&conf->device_lock);
 	if (test_bit(STRIPE_SYNCING, &sh->state)) {
 		/* If there is a failed device being replaced,
Index: linux/drivers/md/raid5.h
===================================================================
--- linux.orig/drivers/md/raid5.h	2012-06-01 13:38:54.717210079 +0800
+++ linux/drivers/md/raid5.h	2012-06-01 13:44:19.229127709 +0800
@@ -210,6 +210,7 @@ struct stripe_head {
 	int			disks;		/* disks in stripe */
 	enum check_states	check_state;
 	enum reconstruct_states reconstruct_state;
+	spinlock_t		stripe_lock;
 	/**
 	 * struct stripe_operations
 	 * @target - STRIPE_OP_COMPUTE_BLK target

--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux