Re: [PATCH] md linear: Protecting mddev with rcu locks to avoid races in

SandeepKsinha <sandeepksinha@xxxxxxxxx> · Wed, 17 Jun 2009 15:47:57 +0530

Hi Neil,

I am attaching the patch file as I am seeing some issues with
copy-paste into the mail.

On Wed, Jun 17, 2009 at 3:45 PM, SandeepKsinha<sandeepksinha@xxxxxxxxx> wrote:
> Hi Neil,
> My Mistake. I missed "stg refresh". Here is the updated one.
> Sorry for the inconvinience.
>
> Signed-off-by: Sandeep K Sinha <sandeepksinha@xxxxxxxxx>
>     Due to the lack of memory ordering guarantees, we may have races around
> mddev->conf. This patch addresses the same using rcu protection to avoid
> such race conditions.
> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
> index 9ad6ec4..f254a8a 100644
> --- a/drivers/md/linear.c
> +++ b/drivers/md/linear.c
> @@ -28,10 +28,11 @@
>  static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
>  {
>   int lo, mid, hi;
> - linear_conf_t *conf = mddev_to_conf(mddev);
> -
> + linear_conf_t *conf;
> +
>   lo = 0;
>   hi = mddev->raid_disks - 1;
> + conf = rcu_dereference(mddev->private);
>
>   /*
>   * Binary Search
> @@ -45,7 +46,6 @@ static inline dev_info_t *which_dev(mddev_t *mddev,
> sector_t sector)
>   else
>   lo = mid + 1;
>   }
> -
>   return conf->disks + lo;
>  }
>
> @@ -66,7 +66,9 @@ static int linear_mergeable_bvec(struct request_queue *q,
>   unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
>   sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
>
> + rcu_read_lock();
>   dev0 = which_dev(mddev, sector);
> + rcu_read_unlock();
>   maxsectors = dev0->end_sector - sector;
>
>   if (maxsectors < bio_sectors)
> @@ -86,36 +88,49 @@ static int linear_mergeable_bvec(struct request_queue
> *q,
>  static void linear_unplug(struct request_queue *q)
>  {
>   mddev_t *mddev = q->queuedata;
> - linear_conf_t *conf = mddev_to_conf(mddev);
> + linear_conf_t *conf;
>   int i;
>
> + rcu_read_lock();
> + conf = rcu_dereference(mddev->private);
> +
>   for (i=0; i < mddev->raid_disks; i++) {
>   struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
>   blk_unplug(r_queue);
>   }
> + rcu_read_unlock();
>  }
>
>  static int linear_congested(void *data, int bits)
>  {
>   mddev_t *mddev = data;
> - linear_conf_t *conf = mddev_to_conf(mddev);
> + linear_conf_t *conf;
>   int i, ret = 0;
>
> + rcu_read_lock();
> + conf = rcu_dereference(mddev->private);
> +
>   for (i = 0; i < mddev->raid_disks && !ret ; i++) {
>   struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
>   ret |= bdi_congested(&q->backing_dev_info, bits);
>   }
> +
> + rcu_read_unlock();
>   return ret;
>  }
>
>  static sector_t linear_size(mddev_t *mddev, sector_t sectors, int
> raid_disks)
>  {
> - linear_conf_t *conf = mddev_to_conf(mddev);
> -
> + linear_conf_t *conf;
> + sector_t array_sectors;
> + rcu_read_lock();
> + conf = rcu_dereference(mddev->private);
>   WARN_ONCE(sectors || raid_disks,
>    "%s does not support generic reshape\n", __func__);
> -
> - return conf->array_sectors;
> + array_sectors = conf->array_sectors;
> + rcu_read_unlock();
> +
> + return array_sectors;
>  }
>
>  static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
> @@ -215,15 +230,14 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t
> *rdev)
>   return -EINVAL;
>
>   rdev->raid_disk = rdev->saved_raid_disk;
> -
> - newconf = linear_conf(mddev,mddev->raid_disks+1);
> + newconf = linear_conf(mddev,mddev->raid_disks + 1);
>
>   if (!newconf)
>   return -ENOMEM;
>
>   newconf->prev = mddev_to_conf(mddev);
> - mddev->private = newconf;
>   mddev->raid_disks++;
> + rcu_assign_pointer(mddev->private,newconf);
>   md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
>   set_capacity(mddev->gendisk, mddev->array_sectors);
>   return 0;
> @@ -231,8 +245,15 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
>
>  static int linear_stop (mddev_t *mddev)
>  {
> - linear_conf_t *conf = mddev_to_conf(mddev);
> -
> + linear_conf_t *conf;
> +
> + /*
> + * We do not require rcu protection here since
> + * we hold reconfig_mutex for both linear_add and
> + * linear_stop, so they cannot race.
> + */
> +
> + conf = rcu_dereference(mddev->private);
>   blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
>   do {
>   linear_conf_t *t = conf->prev;
> @@ -262,7 +283,9 @@ static int linear_make_request (struct request_queue *q,
> struct bio *bio)
>        bio_sectors(bio));
>   part_stat_unlock();
>
> + rcu_read_lock();
>   tmp_dev = which_dev(mddev, bio->bi_sector);
> + rcu_read_unlock();
>      start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
>
>   if (unlikely(bio->bi_sector >= (tmp_dev->end_sector)
> --
> Regards,
> Sandeep.
>
>
>
>
>
>
> “To learn is to change. Education is a process that changes the learner.”
>



-- 
Regards,
Sandeep.





 	
“To learn is to change. Education is a process that changes the learner.”
commit 848cea3974a6c63a8a810b8e74c87a42ea727399
Author: Sandeep K Sinha <sandeepksinha@xxxxxxxxx>
Date:   Wed Jun 17 15:33:37 2009 +0530

    Due to the lack of memory ordering guarantees, we may have races around
mddev->conf. This patch addresses the same using rcu protection to avoid
such race conditions.

diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 9ad6ec4..f254a8a 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -28,10 +28,11 @@
 static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
 {
 	int lo, mid, hi;
-	linear_conf_t *conf = mddev_to_conf(mddev);
-	
+	linear_conf_t *conf;	
+
 	lo = 0;
 	hi = mddev->raid_disks - 1;
+	conf = rcu_dereference(mddev->private);
 
 	/*
 	 * Binary Search
@@ -45,7 +46,6 @@ static inline dev_info_t *which_dev(mddev_t *mddev, sector_t sector)
 		else
 			lo = mid + 1;
 	}
-
 	return conf->disks + lo;
 }
 
@@ -66,7 +66,9 @@ static int linear_mergeable_bvec(struct request_queue *q,
 	unsigned long maxsectors, bio_sectors = bvm->bi_size >> 9;
 	sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev);
 
+	rcu_read_lock();
 	dev0 = which_dev(mddev, sector);
+	rcu_read_unlock();
 	maxsectors = dev0->end_sector - sector;
 
 	if (maxsectors < bio_sectors)
@@ -86,36 +88,49 @@ static int linear_mergeable_bvec(struct request_queue *q,
 static void linear_unplug(struct request_queue *q)
 {
 	mddev_t *mddev = q->queuedata;
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf;
 	int i;
 
+	rcu_read_lock();
+	conf = rcu_dereference(mddev->private);
+	
 	for (i=0; i < mddev->raid_disks; i++) {
 		struct request_queue *r_queue = bdev_get_queue(conf->disks[i].rdev->bdev);
 		blk_unplug(r_queue);
 	}
+	rcu_read_unlock();
 }
 
 static int linear_congested(void *data, int bits)
 {
 	mddev_t *mddev = data;
-	linear_conf_t *conf = mddev_to_conf(mddev);
+	linear_conf_t *conf;
 	int i, ret = 0;
 
+	rcu_read_lock();
+	conf = rcu_dereference(mddev->private);
+	
 	for (i = 0; i < mddev->raid_disks && !ret ; i++) {
 		struct request_queue *q = bdev_get_queue(conf->disks[i].rdev->bdev);
 		ret |= bdi_congested(&q->backing_dev_info, bits);
 	}
+
+	rcu_read_unlock();
 	return ret;
 }
 
 static sector_t linear_size(mddev_t *mddev, sector_t sectors, int raid_disks)
 {
-	linear_conf_t *conf = mddev_to_conf(mddev);
-
+	linear_conf_t *conf;
+	sector_t array_sectors;
+	rcu_read_lock();
+	conf = rcu_dereference(mddev->private);
 	WARN_ONCE(sectors || raid_disks,
 		  "%s does not support generic reshape\n", __func__);
-
-	return conf->array_sectors;
+	array_sectors = conf->array_sectors;
+	rcu_read_unlock();
+	
+	return array_sectors;
 }
 
 static linear_conf_t *linear_conf(mddev_t *mddev, int raid_disks)
@@ -215,15 +230,14 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 		return -EINVAL;
 
 	rdev->raid_disk = rdev->saved_raid_disk;
-
-	newconf = linear_conf(mddev,mddev->raid_disks+1);
+	newconf = linear_conf(mddev,mddev->raid_disks + 1);
 
 	if (!newconf)
 		return -ENOMEM;
 
 	newconf->prev = mddev_to_conf(mddev);
-	mddev->private = newconf;
 	mddev->raid_disks++;
+	rcu_assign_pointer(mddev->private,newconf);
 	md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
 	set_capacity(mddev->gendisk, mddev->array_sectors);
 	return 0;
@@ -231,8 +245,15 @@ static int linear_add(mddev_t *mddev, mdk_rdev_t *rdev)
 
 static int linear_stop (mddev_t *mddev)
 {
-	linear_conf_t *conf = mddev_to_conf(mddev);
-  
+	linear_conf_t *conf;
+
+	/*
+	 * We do not require rcu protection here since
+	 * we hold reconfig_mutex for both linear_add and
+	 * linear_stop, so they cannot race.
+	 */
+
+	conf = rcu_dereference(mddev->private);
 	blk_sync_queue(mddev->queue); /* the unplug fn references 'conf'*/
 	do {
 		linear_conf_t *t = conf->prev;
@@ -262,7 +283,9 @@ static int linear_make_request (struct request_queue *q, struct bio *bio)
 		      bio_sectors(bio));
 	part_stat_unlock();
 
+	rcu_read_lock();
 	tmp_dev = which_dev(mddev, bio->bi_sector);
+	rcu_read_unlock();
     start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
 
 	if (unlikely(bio->bi_sector >= (tmp_dev->end_sector)