3.2.73-rc1 review patch. If anyone has any objections, please let me know. ------------------ From: NeilBrown <neilb@xxxxxxxx> commit 199dc6ed5179251fa6158a461499c24bdd99c836 upstream. When a (e.g.) RAID5 array is reshaped to RAID0, the updating of queue parameters (e.g. max number of sectors per bio) is done in the wrong place. It should be part of ->run, but it is actually part of ->takeover. This means it happens before level_store() calls: blk_set_stacking_limits(&mddev->queue->limits); and so it ineffective. This can lead to errors from underlying devices. So move all the relevant settings out of create_stripe_zones() and into raid0_run(). As this can lead to a bug-on it is suitable for any -stable kernel which supports reshape to RAID0. So 2.6.35 or later. As the bug has been present for five years there is no urgency, so no need to rush into -stable. Fixes: 9af204cf720c ("md: Add support for Raid5->Raid0 and Raid10->Raid0 takeover") Reported-by: Yi Zhang <yizhan@xxxxxxxxxx> Signed-off-by: NeilBrown <neilb@xxxxxxxx> [bwh: Backported to 3.2: - md has no discard or write-same support - md is not used by dm-raid so mddev->queue is never null - Open-code rdev_for_each() - Adjust context] Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx> --- --- a/drivers/md/raid0.c +++ b/drivers/md/raid0.c @@ -88,6 +88,7 @@ static int create_strip_zones(struct mdd char b[BDEVNAME_SIZE]; char b2[BDEVNAME_SIZE]; struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL); + unsigned short blksize = 512; if (!conf) return -ENOMEM; @@ -102,6 +103,9 @@ static int create_strip_zones(struct mdd sector_div(sectors, mddev->chunk_sectors); rdev1->sectors = sectors * mddev->chunk_sectors; + blksize = max(blksize, queue_logical_block_size( + rdev1->bdev->bd_disk->queue)); + list_for_each_entry(rdev2, &mddev->disks, same_set) { pr_debug("md/raid0:%s: comparing %s(%llu)" " with %s(%llu)\n", @@ -138,6 +142,18 @@ static int create_strip_zones(struct mdd } pr_debug("md/raid0:%s: FINAL %d zones\n", mdname(mddev), conf->nr_strip_zones); + /* + * now since we have the hard sector sizes, we can make sure + * chunk size is a multiple of that sector size + */ + if ((mddev->chunk_sectors << 9) % blksize) { + printk(KERN_ERR "md/raid0:%s: chunk_size of %d not multiple of block size %d\n", + mdname(mddev), + mddev->chunk_sectors << 9, blksize); + err = -EINVAL; + goto abort; + } + err = -ENOMEM; conf->strip_zone = kzalloc(sizeof(struct strip_zone)* conf->nr_strip_zones, GFP_KERNEL); @@ -186,8 +202,6 @@ static int create_strip_zones(struct mdd } dev[j] = rdev1; - disk_stack_limits(mddev->gendisk, rdev1->bdev, - rdev1->data_offset << 9); /* as we don't honour merge_bvec_fn, we must never risk * violating it, so limit ->max_segments to 1, lying within * a single page. @@ -263,21 +277,6 @@ static int create_strip_zones(struct mdd mddev->queue->backing_dev_info.congested_fn = raid0_congested; mddev->queue->backing_dev_info.congested_data = mddev; - /* - * now since we have the hard sector sizes, we can make sure - * chunk size is a multiple of that sector size - */ - if ((mddev->chunk_sectors << 9) % queue_logical_block_size(mddev->queue)) { - printk(KERN_ERR "md/raid0:%s: chunk_size of %d not valid\n", - mdname(mddev), - mddev->chunk_sectors << 9); - goto abort; - } - - blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); - blk_queue_io_opt(mddev->queue, - (mddev->chunk_sectors << 9) * mddev->raid_disks); - pr_debug("md/raid0:%s: done.\n", mdname(mddev)); *private_conf = conf; @@ -340,6 +339,7 @@ static int raid0_run(struct mddev *mddev { struct r0conf *conf; int ret; + struct md_rdev *rdev; if (mddev->chunk_sectors == 0) { printk(KERN_ERR "md/raid0:%s: chunk size must be set.\n", @@ -348,7 +348,6 @@ static int raid0_run(struct mddev *mddev } if (md_check_no_bitmap(mddev)) return -EINVAL; - blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); /* if private is not null, we are here after takeover */ if (mddev->private == NULL) { @@ -359,6 +358,16 @@ static int raid0_run(struct mddev *mddev } conf = mddev->private; + list_for_each_entry(rdev, &mddev->disks, same_set) { + disk_stack_limits(mddev->gendisk, rdev->bdev, + rdev->data_offset << 9); + } + blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors); + + blk_queue_io_min(mddev->queue, mddev->chunk_sectors << 9); + blk_queue_io_opt(mddev->queue, + (mddev->chunk_sectors << 9) * mddev->raid_disks); + /* calculate array device size */ md_set_array_sectors(mddev, raid0_size(mddev, 0, 0)); -- To unsubscribe from this list: send the line "unsubscribe stable" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html