Hi Junxiao, While this is reported by md, is it possible to reproduce the error on purpose with another device (e.g., loop) and add a test to blktests? Dongli Zhang On 8/6/19 4:01 AM, Junxiao Bi wrote: > When md raid1 was used with imsm metadata, during the boot stage, > the raid device will first be set to readonly, then mdmon will set > it read-write later. When there were some partitions in this device, > the following race would make some partition left ro and fail to mount. > > CPU 1: CPU 2: > add_partition() set_disk_ro() //set disk RW > //disk was RO, so partition set to RO > p->policy = get_disk_ro(disk); > if (disk->part0.policy != flag) { > set_disk_ro_uevent(disk, flag); > // disk set to RW > disk->part0.policy = flag; > } > // set all exit partition to RW > while ((part = disk_part_iter_next(&piter))) > part->policy = flag; > // this part was not yet added, so it was still RO > rcu_assign_pointer(ptbl->part[partno], p); > > Move RO status setting of partitions after they were added into partition > table and introduce a mutex to sync RO status between disk and partitions. 
> > Signed-off-by: Junxiao Bi <junxiao.bi@xxxxxxxxxx> > --- > block/genhd.c | 3 +++ > block/partition-generic.c | 5 ++++- > include/linux/genhd.h | 1 + > 3 files changed, 8 insertions(+), 1 deletion(-) > > diff --git a/block/genhd.c b/block/genhd.c > index 54f1f0d381f4..f3cce1d354cf 100644 > --- a/block/genhd.c > +++ b/block/genhd.c > @@ -1479,6 +1479,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id) > } > ptbl = rcu_dereference_protected(disk->part_tbl, 1); > rcu_assign_pointer(ptbl->part[0], &disk->part0); > + mutex_init(&disk->part_lock); > > /* > * set_capacity() and get_capacity() currently don't use > @@ -1570,6 +1571,7 @@ void set_disk_ro(struct gendisk *disk, int flag) > struct disk_part_iter piter; > struct hd_struct *part; > > + mutex_lock(&disk->part_lock); > if (disk->part0.policy != flag) { > set_disk_ro_uevent(disk, flag); > disk->part0.policy = flag; > @@ -1579,6 +1581,7 @@ void set_disk_ro(struct gendisk *disk, int flag) > while ((part = disk_part_iter_next(&piter))) > part->policy = flag; > disk_part_iter_exit(&piter); > + mutex_unlock(&disk->part_lock); > } > > EXPORT_SYMBOL(set_disk_ro); > diff --git a/block/partition-generic.c b/block/partition-generic.c > index aee643ce13d1..63cb6fb996ff 100644 > --- a/block/partition-generic.c > +++ b/block/partition-generic.c > @@ -345,7 +345,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, > queue_limit_discard_alignment(&disk->queue->limits, start); > p->nr_sects = len; > p->partno = partno; > - p->policy = get_disk_ro(disk); > > if (info) { > struct partition_meta_info *pinfo = alloc_part_info(disk); > @@ -401,6 +400,10 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, > /* everything is up and running, commence */ > rcu_assign_pointer(ptbl->part[partno], p); > > + mutex_lock(&disk->part_lock); > + p->policy = get_disk_ro(disk); > + mutex_unlock(&disk->part_lock); > + > /* suppress uevent if the disk suppresses it */ > if 
(!dev_get_uevent_suppress(ddev)) > kobject_uevent(&pdev->kobj, KOBJ_ADD); > diff --git a/include/linux/genhd.h b/include/linux/genhd.h > index 8b5330dd5ac0..df6ddca8a92c 100644 > --- a/include/linux/genhd.h > +++ b/include/linux/genhd.h > @@ -201,6 +201,7 @@ struct gendisk { > */ > struct disk_part_tbl __rcu *part_tbl; > struct hd_struct part0; > + struct mutex part_lock; > > const struct block_device_operations *fops; > struct request_queue *queue; >