> -----Original Message-----
> From: Jane Chu <jane.chu@xxxxxxxxxx>
> Subject: Re: [PATCH RESEND v6 8/9] md: Implement dax_holder_operations
>
> On 7/30/2021 3:01 AM, Shiyang Ruan wrote:
> > This is the case where the holder represents a mapped device, or a
> > list of mapped devices more exactly (because it is possible to create
> > more than one mapped device on one pmem device).
>
> Could you share how you test this scenario?

Do you mean "more than one mapped device on one pmem device"?

1. Create two partitions on a pmem device (fsdax mode).
2. Create an LVM volume (one LV) on each partition.
3. Create an xfs filesystem on each LV.
4. Trigger a memory failure on this pmem device.

In this case, there are two LVs on one pmem device, so we should register
both of them in dax_holder and iterate over them when notifying of the
failure.

--
Thanks,
Ruan.

>
> thanks,
> -jane
>
> >
> > Find out which mapped device the offset belongs to, and translate the
> > offset from target device to mapped device. When it is done, call
> > dax_corrupted_range() for the holder of this mapped device.
> >
> > Signed-off-by: Shiyang Ruan <ruansy.fnst@xxxxxxxxxxx>
> > ---
> >  drivers/md/dm.c | 126 +++++++++++++++++++++++++++++++++++++++++++++++-
> >  1 file changed, 125 insertions(+), 1 deletion(-)
> >
> > diff --git a/drivers/md/dm.c b/drivers/md/dm.c
> > index 2c5f9e585211..a35b9a97a73f 100644
> > --- a/drivers/md/dm.c
> > +++ b/drivers/md/dm.c
> > @@ -626,7 +626,11 @@ static void dm_put_live_table_fast(struct mapped_device *md) __releases(RCU)
> >  }
> >
> >  static char *_dm_claim_ptr = "I belong to device-mapper";
> > -
> > +static const struct dax_holder_operations dm_dax_holder_ops;
> > +struct dm_holder {
> > +        struct list_head list;
> > +        struct mapped_device *md;
> > +};
> >  /*
> >   * Open a table device so we can use it as a map destination.
> >   */
> > @@ -634,6 +638,8 @@ static int open_table_device(struct table_device *td, dev_t dev,
> >                              struct mapped_device *md)
> >  {
> >          struct block_device *bdev;
> > +        struct list_head *holders;
> > +        struct dm_holder *holder;
> >
> >          int r;
> >
> > @@ -651,6 +657,19 @@ static int open_table_device(struct table_device *td, dev_t dev,
> >
> >          td->dm_dev.bdev = bdev;
> >          td->dm_dev.dax_dev = dax_get_by_host(bdev->bd_disk->disk_name);
> > +        if (!td->dm_dev.dax_dev)
> > +                return 0;
> > +
> > +        holders = dax_get_holder(td->dm_dev.dax_dev);
> > +        if (!holders) {
> > +                holders = kmalloc(sizeof(*holders), GFP_KERNEL);
> > +                INIT_LIST_HEAD(holders);
> > +                dax_set_holder(td->dm_dev.dax_dev, holders, &dm_dax_holder_ops);
> > +        }
> > +        holder = kmalloc(sizeof(*holder), GFP_KERNEL);
> > +        holder->md = md;
> > +        list_add_tail(&holder->list, holders);
> > +
> >          return 0;
> >  }
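To make the holder bookkeeping above concrete: the dax_device just carries
an opaque pointer to a lazily-allocated list, and each open_table_device()
appends one dm_holder entry for its mapped device. Below is a minimal
userspace model of that pattern. Everything in it is a simplified stand-in:
a hand-rolled singly-linked list instead of <linux/list.h>, and plain
structs instead of the real dax_get_holder()/dax_set_holder() API. (The
kernel code appends with list_add_tail(); this sketch pushes at the front
for brevity.)

/*
 * Userspace sketch of the holder-list registration above.
 * All types here are hypothetical simplifications, not kernel APIs.
 */
#include <stdio.h>
#include <stdlib.h>

struct mapped_device { const char *name; };

struct dm_holder {
        struct dm_holder *next;
        struct mapped_device *md;
};

struct dax_device {
        void *holder_data;      /* holder list; NULL until first registration */
};

/* Record that @md is a holder of @dax_dev by linking a new entry. */
static int register_holder(struct dax_device *dax_dev, struct mapped_device *md)
{
        struct dm_holder *holder = malloc(sizeof(*holder));

        if (!holder)
                return -1;
        holder->md = md;
        holder->next = dax_dev->holder_data;    /* push front; kernel appends */
        dax_dev->holder_data = holder;
        return 0;
}

int main(void)
{
        struct dax_device pmem = { NULL };
        struct mapped_device md0 = { "dm-0" }, md1 = { "dm-1" };
        struct dm_holder *h;

        /* Two mapped devices on one pmem device, as in the LVM scenario. */
        register_holder(&pmem, &md0);
        register_holder(&pmem, &md1);

        for (h = pmem.holder_data; h; h = h->next)
                printf("registered holder: %s\n", h->md->name);
        return 0;
}

Because holder_data is per-dax_device, both LVs end up on the same list,
which is what lets the failure notification below reach every mapped device
built on that pmem.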
> >
> > @@ -659,9 +678,27 @@ static int open_table_device(struct table_device *td, dev_t dev,
> >   */
> >  static void close_table_device(struct table_device *td, struct mapped_device *md)
> >  {
> > +        struct list_head *holders;
> > +        struct dm_holder *holder, *n;
> > +
> >          if (!td->dm_dev.bdev)
> >                  return;
> >
> > +        holders = dax_get_holder(td->dm_dev.dax_dev);
> > +        if (holders) {
> > +                list_for_each_entry_safe(holder, n, holders, list) {
> > +                        if (holder->md == md) {
> > +                                list_del(&holder->list);
> > +                                kfree(holder);
> > +                        }
> > +                }
> > +                if (list_empty(holders)) {
> > +                        kfree(holders);
> > +                        /* unset dax_device's holder_data */
> > +                        dax_set_holder(td->dm_dev.dax_dev, NULL, NULL);
> > +                }
> > +        }
> > +
> >          bd_unlink_disk_holder(td->dm_dev.bdev, dm_disk(md));
> >          blkdev_put(td->dm_dev.bdev, td->dm_dev.mode | FMODE_EXCL);
> >          put_dax(td->dm_dev.dax_dev);
> > @@ -1115,6 +1152,89 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
> >          return ret;
> >  }
> >
> > +#if IS_ENABLED(CONFIG_DAX_DRIVER)
> > +struct corrupted_hit_info {
> > +        struct dax_device *dax_dev;
> > +        sector_t offset;
> > +};
> > +
> > +static int dm_blk_corrupted_hit(struct dm_target *ti, struct dm_dev *dev,
> > +                                sector_t start, sector_t count, void *data)
> > +{
> > +        struct corrupted_hit_info *bc = data;
> > +
> > +        return bc->dax_dev == (void *)dev->dax_dev &&
> > +               (start <= bc->offset && bc->offset < start + count);
> > +}
> > +
> > +struct corrupted_do_info {
> > +        size_t length;
> > +        void *data;
> > +};
> > +
> > +static int dm_blk_corrupted_do(struct dm_target *ti, struct block_device *bdev,
> > +                               sector_t sector, void *data)
> > +{
> > +        struct mapped_device *md = ti->table->md;
> > +        struct corrupted_do_info *bc = data;
> > +
> > +        return dax_holder_notify_failure(md->dax_dev, to_bytes(sector),
> > +                                         bc->length, bc->data);
> > +}
> > +
> > +static int dm_dax_notify_failure_one(struct mapped_device *md,
> > +                                     struct dax_device *dax_dev,
> > +                                     loff_t offset, size_t length, void *data)
> > +{
> > +        struct dm_table *map;
> > +        struct dm_target *ti;
> > +        sector_t sect = to_sector(offset);
> > +        struct corrupted_hit_info hi = {dax_dev, sect};
> > +        struct corrupted_do_info di = {length, data};
> > +        int srcu_idx, i, rc = -ENODEV;
> > +
> > +        map = dm_get_live_table(md, &srcu_idx);
> > +        if (!map)
> > +                return rc;
> > +
> > +        /*
> > +         * find the target device, and then translate the offset of this target
> > +         * to the whole mapped device.
> > +         */
> > +        for (i = 0; i < dm_table_get_num_targets(map); i++) {
> > +                ti = dm_table_get_target(map, i);
> > +                if (!(ti->type->iterate_devices && ti->type->rmap))
> > +                        continue;
> > +                if (!ti->type->iterate_devices(ti, dm_blk_corrupted_hit, &hi))
> > +                        continue;
> > +
> > +                rc = ti->type->rmap(ti, sect, dm_blk_corrupted_do, &di);
> > +                break;
> > +        }
> > +
> > +        dm_put_live_table(md, srcu_idx);
> > +        return rc;
> > +}
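The interesting step here is ti->type->rmap(): dm_blk_corrupted_hit() first
finds the target whose underlying device contains the bad sector, and rmap
then translates that sector from the pmem device's address space back into
the mapped device's address space before dm_blk_corrupted_do() forwards the
notification. Below is a minimal userspace sketch of that translation for a
linear-style target; the struct and its fields are hypothetical
simplifications, not the real struct dm_target.

/*
 * Userspace sketch of the reverse mapping a linear target would do:
 * a sector on the underlying pmem device is mapped back to the
 * corresponding sector of the whole mapped device.
 */
#include <assert.h>
#include <stdio.h>

struct linear_target {
        unsigned long long begin;       /* first sector in the mapped device */
        unsigned long long len;         /* length in sectors */
        unsigned long long dev_start;   /* start sector on the pmem device */
};

/* Reverse map: pmem sector -> mapped-device sector, or -1 if not ours. */
static long long linear_rmap(const struct linear_target *ti,
                             unsigned long long pmem_sect)
{
        if (pmem_sect < ti->dev_start || pmem_sect >= ti->dev_start + ti->len)
                return -1;
        return ti->begin + (pmem_sect - ti->dev_start);
}

int main(void)
{
        /* An LV mapped from pmem sector 2048, 8192 sectors long. */
        struct linear_target lv = { .begin = 0, .len = 8192, .dev_start = 2048 };

        /* A poisoned pmem sector 4096 lands at LV sector 2048. */
        printf("mapped-device sector = %lld\n", linear_rmap(&lv, 4096));
        assert(linear_rmap(&lv, 1024) == -1);   /* before this target's data */
        return 0;
}

For stacked or striped targets the arithmetic differs, which is why the
translation is delegated to each target type's own rmap method rather than
done generically in dm.c.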
> > +
> > +static int dm_dax_notify_failure(struct dax_device *dax_dev,
> > +                                 loff_t offset, size_t length, void *data)
> > +{
> > +        struct dm_holder *holder;
> > +        struct list_head *holders = dax_get_holder(dax_dev);
> > +        int rc = -ENODEV;
> > +
> > +        list_for_each_entry(holder, holders, list) {
> > +                rc = dm_dax_notify_failure_one(holder->md, dax_dev, offset,
> > +                                               length, data);
> > +                if (rc != -ENODEV)
> > +                        break;
> > +        }
> > +        return rc;
> > +}
> > +#else
> > +#define dm_dax_notify_failure NULL
> > +#endif
> > +
> >  /*
> >   * A target may call dm_accept_partial_bio only from the map routine.  It is
> >   * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management
> > @@ -3057,6 +3177,10 @@ static const struct dax_operations dm_dax_ops = {
> >          .zero_page_range = dm_dax_zero_page_range,
> >  };
> >
> > +static const struct dax_holder_operations dm_dax_holder_ops = {
> > +        .notify_failure = dm_dax_notify_failure,
> > +};
> > +
> >  /*
> >   * module hooks
> >   */
> >
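Tying this back to the two-LV test scenario at the top: dm_dax_notify_failure()
walks the holder list and tries each mapped device in turn, and
dm_dax_notify_failure_one() returns -ENODEV when the failed range does not
belong to that device, so the walk naturally stops at the holder that
actually covers it. A small userspace model of that fan-out follows; the
byte ranges assigned to each holder are made up for illustration.

/*
 * Userspace sketch of the holder iteration above: try each mapped
 * device until one covers the failed range (rc != -ENODEV).
 */
#include <errno.h>
#include <stdio.h>

#define NHOLDERS 2

/* Pretend each holder covers a disjoint byte range of the pmem device. */
struct holder { const char *name; long long start, end; };

static int notify_one(const struct holder *h, long long offset)
{
        if (offset < h->start || offset >= h->end)
                return -ENODEV;         /* not this mapped device */
        printf("%s: notifying filesystem of failure at offset %lld\n",
               h->name, offset);
        return 0;
}

int main(void)
{
        struct holder holders[NHOLDERS] = {
                { "dm-0", 0,       1 << 20 },
                { "dm-1", 1 << 20, 2 << 20 },
        };
        long long bad_offset = (1 << 20) + 4096;        /* inside dm-1 */
        int i, rc = -ENODEV;

        for (i = 0; i < NHOLDERS; i++) {
                rc = notify_one(&holders[i], bad_offset);
                if (rc != -ENODEV)
                        break;
        }
        return rc ? 1 : 0;
}

With two LVs on one pmem device, only the LV that actually contains the
poisoned offset ends up notifying its filesystem.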