dax_recovery_write() dax op is only required for DAX device that export DAXDEV_RECOVERY indicating its capability to recover from poisons. DM may be nested, if part of the base dax devices forming a DM device support dax recovery, the DM device is marked with such capability. Signed-off-by: Jane Chu <jane.chu@xxxxxxxxxx> --- drivers/dax/super.c | 17 +++++++++++++++++ drivers/md/dm-linear.c | 12 ++++++++++++ drivers/md/dm-log-writes.c | 12 ++++++++++++ drivers/md/dm-stripe.c | 13 +++++++++++++ drivers/md/dm.c | 27 +++++++++++++++++++++++++++ drivers/nvdimm/pmem.c | 7 +++++++ include/linux/dax.h | 18 ++++++++++++++++++ include/linux/device-mapper.h | 9 +++++++++ 8 files changed, 115 insertions(+) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index f4f607d9698b..a136fa6b3e36 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -195,6 +195,23 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, } EXPORT_SYMBOL_GPL(dax_zero_page_range); +size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + if (!dax_recovery_capable(dax_dev) || !dax_dev->ops->recovery_write) + return (size_t)-EOPNOTSUPP; + return dax_dev->ops->recovery_write(dax_dev, pgoff, addr, bytes, i); +} +EXPORT_SYMBOL_GPL(dax_recovery_write); + +bool dax_recovery_started(struct dax_device *dax_dev, void **kaddr) +{ + if (!kaddr || !dax_recovery_capable(dax_dev)) + return false; + return test_bit(DAXDEV_RECOVERY, (unsigned long *)kaddr); +} +EXPORT_SYMBOL_GPL(dax_recovery_started); + #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 1b97a11d7151..831c565bf024 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -188,9 +188,20 @@ static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, return dax_zero_page_range(dax_dev, pgoff, nr_pages); } +static size_t linear_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct dax_device *dax_dev = linear_dax_pgoff(ti, &pgoff); + + if (!dax_recovery_capable(dax_dev)) + return (size_t) -EOPNOTSUPP; + return dax_recovery_write(dax_dev, pgoff, addr, bytes, i); +}; + #else #define linear_dax_direct_access NULL #define linear_dax_zero_page_range NULL +#define linear_dax_recovery_write NULL #endif static struct target_type linear_target = { @@ -208,6 +219,7 @@ static struct target_type linear_target = { .iterate_devices = linear_iterate_devices, .direct_access = linear_dax_direct_access, .dax_zero_page_range = linear_dax_zero_page_range, + .dax_recovery_write = linear_dax_recovery_write, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 139b09b06eda..22c2d64ef963 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -927,9 +927,20 @@ static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, return dax_zero_page_range(dax_dev, pgoff, nr_pages << PAGE_SHIFT); } +static size_t log_writes_dax_recovery_write(struct dm_target *ti, + pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i) +{ + struct dax_device *dax_dev = log_writes_dax_pgoff(ti, &pgoff); + + if (!dax_recovery_capable(dax_dev)) + return (size_t) -EOPNOTSUPP; + return dax_recovery_write(dax_dev, pgoff, addr, bytes, i); +} + #else #define log_writes_dax_direct_access NULL #define log_writes_dax_zero_page_range NULL +#define log_writes_dax_recovery_write NULL #endif static struct target_type log_writes_target = { @@ -947,6 +958,7 @@ static struct target_type log_writes_target = { .io_hints = log_writes_io_hints, .direct_access = log_writes_dax_direct_access, .dax_zero_page_range = log_writes_dax_zero_page_range, + .dax_recovery_write = log_writes_dax_recovery_write, }; static int __init dm_log_writes_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index e566115ec0bb..78c52c8865ef 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -332,9 +332,21 @@ static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, return dax_zero_page_range(dax_dev, pgoff, nr_pages); } +static size_t stripe_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct dax_device *dax_dev = stripe_dax_pgoff(ti, &pgoff); + + if (!dax_recovery_capable(dax_dev)) + return (size_t) -EOPNOTSUPP; + + return dax_recovery_write(dax_dev, pgoff, addr, bytes, i); +} + #else #define stripe_dax_direct_access NULL #define stripe_dax_zero_page_range NULL +#define stripe_dax_recovery_write NULL #endif /* @@ -471,6 +483,7 @@ static struct target_type stripe_target = { .io_hints = stripe_io_hints, .direct_access = stripe_dax_direct_access, .dax_zero_page_range = stripe_dax_zero_page_range, + .dax_recovery_write = stripe_dax_recovery_write, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c0ae8087c602..bdc142258ace 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1054,6 +1054,32 @@ static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } +static size_t dm_dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + struct mapped_device *md = dax_get_private(dax_dev); + sector_t sector = pgoff * PAGE_SECTORS; + struct dm_target *ti; + long ret = 0; + int srcu_idx; + + ti = dm_dax_get_live_target(md, sector, &srcu_idx); + + if (!ti) + goto out; + + if (!ti->type->dax_recovery_write) { + ret = (size_t)-EOPNOTSUPP; + goto out; + } + + ret = ti->type->dax_recovery_write(ti, pgoff, addr, bytes, i); +out: + dm_put_live_table(md, srcu_idx); + + return ret; +} + /* * A target may call dm_accept_partial_bio only from the map routine. It is * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_* zone management @@ -2980,6 +3006,7 @@ static const struct block_device_operations dm_rq_blk_dops = { static const struct dax_operations dm_dax_ops = { .direct_access = dm_dax_direct_access, .zero_page_range = dm_dax_zero_page_range, + .recovery_write = dm_dax_recovery_write, }; /* diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index e8823813a8df..638e64681db9 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -301,9 +301,16 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev, return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); } +static size_t pmem_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return 0; +} + static const struct dax_operations pmem_dax_ops = { .direct_access = pmem_dax_direct_access, .zero_page_range = pmem_dax_zero_page_range, + .recovery_write = pmem_recovery_write, }; static ssize_t write_cache_show(struct device *dev, diff --git a/include/linux/dax.h b/include/linux/dax.h index 5449a0a81c83..0b4ce58b2659 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -30,6 +30,9 @@ struct dax_operations { sector_t, sector_t); /* zero_page_range: required operation. Zero page range */ int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); + /* recovery_write: optional operation. */ + size_t (*recovery_write)(struct dax_device *, pgoff_t, void *, size_t, + struct iov_iter *); }; #if IS_ENABLED(CONFIG_DAX) @@ -138,6 +141,9 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping, loff_t st dax_entry_t dax_lock_page(struct page *page); void dax_unlock_page(struct page *page, dax_entry_t cookie); int dax_prep_recovery(struct dax_device *dax_dev, void **kaddr); +bool dax_recovery_started(struct dax_device *dax_dev, void **kaddr); +size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, + size_t bytes, struct iov_iter *i); #else static inline struct page *dax_layout_busy_page(struct address_space *mapping) { @@ -170,6 +176,18 @@ static inline int dax_prep_recovery(struct dax_device *dax_dev, void **kaddr) { return -EINVAL; } + +static inline bool dax_recovery_started(struct dax_device *dax_dev, + void **kaddr) +{ + return false; +} + +static size_t dax_recovery_write(struct dax_device *dax_dev, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i) +{ + return 0; +} #endif int dax_zero_range(struct inode *inode, loff_t pos, loff_t len, bool *did_zero, diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index b26fecf6c8e8..4f134ba63d3c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -150,6 +150,14 @@ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages); +/* + * Returns: + * != 0 : number of bytes transferred, or size_t casted negative error code + * 0 : failure + */ +typedef size_t (*dm_dax_recovery_write_fn)(struct dm_target *ti, pgoff_t pgoff, + void *addr, size_t bytes, struct iov_iter *i); + void dm_error(const char *message); struct dm_dev { @@ -199,6 +207,7 @@ struct target_type { dm_io_hints_fn io_hints; dm_dax_direct_access_fn direct_access; dm_dax_zero_page_range_fn dax_zero_page_range; + dm_dax_recovery_write_fn dax_recovery_write; /* For internal device-mapper use. */ struct list_head list; -- 2.18.4