Prepare the dax_direct_access() API for data recovery: add a flags argument and define the DAXDEV_F_RECOVERY flag, so that the API may perform device address translation despite the presence of poison in the given range. Signed-off-by: Jane Chu <jane.chu@xxxxxxxxxx> --- drivers/dax/super.c | 9 +++++---- drivers/md/dm-linear.c | 4 ++-- drivers/md/dm-log-writes.c | 5 +++-- drivers/md/dm-stripe.c | 4 ++-- drivers/md/dm-target.c | 2 +- drivers/md/dm-writecache.c | 4 ++-- drivers/md/dm.c | 4 ++-- drivers/nvdimm/pmem.c | 7 ++++--- drivers/nvdimm/pmem.h | 2 +- drivers/s390/block/dcssblk.c | 7 ++++--- fs/dax.c | 12 ++++++++---- fs/fuse/dax.c | 2 +- fs/fuse/virtio_fs.c | 4 ++-- include/linux/dax.h | 7 +++++-- include/linux/device-mapper.h | 2 +- tools/testing/nvdimm/pmem-dax.c | 2 +- 16 files changed, 44 insertions(+), 33 deletions(-) diff --git a/drivers/dax/super.c b/drivers/dax/super.c index fc89e91beea7..67093f1c3341 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -156,8 +156,8 @@ bool generic_fsdax_supported(struct dax_device *dax_dev, } id = dax_read_lock(); - len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn); - len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn); + len = dax_direct_access(dax_dev, pgoff, 1, &kaddr, &pfn, 0); + len2 = dax_direct_access(dax_dev, pgoff_end, 1, &end_kaddr, &end_pfn, 0); if (len < 1 || len2 < 1) { pr_info("%pg: error: dax access failed (%ld)\n", @@ -302,12 +302,13 @@ EXPORT_SYMBOL_GPL(dax_attribute_group); * @nr_pages: number of consecutive pages caller can handle relative to @pfn * @kaddr: output parameter that returns a virtual address mapping of pfn * @pfn: output parameter that returns an absolute pfn translation of @pgoff + * @flags: indication whether on dax data recovery code path or not * * Return: negative errno if an error occurs, otherwise the number of * pages accessible at the device relative @pgoff. 
*/ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, - void **kaddr, pfn_t *pfn) + void **kaddr, pfn_t *pfn, unsigned long flags) { long avail; @@ -321,7 +322,7 @@ long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, return -EINVAL; avail = dax_dev->ops->direct_access(dax_dev, pgoff, nr_pages, - kaddr, pfn); + kaddr, pfn, flags); if (!avail) return -ERANGE; return min(avail, nr_pages); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 679b4c0a2eea..cb7c8518f02d 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -165,7 +165,7 @@ static int linear_iterate_devices(struct dm_target *ti, #if IS_ENABLED(CONFIG_DAX_DRIVER) static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags) { long ret; struct linear_c *lc = ti->private; @@ -177,7 +177,7 @@ static long linear_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff); if (ret) return ret; - return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); + return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn, flags); } static size_t linear_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index d93a4db23512..6d8b88dcce6c 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -950,7 +950,7 @@ static int log_dax(struct log_writes_c *lc, sector_t sector, size_t bytes, } static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags) { struct log_writes_c *lc = ti->private; sector_t sector = pgoff * PAGE_SECTORS; @@ -959,7 +959,8 @@ static long log_writes_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, ret = 
bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages * PAGE_SIZE, &pgoff); if (ret) return ret; - return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn); + return dax_direct_access(lc->dev->dax_dev, pgoff, nr_pages, kaddr, pfn, + flags); } static size_t log_writes_dax_copy_from_iter(struct dm_target *ti, diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 6660b6b53d5b..0a97d0472a0b 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -302,7 +302,7 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) #if IS_ENABLED(CONFIG_DAX_DRIVER) static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags) { sector_t dev_sector, sector = pgoff * PAGE_SECTORS; struct stripe_c *sc = ti->private; @@ -319,7 +319,7 @@ static long stripe_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages * PAGE_SIZE, &pgoff); if (ret) return ret; - return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn); + return dax_direct_access(dax_dev, pgoff, nr_pages, kaddr, pfn, flags); } static size_t stripe_dax_copy_from_iter(struct dm_target *ti, pgoff_t pgoff, diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 64dd0b34fcf4..431764b77528 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -142,7 +142,7 @@ static void io_err_release_clone_rq(struct request *clone, } static long io_err_dax_direct_access(struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags) { return -EIO; } diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c index 18320444fb0a..c523cb911eca 100644 --- a/drivers/md/dm-writecache.c +++ b/drivers/md/dm-writecache.c @@ -286,7 +286,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) id = dax_read_lock(); - da = 
dax_direct_access(wc->ssd_dev->dax_dev, offset, p, &wc->memory_map, &pfn); + da = dax_direct_access(wc->ssd_dev->dax_dev, offset, p, &wc->memory_map, &pfn, 0); if (da < 0) { wc->memory_map = NULL; r = da; @@ -309,7 +309,7 @@ static int persistent_memory_claim(struct dm_writecache *wc) do { long daa; daa = dax_direct_access(wc->ssd_dev->dax_dev, offset + i, p - i, - NULL, &pfn); + NULL, &pfn, 0); if (daa <= 0) { r = daa ? daa : -EINVAL; goto err3; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index a011d09cb0fa..e5a14abd45f9 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -998,7 +998,7 @@ static struct dm_target *dm_dax_get_live_target(struct mapped_device *md, } static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags) { struct mapped_device *md = dax_get_private(dax_dev); sector_t sector = pgoff * PAGE_SECTORS; @@ -1016,7 +1016,7 @@ static long dm_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, if (len < 1) goto out; nr_pages = min(len, nr_pages); - ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn); + ret = ti->type->direct_access(ti, pgoff, nr_pages, kaddr, pfn, flags); out: dm_put_live_table(md, srcu_idx); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 72de88ff0d30..b0b7fd40560e 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -256,7 +256,7 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, /* see "strong" declaration in tools/testing/nvdimm/pmem-dax.c */ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags) { resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; @@ -295,11 +295,12 @@ static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, } static long pmem_dax_direct_access(struct dax_device 
*dax_dev, - pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) + pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn, + unsigned long flags) { struct pmem_device *pmem = dax_get_private(dax_dev); - return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn); + return __pmem_direct_access(pmem, pgoff, nr_pages, kaddr, pfn, flags); } /* diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h index 59cfe13ea8a8..fb769b22777a 100644 --- a/drivers/nvdimm/pmem.h +++ b/drivers/nvdimm/pmem.h @@ -27,7 +27,7 @@ struct pmem_device { }; long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn); + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags); #ifdef CONFIG_MEMORY_FAILURE static inline bool test_and_clear_pmem_poison(struct page *page) diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 5be3d1c39a78..6ab2f9badc8d 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -32,7 +32,7 @@ static int dcssblk_open(struct block_device *bdev, fmode_t mode); static void dcssblk_release(struct gendisk *disk, fmode_t mode); static blk_qc_t dcssblk_submit_bio(struct bio *bio); static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn); + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags); static char dcssblk_segments[DCSSBLK_PARM_LEN] = "\0"; @@ -62,7 +62,7 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, long rc; void *kaddr; - rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL, 0); if (rc < 0) return rc; memset(kaddr, 0, nr_pages << PAGE_SHIFT); @@ -932,7 +932,8 @@ __dcssblk_direct_access(struct dcssblk_dev_info *dev_info, pgoff_t pgoff, static long dcssblk_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void 
**kaddr, pfn_t *pfn, + unsigned long flags) { struct dcssblk_dev_info *dev_info = dax_get_private(dax_dev); diff --git a/fs/dax.c b/fs/dax.c index 01118de00011..f603a9ce7f20 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -722,7 +722,7 @@ static int copy_cow_page_dax(struct block_device *bdev, struct dax_device *dax_d return rc; id = dax_read_lock(); - rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, 1, &kaddr, NULL, 0); if (rc < 0) { dax_read_unlock(id); return rc; @@ -1023,7 +1023,7 @@ static int dax_iomap_pfn(const struct iomap *iomap, loff_t pos, size_t size, return rc; id = dax_read_lock(); length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), - NULL, pfnp); + NULL, pfnp, 0); if (length < 0) { rc = length; goto out; @@ -1149,7 +1149,7 @@ s64 dax_iomap_zero(loff_t pos, u64 length, struct iomap *iomap) if (page_aligned) rc = dax_zero_page_range(iomap->dax_dev, pgoff, 1); else - rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL); + rc = dax_direct_access(iomap->dax_dev, pgoff, 1, &kaddr, NULL, 0); if (rc < 0) { dax_read_unlock(id); return rc; @@ -1172,6 +1172,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, struct block_device *bdev = iomap->bdev; struct dax_device *dax_dev = iomap->dax_dev; loff_t end = pos + length, done = 0; + unsigned long dax_flag = 0; ssize_t ret = 0; size_t xfer; int id; @@ -1199,6 +1200,9 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, (end - 1) >> PAGE_SHIFT); } + if (iomi->flags & IOMAP_RECOVERY) + dax_flag |= DAXDEV_F_RECOVERY; + id = dax_read_lock(); while (pos < end) { unsigned offset = pos & (PAGE_SIZE - 1); @@ -1218,7 +1222,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, break; map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), - &kaddr, NULL); + &kaddr, NULL, dax_flag); if (map_len < 0) { ret = map_len; break; diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c index 281d79f8b3d3..2c45b94647f1 100644 --- a/fs/fuse/dax.c 
+++ b/fs/fuse/dax.c @@ -1245,7 +1245,7 @@ static int fuse_dax_mem_range_init(struct fuse_conn_dax *fcd) id = dax_read_lock(); nr_pages = dax_direct_access(fcd->dev, 0, PHYS_PFN(dax_size), NULL, - NULL); + NULL, 0); dax_read_unlock(id); if (nr_pages < 0) { pr_debug("dax_direct_access() returned %ld\n", nr_pages); diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 0ad89c6629d7..d201b6e8a190 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -739,7 +739,7 @@ static void virtio_fs_cleanup_vqs(struct virtio_device *vdev, * offset. */ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags) { struct virtio_fs *fs = dax_get_private(dax_dev); phys_addr_t offset = PFN_PHYS(pgoff); @@ -773,7 +773,7 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev, long rc; void *kaddr; - rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); + rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL, 0); if (rc < 0) return rc; memset(kaddr, 0, nr_pages << PAGE_SHIFT); diff --git a/include/linux/dax.h b/include/linux/dax.h index 2619d94c308d..0044a5d87e5d 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -9,6 +9,9 @@ /* Flag for synchronous flush */ #define DAXDEV_F_SYNC (1UL << 0) +/* Flag for DAX data recovery */ +#define DAXDEV_F_RECOVERY (1UL << 1) + typedef unsigned long dax_entry_t; struct iomap_ops; @@ -21,7 +24,7 @@ struct dax_operations { * number of pages available for DAX at that pfn. */ long (*direct_access)(struct dax_device *, pgoff_t, long, - void **, pfn_t *); + void **, pfn_t *, unsigned long); /* * Validate whether this device is usable as an fsdax backing * device. 
@@ -192,7 +195,7 @@ static inline void dax_read_unlock(int id) bool dax_alive(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, - void **kaddr, pfn_t *pfn); + void **kaddr, pfn_t *pfn, unsigned long); size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 114553b487ef..307c29789332 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -146,7 +146,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); * >= 0 : the number of bytes accessible at the address */ typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn); + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags); typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c index af19c85558e7..45dcfffe5575 100644 --- a/tools/testing/nvdimm/pmem-dax.c +++ b/tools/testing/nvdimm/pmem-dax.c @@ -8,7 +8,7 @@ #include <nd.h> long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, - long nr_pages, void **kaddr, pfn_t *pfn) + long nr_pages, void **kaddr, pfn_t *pfn, unsigned long flags) { resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; -- 2.18.4