On Fri, Nov 05, 2021 at 07:16:37PM -0600, Jane Chu wrote: > Introduce DAX_OP_NORMAL and DAX_OP_RECOVERY operation modes to > {dax_direct_access, dax_copy_from_iter, dax_copy_to_iter}. > DAX_OP_NORMAL is the default or the existing mode, and > DAX_OP_RECOVERY is a new mode for data recovery purpose. > > When dax-FS suspects dax media error might be encountered > on a read or write, it can enact the recovery mode read or write > by setting DAX_OP_RECOVERY in the aforementioned APIs. A read > in recovery mode attempts to fetch as much data as possible > until the first poisoned page is encountered. A write in recovery > mode attempts to clear poison(s) in a page-aligned range and > then write the user provided data over. > > DAX_OP_NORMAL should be used for all non-recovery code path. > > Signed-off-by: Jane Chu <jane.chu@xxxxxxxxxx> > --- > drivers/dax/super.c | 15 +++++++++------ > drivers/md/dm-linear.c | 14 ++++++++------ > drivers/md/dm-log-writes.c | 19 +++++++++++-------- > drivers/md/dm-stripe.c | 14 ++++++++------ > drivers/md/dm-target.c | 2 +- > drivers/md/dm-writecache.c | 8 +++++--- > drivers/md/dm.c | 14 ++++++++------ > drivers/nvdimm/pmem.c | 11 ++++++----- > drivers/nvdimm/pmem.h | 2 +- > drivers/s390/block/dcssblk.c | 13 ++++++++----- > fs/dax.c | 14 ++++++++------ > fs/fuse/dax.c | 4 ++-- > fs/fuse/virtio_fs.c | 12 ++++++++---- > include/linux/dax.h | 18 +++++++++++------- > include/linux/device-mapper.h | 5 +++-- > tools/testing/nvdimm/pmem-dax.c | 2 +- > 16 files changed, 98 insertions(+), 69 deletions(-) > <snip> > diff --git a/include/linux/dax.h b/include/linux/dax.h > index 324363b798ec..931586df2905 100644 > --- a/include/linux/dax.h > +++ b/include/linux/dax.h > @@ -9,6 +9,10 @@ > /* Flag for synchronous flush */ > #define DAXDEV_F_SYNC (1UL << 0) > > +/* dax operation mode dynamically set by caller */ > +#define DAX_OP_NORMAL 0 > +#define DAX_OP_RECOVERY 1

Mostly looks ok to me, but since this is an operation mode, should this be an enum instead of an int? Granted I also think six arguments is a lot... though I don't really see any better way to do this. (Dunno, I spent all day running internal patches through the process gauntlet so this is the remaining 2% of my brain speaking...)

--D

> + > typedef unsigned long dax_entry_t; > > struct dax_device; > @@ -22,8 +26,8 @@ struct dax_operations { > * logical-page-offset into an absolute physical pfn. Return the > * number of pages available for DAX at that pfn. > */ > - long (*direct_access)(struct dax_device *, pgoff_t, long, > - void **, pfn_t *); > + long (*direct_access)(struct dax_device *, pgoff_t, long, int, > + void **, pfn_t *); > /* > * Validate whether this device is usable as an fsdax backing > * device. > @@ -32,10 +36,10 @@ struct dax_operations { > sector_t, sector_t); > /* copy_from_iter: required operation for fs-dax direct-i/o */ > size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, > - struct iov_iter *); > + struct iov_iter *, int); > /* copy_to_iter: required operation for fs-dax direct-i/o */ > size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, > - struct iov_iter *); > + struct iov_iter *, int); > /* zero_page_range: required operation. 
Zero page range */ > int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); > }; > @@ -186,11 +190,11 @@ static inline void dax_read_unlock(int id) > bool dax_alive(struct dax_device *dax_dev); > void *dax_get_private(struct dax_device *dax_dev); > long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, > - void **kaddr, pfn_t *pfn); > + int mode, void **kaddr, pfn_t *pfn); > size_t dax_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, > - size_t bytes, struct iov_iter *i); > + size_t bytes, struct iov_iter *i, int mode); > size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, > - size_t bytes, struct iov_iter *i); > + size_t bytes, struct iov_iter *i, int mode); > int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, > size_t nr_pages); > void dax_flush(struct dax_device *dax_dev, void *addr, size_t size); > diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h > index a7df155ea49b..6596a8e0ceed 100644 > --- a/include/linux/device-mapper.h > +++ b/include/linux/device-mapper.h > @@ -146,9 +146,10 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); > * >= 0 : the number of bytes accessible at the address > */ > typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, > - long nr_pages, void **kaddr, pfn_t *pfn); > + long nr_pages, int mode, void **kaddr, pfn_t *pfn); > typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, > - void *addr, size_t bytes, struct iov_iter *i); > + void *addr, size_t bytes, struct iov_iter *i, > + int mode); > typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, > size_t nr_pages); > > diff --git a/tools/testing/nvdimm/pmem-dax.c b/tools/testing/nvdimm/pmem-dax.c > index af19c85558e7..71c225630e7e 100644 > --- a/tools/testing/nvdimm/pmem-dax.c > +++ b/tools/testing/nvdimm/pmem-dax.c > @@ -8,7 +8,7 @@ > #include <nd.h> > > long __pmem_direct_access(struct 
pmem_device *pmem, pgoff_t pgoff, > - long nr_pages, void **kaddr, pfn_t *pfn) > + long nr_pages, int mode, void **kaddr, pfn_t *pfn) > { > resource_size_t offset = PFN_PHYS(pgoff) + pmem->data_offset; > > -- > 2.18.4 >