On 11/6/2021 9:48 AM, Dan Williams wrote: > On Fri, Nov 5, 2021 at 6:17 PM Jane Chu <jane.chu@xxxxxxxxxx> wrote: >> >> Introduce DAX_OP_NORMAL and DAX_OP_RECOVERY operation modes to >> {dax_direct_access, dax_copy_from_iter, dax_copy_to_iter}. >> DAX_OP_NORMAL is the default or the existing mode, and >> DAX_OP_RECOVERY is a new mode for data recovery purposes. >> >> When dax-FS suspects a dax media error might be encountered >> on a read or write, it can enact the recovery mode read or write >> by setting DAX_OP_RECOVERY in the aforementioned APIs. A read >> in recovery mode attempts to fetch as much data as possible >> until the first poisoned page is encountered. A write in recovery >> mode attempts to clear poison(s) in a page-aligned range and >> then write the user-provided data over. >> >> DAX_OP_NORMAL should be used for all non-recovery code paths. >> >> Signed-off-by: Jane Chu <jane.chu@xxxxxxxxxx> > [..] >> diff --git a/include/linux/dax.h b/include/linux/dax.h >> index 324363b798ec..931586df2905 100644 >> --- a/include/linux/dax.h >> +++ b/include/linux/dax.h >> @@ -9,6 +9,10 @@ >> /* Flag for synchronous flush */ >> #define DAXDEV_F_SYNC (1UL << 0) >> >> +/* dax operation mode dynamically set by caller */ >> +#define DAX_OP_NORMAL 0 > > Perhaps this should be called DAX_OP_FAILFAST? Sure. > >> +#define DAX_OP_RECOVERY 1 >> + >> typedef unsigned long dax_entry_t; >> >> struct dax_device; >> @@ -22,8 +26,8 @@ struct dax_operations { >> * logical-page-offset into an absolute physical pfn. Return the >> * number of pages available for DAX at that pfn. >> */ >> - long (*direct_access)(struct dax_device *, pgoff_t, long, >> - void **, pfn_t *); >> + long (*direct_access)(struct dax_device *, pgoff_t, long, int, > > Would be nice if that 'int' was an enum, but I'm not sure a new > parameter is needed at all, see below... Let's do your suggestion below. :) > >> + void **, pfn_t *); >> /* >> * Validate whether this device is usable as an fsdax backing >> * device. 
>> @@ -32,10 +36,10 @@ struct dax_operations { >> sector_t, sector_t); >> /* copy_from_iter: required operation for fs-dax direct-i/o */ >> size_t (*copy_from_iter)(struct dax_device *, pgoff_t, void *, size_t, >> - struct iov_iter *); >> + struct iov_iter *, int); > > I'm not sure the flag is needed here as the "void *" could carry a > flag in the pointer to indicate that it is a recovery kaddr. Agreed. > >> /* copy_to_iter: required operation for fs-dax direct-i/o */ >> size_t (*copy_to_iter)(struct dax_device *, pgoff_t, void *, size_t, >> - struct iov_iter *); >> + struct iov_iter *, int); > > Same comment here. > >> /* zero_page_range: required operation. Zero page range */ >> int (*zero_page_range)(struct dax_device *, pgoff_t, size_t); >> }; >> @@ -186,11 +190,11 @@ static inline void dax_read_unlock(int id) >> bool dax_alive(struct dax_device *dax_dev); >> void *dax_get_private(struct dax_device *dax_dev); >> long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, >> - void **kaddr, pfn_t *pfn); >> + int mode, void **kaddr, pfn_t *pfn); > > How about dax_direct_access() calling convention stays the same, but > the kaddr is optionally updated to carry a flag in the lower unused > bits. So: > > void **kaddr = NULL; /* caller only cares about the pfn */ > > void *failfast = NULL; > void **kaddr = &failfast; /* caller wants -EIO not recovery */ > > void *recovery = (void *) DAX_OP_RECOVERY; > void **kaddr = &recovery; /* caller wants to carefully access page(s) > containing poison */ > Got it. Thanks! -jane