Jane Chu wrote: > When multiple processes mmap() a dax file, then at some point, > a process issues a 'load' and consumes a hwpoison, the process > receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb > set for the poison scope. Soon after, any other process issues > a 'load' to the poisoned page (that is unmapped from the kernel > side by memory_failure), it receives a SIGBUS with > si_code = BUS_ADRERR and without valid si_lsb. > > This is confusing to user, and is different from page fault due > to poison in RAM memory, also some helpful information is lost. > > Channel dax backend driver's poison detection to the filesystem > such that instead of reporting VM_FAULT_SIGBUS, it could report > VM_FAULT_HWPOISON. I do think it is interesting that this will start returning SIGBUS with BUS_MCEERR_AR for stores whereas it is only signalled for loads in the direct consumption path, but I can't think of a scenario where that would confuse existing software. > Change from v2: > Convert -EHWPOISON to -EIO to prevent EHWPOISON errno from leaking > out to block read(2) - suggested by Matthew. For next time these kind of changelog notes belong... > Signed-off-by: Jane Chu <jane.chu@xxxxxxxxxx> > --- ...here after the "---". > drivers/nvdimm/pmem.c | 2 +- > fs/dax.c | 4 ++-- > include/linux/mm.h | 2 ++ > 3 files changed, 5 insertions(+), 3 deletions(-) > > diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c > index ceea55f621cc..46e094e56159 100644 > --- a/drivers/nvdimm/pmem.c > +++ b/drivers/nvdimm/pmem.c > @@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, > long actual_nr; > > if (mode != DAX_RECOVERY_WRITE) > - return -EIO; > + return -EHWPOISON; > > /* > * Set the recovery stride is set to kernel page size because > diff --git a/fs/dax.c b/fs/dax.c > index 2ababb89918d..18f9598951f1 100644 > --- a/fs/dax.c > +++ b/fs/dax.c > @@ -1498,7 +1498,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, > > map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), > DAX_ACCESS, &kaddr, NULL); > - if (map_len == -EIO && iov_iter_rw(iter) == WRITE) { > + if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) { > map_len = dax_direct_access(dax_dev, pgoff, > PHYS_PFN(size), DAX_RECOVERY_WRITE, > &kaddr, NULL); > @@ -1506,7 +1506,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, > recovery = true; > } > if (map_len < 0) { > - ret = map_len; > + ret = (map_len == -EHWPOISON) ? -EIO : map_len; This fixup leaves out several other places where EHWPOISON could leak as the errno for read(2)/write(2). I think I want to see something like this: diff --git a/fs/dax.c b/fs/dax.c index 2ababb89918d..ec17f9977bcb 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1077,14 +1077,13 @@ int dax_writeback_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL_GPL(dax_writeback_mapping_range); -static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, - size_t size, void **kaddr, pfn_t *pfnp) +static int __dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, + size_t size, void **kaddr, pfn_t *pfnp) { pgoff_t pgoff = dax_iomap_pgoff(iomap, pos); - int id, rc = 0; long length; + int rc = 0; - id = dax_read_lock(); length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size), DAX_ACCESS, kaddr, pfnp); if (length < 0) { @@ -1113,6 +1112,36 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, return rc; } +static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos, + size_t size, void **kaddr, pfn_t *pfnp) +{ + + int id; + + id = dax_read_lock(); + rc = __dax_iomap_direct_access(iomap, pos, size, kaddr, pfnp); + dax_read_unlock(id); + + /* don't leak a memory access error code to I/O syscalls */ + if (rc == -EHWPOISON) + return -EIO; + return rc; +} + +static int dax_fault_direct_access(const struct iomap *iomap, loff_t pos, + size_t size, void **kaddr, pfn_t *pfnp) +{ + + int id; + + id = dax_read_lock(); + rc = __dax_iomap_direct_access(iomap, pos, size, kaddr, pfnp); + dax_read_unlock(id); + + /* -EHWPOISON return ok */ + return rc; +} + /** * dax_iomap_copy_around - Prepare for an unaligned write to a shared/cow page * by copying the data before and after the range to be written. @@ -1682,7 +1711,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf, return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS; } - err = dax_iomap_direct_access(iomap, pos, size, &kaddr, &pfn); + err = dax_fault_direct_access(iomap, pos, size, &kaddr, &pfn); if (err) return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err); ...and then convert all other callers of dax_direct_access() in that file such that they are either calling: dax_iomap_direct_access(): if caller wants EHWPOISON translated to -EIO and is ok with internal locking dax_fault_direct_access(): if caller wants EHWPOISON passed through and is ok with internal locking __dax_iomap_direct_access(): if the caller wants to do its own EHWPOISON translation and locking > break; > } > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 1f79667824eb..e4c974587659 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -3217,6 +3217,8 @@ static inline vm_fault_t vmf_error(int err) > { > if (err == -ENOMEM) > return VM_FAULT_OOM; > + else if (err == -EHWPOISON) > + return VM_FAULT_HWPOISON; > return VM_FAULT_SIGBUS; > } > > -- > 2.18.4 >