Jane Chu wrote: > When multiple processes mmap() a dax file, then at some point, > a process issues a 'load' and consumes a hwpoison, the process > receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb > set for the poison scope. Soon after, any other process issues > a 'load' to the poisoned page (that is unmapped from the kernel > side by memory_failure), it receives a SIGBUS with > si_code = BUS_ADRERR and without valid si_lsb. > > This is confusing to user, and is different from page fault due > to poison in RAM memory, also some helpful information is lost. > > Channel dax backend driver's poison detection to the filesystem > such that instead of reporting VM_FAULT_SIGBUS, it could report > VM_FAULT_HWPOISON. > > If user level block IO syscalls fail due to poison, the errno will > be converted to EIO to maintain block API consistency. > > Signed-off-by: Jane Chu <jane.chu@xxxxxxxxxx> > --- > drivers/dax/super.c | 5 ++++- > drivers/nvdimm/pmem.c | 2 +- > drivers/s390/block/dcssblk.c | 3 ++- > fs/dax.c | 11 ++++++----- > fs/fuse/virtio_fs.c | 3 ++- > include/linux/dax.h | 5 +++++ > include/linux/mm.h | 2 ++ > 7 files changed, 22 insertions(+), 9 deletions(-) > > diff --git a/drivers/dax/super.c b/drivers/dax/super.c > index c4c4728a36e4..0da9232ea175 100644 > --- a/drivers/dax/super.c > +++ b/drivers/dax/super.c > @@ -203,6 +203,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, > int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, > size_t nr_pages) > { > + int ret; > + > if (!dax_alive(dax_dev)) > return -ENXIO; > /* > @@ -213,7 +215,8 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, > if (nr_pages != 1) > return -EIO; > > - return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages); > + ret = dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages); > + return dax_mem2blk_err(ret); > } > EXPORT_SYMBOL_GPL(dax_zero_page_range); > > diff --git a/drivers/nvdimm/pmem.c 
b/drivers/nvdimm/pmem.c > index ceea55f621cc..46e094e56159 100644 > --- a/drivers/nvdimm/pmem.c > +++ b/drivers/nvdimm/pmem.c > @@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff, > long actual_nr; > > if (mode != DAX_RECOVERY_WRITE) > - return -EIO; > + return -EHWPOISON; > > /* > * Set the recovery stride is set to kernel page size because > diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c > index c09f2e053bf8..ee47ac520cd4 100644 > --- a/drivers/s390/block/dcssblk.c > +++ b/drivers/s390/block/dcssblk.c > @@ -54,7 +54,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, > rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, > &kaddr, NULL); > if (rc < 0) > - return rc; > + return dax_mem2blk_err(rc); > + > memset(kaddr, 0, nr_pages << PAGE_SHIFT); > dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); > return 0; > diff --git a/fs/dax.c b/fs/dax.c > index 2ababb89918d..a26eb5abfdc0 100644 > --- a/fs/dax.c > +++ b/fs/dax.c > @@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size, > if (!zero_edge) { > ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL); > if (ret) > - return ret; > + return dax_mem2blk_err(ret); > } > > if (copy_all) { > @@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter) > > out_unlock: > dax_read_unlock(id); > - return ret; > + return dax_mem2blk_err(ret); > } > > int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len, > @@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size) > ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, &kaddr, > NULL); > if (ret < 0) > - return ret; > + return dax_mem2blk_err(ret); > + > memset(kaddr + offset, 0, size); > if (iomap->flags & IOMAP_F_SHARED) > ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap, > @@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter 
*iomi, > > map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size), > DAX_ACCESS, &kaddr, NULL); > - if (map_len == -EIO && iov_iter_rw(iter) == WRITE) { > + if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) { > map_len = dax_direct_access(dax_dev, pgoff, > PHYS_PFN(size), DAX_RECOVERY_WRITE, > &kaddr, NULL); > @@ -1506,7 +1507,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi, > recovery = true; > } > if (map_len < 0) { > - ret = map_len; > + ret = dax_mem2blk_err(map_len); > break; > } > > diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c > index 4d8d4f16c727..5f1be1da92ce 100644 > --- a/fs/fuse/virtio_fs.c > +++ b/fs/fuse/virtio_fs.c > @@ -775,7 +775,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev, > rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr, > NULL); > if (rc < 0) > - return rc; > + return dax_mem2blk_err(rc); > + > memset(kaddr, 0, nr_pages << PAGE_SHIFT); > dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); > return 0; > diff --git a/include/linux/dax.h b/include/linux/dax.h > index bf6258472e49..a4e97acf60f5 100644 > --- a/include/linux/dax.h > +++ b/include/linux/dax.h > @@ -261,6 +261,11 @@ static inline bool dax_mapping(struct address_space *mapping) > return mapping->host && IS_DAX(mapping->host); > } > > +static inline int dax_mem2blk_err(int err) > +{ > + return (err == -EHWPOISON) ? -EIO : err; > +} I think it is worth a comment on this function to indicate where this helper is *not* used. I.e. it's easy to grep for where the error code is converted for file I/O errors, but the subtlety of when the dax_direct_access() return value is not translated for fault handling deserves a comment when we wonder why this function exists in a few years' time.