RE: [PATCH v4 1/1] dax: enable dax fault handler to report VM_FAULT_HWPOISON

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jane Chu wrote:
> When multiple processes mmap() a dax file and, at some point, one
> process issues a 'load' that consumes a hwpoison, that process
> receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb
> set for the poison scope. Soon after, when any other process issues
> a 'load' to the poisoned page (which has been unmapped from the
> kernel side by memory_failure), it receives a SIGBUS with
> si_code = BUS_ADRERR and without a valid si_lsb.
> 
> This is confusing to the user and differs from a page fault due
> to poison in RAM; some helpful information is also lost.
> 
> Channel dax backend driver's poison detection to the filesystem
> such that instead of reporting VM_FAULT_SIGBUS, it could report
> VM_FAULT_HWPOISON.
> 
> If user level block IO syscalls fail due to poison, the errno will
> be converted to EIO to maintain block API consistency.
> 
> Signed-off-by: Jane Chu <jane.chu@xxxxxxxxxx>
> ---
>  drivers/dax/super.c          |  5 ++++-
>  drivers/nvdimm/pmem.c        |  2 +-
>  drivers/s390/block/dcssblk.c |  3 ++-
>  fs/dax.c                     | 11 ++++++-----
>  fs/fuse/virtio_fs.c          |  3 ++-
>  include/linux/dax.h          |  5 +++++
>  include/linux/mm.h           |  2 ++
>  7 files changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index c4c4728a36e4..0da9232ea175 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -203,6 +203,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr,
>  int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
>  			size_t nr_pages)
>  {
> +	int ret;
> +
>  	if (!dax_alive(dax_dev))
>  		return -ENXIO;
>  	/*
> @@ -213,7 +215,8 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
>  	if (nr_pages != 1)
>  		return -EIO;
>  
> -	return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
> +	ret = dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
> +	return dax_mem2blk_err(ret);
>  }
>  EXPORT_SYMBOL_GPL(dax_zero_page_range);
>  
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index ceea55f621cc..46e094e56159 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
>  		long actual_nr;
>  
>  		if (mode != DAX_RECOVERY_WRITE)
> -			return -EIO;
> +			return -EHWPOISON;
>  
>  		/*
>  		 * Set the recovery stride is set to kernel page size because
> diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
> index c09f2e053bf8..ee47ac520cd4 100644
> --- a/drivers/s390/block/dcssblk.c
> +++ b/drivers/s390/block/dcssblk.c
> @@ -54,7 +54,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev,
>  	rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
>  			&kaddr, NULL);
>  	if (rc < 0)
> -		return rc;
> +		return dax_mem2blk_err(rc);
> +
>  	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
>  	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
>  	return 0;
> diff --git a/fs/dax.c b/fs/dax.c
> index 2ababb89918d..a26eb5abfdc0 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t length, size_t align_size,
>  	if (!zero_edge) {
>  		ret = dax_iomap_direct_access(srcmap, pos, size, &saddr, NULL);
>  		if (ret)
> -			return ret;
> +			return dax_mem2blk_err(ret);
>  	}
>  
>  	if (copy_all) {
> @@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
>  
>  out_unlock:
>  	dax_read_unlock(id);
> -	return ret;
> +	return dax_mem2blk_err(ret);
>  }
>  
>  int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
> @@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t pos, size_t size)
>  	ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, &kaddr,
>  				NULL);
>  	if (ret < 0)
> -		return ret;
> +		return dax_mem2blk_err(ret);
> +
>  	memset(kaddr + offset, 0, size);
>  	if (iomap->flags & IOMAP_F_SHARED)
>  		ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
> @@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
>  
>  		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
>  				DAX_ACCESS, &kaddr, NULL);
> -		if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
> +		if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) {
>  			map_len = dax_direct_access(dax_dev, pgoff,
>  					PHYS_PFN(size), DAX_RECOVERY_WRITE,
>  					&kaddr, NULL);
> @@ -1506,7 +1507,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
>  				recovery = true;
>  		}
>  		if (map_len < 0) {
> -			ret = map_len;
> +			ret = dax_mem2blk_err(map_len);
>  			break;
>  		}
>  
> diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
> index 4d8d4f16c727..5f1be1da92ce 100644
> --- a/fs/fuse/virtio_fs.c
> +++ b/fs/fuse/virtio_fs.c
> @@ -775,7 +775,8 @@ static int virtio_fs_zero_page_range(struct dax_device *dax_dev,
>  	rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS, &kaddr,
>  			       NULL);
>  	if (rc < 0)
> -		return rc;
> +		return dax_mem2blk_err(rc);
> +
>  	memset(kaddr, 0, nr_pages << PAGE_SHIFT);
>  	dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
>  	return 0;
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index bf6258472e49..a4e97acf60f5 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -261,6 +261,11 @@ static inline bool dax_mapping(struct address_space *mapping)
>  	return mapping->host && IS_DAX(mapping->host);
>  }
>  
> +static inline int dax_mem2blk_err(int err)
> +{
> +	return (err == -EHWPOISON) ? -EIO : err;
> +}

I think it is worth a comment on this function to indicate where this
helper is *not* used. I.e. it's easy to grep for where the error code is
converted for file I/O errors, but the subtlety of when the
dax_direct_access() return value is not translated for fault handling
deserves a comment, for when we wonder why this function exists in a
few years' time.



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux