RE: [PATCH v3] dax: enable dax fault handler to report VM_FAULT_HWPOISON

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Jane Chu wrote:
> When multiple processes mmap() a dax file, then at some point,
> a process issues a 'load' and consumes a hwpoison, the process
> receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb
> set for the poison scope. Soon after, any other process issues
> a 'load' to the poisoned page (that is unmapped from the kernel
> side by memory_failure), it receives a SIGBUS with
> si_code = BUS_ADRERR and without valid si_lsb.
> 
> This is confusing to user, and is different from page fault due
> to poison in RAM memory, also some helpful information is lost.
> 
> Channel dax backend driver's poison detection to the filesystem
> such that instead of reporting VM_FAULT_SIGBUS, it could report
> VM_FAULT_HWPOISON.

I do think it is interesting that this will start returning SIGBUS with
BUS_MCEERR_AR for stores whereas it is only signalled for loads in the
direct consumption path, but I can't think of a scenario where that
would confuse existing software.

> Change from v2:
>   Convert -EHWPOISON to -EIO to prevent EHWPOISON errno from leaking
> out to block read(2) - suggested by Matthew.

For next time these kind of changelog notes belong...

> Signed-off-by: Jane Chu <jane.chu@xxxxxxxxxx>
> ---

...here after the "---".

>  drivers/nvdimm/pmem.c | 2 +-
>  fs/dax.c              | 4 ++--
>  include/linux/mm.h    | 2 ++
>  3 files changed, 5 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index ceea55f621cc..46e094e56159 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
>  		long actual_nr;
>  
>  		if (mode != DAX_RECOVERY_WRITE)
> -			return -EIO;
> +			return -EHWPOISON;
>  
>  		/*
>  		 * Set the recovery stride is set to kernel page size because
> diff --git a/fs/dax.c b/fs/dax.c
> index 2ababb89918d..18f9598951f1 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1498,7 +1498,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
>  
>  		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
>  				DAX_ACCESS, &kaddr, NULL);
> -		if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
> +		if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) {
>  			map_len = dax_direct_access(dax_dev, pgoff,
>  					PHYS_PFN(size), DAX_RECOVERY_WRITE,
>  					&kaddr, NULL);
> @@ -1506,7 +1506,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter *iomi,
>  				recovery = true;
>  		}
>  		if (map_len < 0) {
> -			ret = map_len;
> +			ret = (map_len == -EHWPOISON) ? -EIO : map_len;

This fixup leaves out several other places where EHWPOISON could leak as
the errno for read(2)/write(2). I think I want to see something like
this:

diff --git a/fs/dax.c b/fs/dax.c
index 2ababb89918d..ec17f9977bcb 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1077,14 +1077,13 @@ int dax_writeback_mapping_range(struct address_space *mapping,
 }
 EXPORT_SYMBOL_GPL(dax_writeback_mapping_range);
 
-static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
-               size_t size, void **kaddr, pfn_t *pfnp)
+static int __dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
+                                    size_t size, void **kaddr, pfn_t *pfnp)
 {
        pgoff_t pgoff = dax_iomap_pgoff(iomap, pos);
-       int id, rc = 0;
        long length;
+       int rc = 0;
 
-       id = dax_read_lock();
        length = dax_direct_access(iomap->dax_dev, pgoff, PHYS_PFN(size),
                                   DAX_ACCESS, kaddr, pfnp);
        if (length < 0) {
@@ -1113,6 +1112,36 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
        return rc;
 }
 
+static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
+                                  size_t size, void **kaddr, pfn_t *pfnp)
+{
+
+       int id;
+
+       id = dax_read_lock();
+       rc = __dax_iomap_direct_access(iomap, pos, size, kaddr, pfnp);
+       dax_read_unlock(id);
+
+       /* don't leak a memory access error code to I/O syscalls */
+       if (rc == -EHWPOISON)
+               return -EIO;
+       return rc;
+}
+
+static int dax_fault_direct_access(const struct iomap *iomap, loff_t pos,
+                                  size_t size, void **kaddr, pfn_t *pfnp)
+{
+
+       int id;
+
+       id = dax_read_lock();
+       rc = __dax_iomap_direct_access(iomap, pos, size, kaddr, pfnp);
+       dax_read_unlock(id);
+
+       /* -EHWPOISON return ok */
+       return rc;
+}
+
 /**
  * dax_iomap_copy_around - Prepare for an unaligned write to a shared/cow page
  * by copying the data before and after the range to be written.
@@ -1682,7 +1711,7 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
                return pmd ? VM_FAULT_FALLBACK : VM_FAULT_SIGBUS;
        }
 
-       err = dax_iomap_direct_access(iomap, pos, size, &kaddr, &pfn);
+       err = dax_fault_direct_access(iomap, pos, size, &kaddr, &pfn);
        if (err)
                return pmd ? VM_FAULT_FALLBACK : dax_fault_return(err);
 


...and then convert all other callers of dax_direct_access() in that
file such that they are either calling:

dax_iomap_direct_access(): if caller wants EHWPOISON translated to -EIO and is ok
			   with internal locking
dax_fault_direct_access(): if caller wants EHWPOISON passed through and is 
			   ok with internal locking
__dax_iomap_direct_access(): if the caller wants to do its own EHWPOISON
			     translation and locking

>  			break;
>  		}
>  
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 1f79667824eb..e4c974587659 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -3217,6 +3217,8 @@ static inline vm_fault_t vmf_error(int err)
>  {
>  	if (err == -ENOMEM)
>  		return VM_FAULT_OOM;
> +	else if (err == -EHWPOISON)
> +		return VM_FAULT_HWPOISON;
>  	return VM_FAULT_SIGBUS;
>  }
>  
> -- 
> 2.18.4
> 





[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [NTFS 3]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [NTFS 3]     [Samba]     [Device Mapper]     [CEPH Development]

  Powered by Linux