+ mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     Subject: mm/hmm: change hmm_vma_fault() to allow write fault on page basis
has been added to the -mm tree.  Its filename is
     mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Jérôme Glisse <jglisse@xxxxxxxxxx>
Subject: mm/hmm: change hmm_vma_fault() to allow write fault on page basis

Change hmm_vma_fault() to not take a global write fault flag for a range
but instead rely on the caller to populate HMM pfns array with proper
fault flag ie HMM_PFN_VALID if the driver wants a read fault for that
address or HMM_PFN_VALID and HMM_PFN_WRITE for write.

Moreover by setting HMM_PFN_DEVICE_PRIVATE the device driver can ask for
device private memory to be migrated back to system memory through page
faults.

This is a more flexible API and it better reflects how a device handles
and reports faults.

Link: http://lkml.kernel.org/r/20180316203552.4155-4-jglisse@xxxxxxxxxx
Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
Cc: Evgeny Baskakov <ebaskakov@xxxxxxxxxx>
Cc: Ralph Campbell <rcampbell@xxxxxxxxxx>
Cc: Mark Hairgrove <mhairgrove@xxxxxxxxxx>
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/hmm.h |    2 
 mm/hmm.c            |  152 ++++++++++++++++++++++++++++++++----------
 2 files changed, 120 insertions(+), 34 deletions(-)

diff -puN include/linux/hmm.h~mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis include/linux/hmm.h
--- a/include/linux/hmm.h~mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis
+++ a/include/linux/hmm.h
@@ -317,7 +317,7 @@ bool hmm_vma_range_done(struct hmm_range
  *
  * See the function description in mm/hmm.c for further documentation.
  */
-int hmm_vma_fault(struct hmm_range *range, bool write, bool block);
+int hmm_vma_fault(struct hmm_range *range, bool block);
 #endif /* IS_ENABLED(CONFIG_HMM_MIRROR) */
 
 
diff -puN mm/hmm.c~mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis mm/hmm.c
--- a/mm/hmm.c~mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis
+++ a/mm/hmm.c
@@ -256,12 +256,10 @@ struct hmm_vma_walk {
 	unsigned long		last;
 	bool			fault;
 	bool			block;
-	bool			write;
 };
 
-static int hmm_vma_do_fault(struct mm_walk *walk,
-			    unsigned long addr,
-			    uint64_t *pfn)
+static int hmm_vma_do_fault(struct mm_walk *walk, unsigned long addr,
+			    bool write_fault, uint64_t *pfn)
 {
 	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_REMOTE;
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
@@ -269,7 +267,7 @@ static int hmm_vma_do_fault(struct mm_wa
 	int r;
 
 	flags |= hmm_vma_walk->block ? 0 : FAULT_FLAG_ALLOW_RETRY;
-	flags |= hmm_vma_walk->write ? FAULT_FLAG_WRITE : 0;
+	flags |= write_fault ? FAULT_FLAG_WRITE : 0;
 	r = handle_mm_fault(vma, addr, flags);
 	if (r & VM_FAULT_RETRY)
 		return -EBUSY;
@@ -301,15 +299,17 @@ static int hmm_pfns_bad(unsigned long ad
  * hmm_vma_walk_hole() - handle a range back by no pmd or no pte
  * @start: range virtual start address (inclusive)
  * @end: range virtual end address (exclusive)
+ * @fault: should we fault or not ?
+ * @write_fault: write fault ?
  * @walk: mm_walk structure
  * Returns: 0 on success, -EAGAIN after page fault, or page fault error
  *
  * This is an helper call whenever pmd_none() or pte_none() returns true
  * or when there is no directory covering the range.
  */
-static int hmm_vma_walk_hole(unsigned long addr,
-			     unsigned long end,
-			     struct mm_walk *walk)
+static int hmm_vma_walk_hole_(unsigned long addr, unsigned long end,
+			      bool fault, bool write_fault,
+			      struct mm_walk *walk)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
@@ -320,16 +320,89 @@ static int hmm_vma_walk_hole(unsigned lo
 	i = (addr - range->start) >> PAGE_SHIFT;
 	for (; addr < end; addr += PAGE_SIZE, i++) {
 		pfns[i] = 0;
-		if (hmm_vma_walk->fault) {
+		if (fault || write_fault) {
 			int ret;
 
-			ret = hmm_vma_do_fault(walk, addr, &pfns[i]);
+			ret = hmm_vma_do_fault(walk, addr, write_fault,
+					       &pfns[i]);
 			if (ret != -EAGAIN)
 				return ret;
 		}
 	}
 
-	return hmm_vma_walk->fault ? -EAGAIN : 0;
+	return (fault || write_fault) ? -EAGAIN : 0;
+}
+
+static inline void hmm_pte_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+				      uint64_t pfns, uint64_t cpu_flags,
+				      bool *fault, bool *write_fault)
+{
+	*fault = *write_fault = false;
+	if (!hmm_vma_walk->fault)
+		return;
+
+	/* We aren't ask to do anything ... */
+	if (!(pfns & HMM_PFN_VALID))
+		return;
+	/* If CPU page table is not valid then we need to fault */
+	*fault = cpu_flags & HMM_PFN_VALID;
+	/* Need to write fault ? */
+	if ((pfns & HMM_PFN_WRITE) && !(cpu_flags & HMM_PFN_WRITE)) {
+		*fault = *write_fault = false;
+		return;
+	}
+	/* Do we fault on device memory ? */
+	if ((pfns & HMM_PFN_DEVICE_PRIVATE) &&
+	    (cpu_flags & HMM_PFN_DEVICE_PRIVATE)) {
+		*write_fault = pfns & HMM_PFN_WRITE;
+		*fault = true;
+	}
+}
+
+static void hmm_range_need_fault(const struct hmm_vma_walk *hmm_vma_walk,
+				 const uint64_t *pfns, unsigned long npages,
+				 uint64_t cpu_flags, bool *fault,
+				 bool *write_fault)
+{
+	unsigned long i;
+
+	if (!hmm_vma_walk->fault) {
+		*fault = *write_fault = false;
+		return;
+	}
+
+	for (i = 0; i < npages; ++i) {
+		hmm_pte_need_fault(hmm_vma_walk, pfns[i], cpu_flags,
+				   fault, write_fault);
+		if ((*fault) || (*write_fault))
+			return;
+	}
+}
+
+static int hmm_vma_walk_hole(unsigned long addr, unsigned long end,
+			     struct mm_walk *walk)
+{
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	bool fault, write_fault;
+	unsigned long i, npages;
+	uint64_t *pfns;
+
+	i = (addr - range->start) >> PAGE_SHIFT;
+	npages = (end - addr) >> PAGE_SHIFT;
+	pfns = &range->pfns[i];
+	hmm_range_need_fault(hmm_vma_walk, pfns, npages,
+			     0, &fault, &write_fault);
+	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+}
+
+static inline uint64_t pmd_to_hmm_pfn_flags(pmd_t pmd)
+{
+	if (pmd_protnone(pmd))
+		return 0;
+	return pmd_write(pmd) ? HMM_PFN_VALID |
+				HMM_PFN_WRITE :
+				HMM_PFN_VALID;
 }
 
 static int hmm_vma_handle_pmd(struct mm_walk *walk,
@@ -339,14 +412,17 @@ static int hmm_vma_handle_pmd(struct mm_
 			      pmd_t pmd)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
-	unsigned long pfn, i;
-	uint64_t flag = 0;
-
-	if (pmd_protnone(pmd))
-		return hmm_vma_walk_hole(addr, end, walk);
+	unsigned long pfn, npages, i;
+	uint64_t flag = 0, cpu_flags;
+	bool fault, write_fault;
+
+	npages = (end - addr) >> PAGE_SHIFT;
+	cpu_flags = pmd_to_hmm_pfn_flags(pmd);
+	hmm_range_need_fault(hmm_vma_walk, pfns, npages, cpu_flags,
+			     &fault, &write_fault);
 
-	if ((hmm_vma_walk->fault & hmm_vma_walk->write) && !pmd_write(pmd))
-		return hmm_vma_walk_hole(addr, end, walk);
+	if (pmd_protnone(pmd) || fault || write_fault)
+		return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 
 	pfn = pmd_pfn(pmd) + pte_index(addr);
 	flag |= pmd_write(pmd) ? HMM_PFN_WRITE : 0;
@@ -356,19 +432,33 @@ static int hmm_vma_handle_pmd(struct mm_
 	return 0;
 }
 
+static inline uint64_t pte_to_hmm_pfn_flags(pte_t pte)
+{
+	if (pte_none(pte) || !pte_present(pte))
+		return 0;
+	return pte_write(pte) ? HMM_PFN_VALID |
+				HMM_PFN_WRITE :
+				HMM_PFN_VALID;
+}
+
 static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 			      unsigned long end, pmd_t *pmdp, pte_t *ptep,
 			      uint64_t *pfns)
 {
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct vm_area_struct *vma = walk->vma;
+	bool fault, write_fault;
+	uint64_t cpu_flags;
 	pte_t pte = *ptep;
 
 	*pfns = 0;
+	cpu_flags = pte_to_hmm_pfn_flags(pte);
+	hmm_pte_need_fault(hmm_vma_walk, *pfns, cpu_flags,
+			   &fault, &write_fault);
 
 	if (pte_none(pte)) {
 		*pfns = 0;
-		if (hmm_vma_walk->fault)
+		if (fault || write_fault)
 			goto fault;
 		return 0;
 	}
@@ -377,7 +467,7 @@ static int hmm_vma_handle_pte(struct mm_
 		swp_entry_t entry = pte_to_swp_entry(pte);
 
 		if (!non_swap_entry(entry)) {
-			if (hmm_vma_walk->fault)
+			if (fault || write_fault)
 				goto fault;
 			return 0;
 		}
@@ -387,21 +477,20 @@ static int hmm_vma_handle_pte(struct mm_
 		 * device and report anything else as error.
 		 */
 		if (is_device_private_entry(entry)) {
+			cpu_flags = HMM_PFN_VALID | HMM_PFN_DEVICE_PRIVATE;
+			cpu_flags |= is_write_device_private_entry(entry) ?
+					HMM_PFN_WRITE : 0;
 			*pfns = hmm_pfn_from_pfn(swp_offset(entry));
-			if (is_write_device_private_entry(entry)) {
-				*pfns |= HMM_PFN_WRITE;
-			} else if ((hmm_vma_walk->fault & hmm_vma_walk->write))
-				goto fault;
 			*pfns |= HMM_PFN_DEVICE_PRIVATE;
 			return 0;
 		}
 
 		if (is_migration_entry(entry)) {
-			if (hmm_vma_walk->fault) {
+			if (fault || write_fault) {
 				pte_unmap(ptep);
 				hmm_vma_walk->last = addr;
 				migration_entry_wait(vma->vm_mm,
-						pmdp, addr);
+						     pmdp, addr);
 				return -EAGAIN;
 			}
 			return 0;
@@ -412,17 +501,16 @@ static int hmm_vma_handle_pte(struct mm_
 		return -EFAULT;
 	}
 
-	if ((hmm_vma_walk->fault & hmm_vma_walk->write) && !pte_write(pte))
+	if (fault || write_fault)
 		goto fault;
 
-	*pfns = hmm_pfn_from_pfn(pte_pfn(pte));
-	*pfns |= pte_write(pte) ? HMM_PFN_WRITE : 0;
+	*pfns = hmm_pfn_from_pfn(pte_pfn(pte)) | cpu_flags;
 	return 0;
 
 fault:
 	pte_unmap(ptep);
 	/* Fault all pages in range if ask for */
-	return hmm_vma_walk_hole(addr, end, walk);
+	return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
 }
 
 static int hmm_vma_walk_pmd(pmd_t *pmdp,
@@ -642,7 +730,6 @@ EXPORT_SYMBOL(hmm_vma_range_done);
 /*
  * hmm_vma_fault() - try to fault some address in a virtual address range
  * @range: range being faulted and all needed informations
- * @write: is it a write fault
  * @block: allow blocking on fault (if true it sleeps and do not drop mmap_sem)
  * Returns: 0 success, error otherwise (-EAGAIN means mmap_sem have been drop)
  *
@@ -684,7 +771,7 @@ EXPORT_SYMBOL(hmm_vma_range_done);
  *
  * YOU HAVE BEEN WARNED !
  */
-int hmm_vma_fault(struct hmm_range *range, bool write, bool block)
+int hmm_vma_fault(struct hmm_range *range, bool block)
 {
 	struct vm_area_struct *vma = range->vma;
 	unsigned long start = range->start;
@@ -732,7 +819,6 @@ int hmm_vma_fault(struct hmm_range *rang
 	}
 
 	hmm_vma_walk.fault = true;
-	hmm_vma_walk.write = write;
 	hmm_vma_walk.block = block;
 	hmm_vma_walk.range = range;
 	mm_walk.private = &hmm_vma_walk;
_

Patches currently in -mm which might be from jglisse@xxxxxxxxxx are

mm-hmm-fix-header-file-if-else-endif-maze.patch
mm-hmm-hmm_pfns_bad-was-accessing-wrong-struct.patch
mm-hmm-use-struct-for-hmm_vma_fault-hmm_vma_get_pfns-parameters.patch
mm-hmm-remove-hmm_pfn_read-flag-and-ignore-peculiar-architecture.patch
mm-hmm-use-uint64_t-for-hmm-pfn-instead-of-defining-hmm_pfn_t-to-ulong.patch
mm-hmm-cleanup-special-vma-handling-vm_special.patch
mm-hmm-do-not-differentiate-between-empty-entry-or-missing-directory.patch
mm-hmm-rename-hmm_pfn_device_unaddressable-to-hmm_pfn_device_private.patch
mm-hmm-move-hmm_pfns_clear-closer-to-where-it-is-use.patch
mm-hmm-factor-out-pte-and-pmd-handling-to-simplify-hmm_vma_walk_pmd.patch
mm-hmm-change-hmm_vma_fault-to-allow-write-fault-on-page-basis.patch
mm-hmm-use-device-driver-encoding-for-hmm-pfn.patch

--
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux