+ mm-hmm-support-hugetlbfs-snap-shoting-faulting-and-dma-mapping.patch added to -mm tree

The patch titled
     Subject: mm/hmm: support hugetlbfs (snapshotting, faulting and DMA mapping)
has been added to the -mm tree.  Its filename is
     mm-hmm-support-hugetlbfs-snap-shoting-faulting-and-dma-mapping.patch

This patch should soon appear at
    http://ozlabs.org/~akpm/mmots/broken-out/mm-hmm-support-hugetlbfs-snap-shoting-faulting-and-dma-mapping.patch
and later at
    http://ozlabs.org/~akpm/mmotm/broken-out/mm-hmm-support-hugetlbfs-snap-shoting-faulting-and-dma-mapping.patch

Before you just go and hit "reply", please:
   a) Consider who else should be cc'ed
   b) Prefer to cc a suitable mailing list as well
   c) Ideally: find the original patch on the mailing list and do a
      reply-to-all to that, adding suitable additional cc's

*** Remember to use Documentation/process/submit-checklist.rst when testing your code ***

The -mm tree is included into linux-next and is updated
there every 3-4 working days

------------------------------------------------------
From: Jérôme Glisse <jglisse@xxxxxxxxxx>
Subject: mm/hmm: support hugetlbfs (snapshotting, faulting and DMA mapping)

This adds support for hugetlbfs so that HMM users can mirror a range of
virtual addresses backed by hugetlbfs.  Note that the range now allows
users to optimize the DMA mapping of such pages, so that a huge page can
be mapped as a single chunk.
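
For illustration only (not part of this patch), a minimal driver-side
sketch of how the new page_shift argument might be used to mirror a
hugetlbfs backed range.  The helper name is hypothetical, the caller is
assumed to have filled in range->pfns, flags, values and fault masks as
usual, and mmap_sem locking plus -EAGAIN/-EBUSY retry handling are
omitted:

    /* Hypothetical example, assuming a hugetlbfs backed vma. */
    static long example_mirror_huge_range(struct hmm_range *range,
                                          struct vm_area_struct *vma,
                                          unsigned long start,
                                          unsigned long end)
    {
            struct hstate *h = hstate_vma(vma);
            long ret;

            /* Register with the huge page shift of the backing hstate. */
            ret = hmm_range_register(range, vma->vm_mm, start, end,
                                     huge_page_shift(h));
            if (ret)
                    return ret;

            /* Fault pages; each range->pfns[] entry covers one huge page. */
            ret = hmm_range_fault(range, true);

            hmm_range_unregister(range);
            return ret;
    }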

Link: http://lkml.kernel.org/r/20190129165428.3931-9-jglisse@xxxxxxxxxx
Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
Cc: Ralph Campbell <rcampbell@xxxxxxxxxx>
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Cc: Christian König <christian.koenig@xxxxxxx>
Cc: Dan Williams <dan.j.williams@xxxxxxxxx>
Cc: Felix Kuehling <Felix.Kuehling@xxxxxxx>
Cc: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
---

 include/linux/hmm.h |   29 +++++++-
 mm/hmm.c            |  141 ++++++++++++++++++++++++++++++++++++------
 2 files changed, 147 insertions(+), 23 deletions(-)

--- a/include/linux/hmm.h~mm-hmm-support-hugetlbfs-snap-shoting-faulting-and-dma-mapping
+++ a/include/linux/hmm.h
@@ -181,11 +181,32 @@ struct hmm_range {
 	const uint64_t		*values;
 	uint64_t		default_flags;
 	uint64_t		pfn_flags_mask;
+	uint8_t			page_shift;
 	uint8_t			pfn_shift;
 	bool			valid;
 };
 
 /*
+ * hmm_range_page_shift() - return the page shift for the range
+ * @range: range being queried
+ * Returns: page shift (page size = 1 << page shift) for the range
+ */
+static inline unsigned hmm_range_page_shift(const struct hmm_range *range)
+{
+	return range->page_shift;
+}
+
+/*
+ * hmm_range_page_size() - return the page size for the range
+ * @range: range being queried
+ * Returns: page size for the range in bytes
+ */
+static inline unsigned long hmm_range_page_size(const struct hmm_range *range)
+{
+	return 1UL << hmm_range_page_shift(range);
+}
+
+/*
  * hmm_range_wait_until_valid() - wait for range to be valid
  * @range: range affected by invalidation to wait on
  * @timeout: time out for wait in ms (ie abort wait after that period of time)
@@ -438,7 +459,7 @@ void hmm_mirror_unregister(struct hmm_mi
  *          struct hmm_range range;
  *          ...
  *
- *          ret = hmm_range_register(&range, mm, start, end);
+ *          ret = hmm_range_register(&range, mm, start, end, page_shift);
  *          if (ret)
  *              return ret;
  *
@@ -498,7 +519,8 @@ void hmm_mirror_unregister(struct hmm_mi
 int hmm_range_register(struct hmm_range *range,
 		       struct mm_struct *mm,
 		       unsigned long start,
-		       unsigned long end);
+		       unsigned long end,
+		       unsigned page_shift);
 void hmm_range_unregister(struct hmm_range *range);
 long hmm_range_snapshot(struct hmm_range *range);
 long hmm_range_fault(struct hmm_range *range, bool block);
@@ -538,7 +560,8 @@ static inline int hmm_vma_fault(struct h
 	range->pfn_flags_mask = -1UL;
 
 	ret = hmm_range_register(range, range->vma->vm_mm,
-				 range->start, range->end);
+				 range->start, range->end,
+				 PAGE_SHIFT);
 	if (ret)
 		return (int)ret;
 
--- a/mm/hmm.c~mm-hmm-support-hugetlbfs-snap-shoting-faulting-and-dma-mapping
+++ a/mm/hmm.c
@@ -396,11 +396,13 @@ static int hmm_vma_walk_hole_(unsigned l
 	struct hmm_vma_walk *hmm_vma_walk = walk->private;
 	struct hmm_range *range = hmm_vma_walk->range;
 	uint64_t *pfns = range->pfns;
-	unsigned long i;
+	unsigned long i, page_size;
 
 	hmm_vma_walk->last = addr;
-	i = (addr - range->start) >> PAGE_SHIFT;
-	for (; addr < end; addr += PAGE_SIZE, i++) {
+	page_size = 1UL << range->page_shift;
+	i = (addr - range->start) >> range->page_shift;
+
+	for (; addr < end; addr += page_size, i++) {
 		pfns[i] = range->values[HMM_PFN_NONE];
 		if (fault || write_fault) {
 			int ret;
@@ -712,6 +714,69 @@ again:
 	return 0;
 }
 
+static int hmm_vma_walk_hugetlb_entry(pte_t *pte, unsigned long hmask,
+				      unsigned long start, unsigned long end,
+				      struct mm_walk *walk)
+{
+#ifdef CONFIG_HUGETLB_PAGE
+	unsigned long addr = start, i, pfn, mask, size, pfn_inc;
+	struct hmm_vma_walk *hmm_vma_walk = walk->private;
+	struct hmm_range *range = hmm_vma_walk->range;
+	struct vm_area_struct *vma = walk->vma;
+	struct hstate *h = hstate_vma(vma);
+	uint64_t orig_pfn, cpu_flags;
+	bool fault, write_fault;
+	spinlock_t *ptl;
+	pte_t entry;
+	int ret = 0;
+
+	size = 1UL << huge_page_shift(h);
+	mask = size - 1;
+	if (range->page_shift != PAGE_SHIFT) {
+		/* Make sure we are looking at full page. */
+		if (start & mask)
+			return -EINVAL;
+		if (end < (start + size))
+			return -EINVAL;
+		pfn_inc = size >> PAGE_SHIFT;
+	} else {
+		pfn_inc = 1;
+		size = PAGE_SIZE;
+	}
+
+
+	ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte);
+	entry = huge_ptep_get(pte);
+
+	i = (start - range->start) >> range->page_shift;
+	orig_pfn = range->pfns[i];
+	range->pfns[i] = range->values[HMM_PFN_NONE];
+	cpu_flags = pte_to_hmm_pfn_flags(range, entry);
+	fault = write_fault = false;
+	hmm_pte_need_fault(hmm_vma_walk, orig_pfn, cpu_flags,
+			   &fault, &write_fault);
+	if (fault || write_fault) {
+		ret = -ENOENT;
+		goto unlock;
+	}
+
+	pfn = pte_pfn(entry) + (start & mask);
+	for (; addr < end; addr += size, i++, pfn += pfn_inc)
+		range->pfns[i] = hmm_pfn_from_pfn(range, pfn) | cpu_flags;
+	hmm_vma_walk->last = end;
+
+unlock:
+	spin_unlock(ptl);
+
+	if (ret == -ENOENT)
+		return hmm_vma_walk_hole_(addr, end, fault, write_fault, walk);
+
+	return ret;
+#else /* CONFIG_HUGETLB_PAGE */
+	return -EINVAL;
+#endif
+}
+
 static void hmm_pfns_clear(struct hmm_range *range,
 			   uint64_t *pfns,
 			   unsigned long addr,
@@ -735,6 +800,7 @@ static void hmm_pfns_special(struct hmm_
  * @mm: the mm struct for the range of virtual address
  * @start: start virtual address (inclusive)
  * @end: end virtual address (exclusive)
+ * @page_shift: expected page shift for the range
  * Returns 0 on success, -EFAULT if the address space is no longer valid
  *
  * Track updates to the CPU page table see include/linux/hmm.h
@@ -742,15 +808,22 @@ static void hmm_pfns_special(struct hmm_
 int hmm_range_register(struct hmm_range *range,
 		       struct mm_struct *mm,
 		       unsigned long start,
-		       unsigned long end)
+		       unsigned long end,
+		       unsigned page_shift)
 {
-	range->start = start & PAGE_MASK;
-	range->end = end & PAGE_MASK;
+	unsigned long mask = ((1UL << page_shift) - 1UL);
+
 	range->valid = false;
 	range->hmm = NULL;
 
-	if (range->start >= range->end)
+	if ((start & mask) || (end & mask))
 		return -EINVAL;
+	if (start >= end)
+		return -EINVAL;
+
+	range->page_shift = page_shift;
+	range->start = start;
+	range->end = end;
 
 	range->hmm = hmm_register(mm);
 	if (!range->hmm)
@@ -818,6 +891,7 @@ EXPORT_SYMBOL(hmm_range_unregister);
  */
 long hmm_range_snapshot(struct hmm_range *range)
 {
+	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
 	unsigned long start = range->start, end;
 	struct hmm_vma_walk hmm_vma_walk;
 	struct hmm *hmm = range->hmm;
@@ -834,15 +908,26 @@ long hmm_range_snapshot(struct hmm_range
 			return -EAGAIN;
 
 		vma = find_vma(hmm->mm, start);
-		if (vma == NULL || (vma->vm_flags & VM_SPECIAL))
+		if (vma == NULL || (vma->vm_flags & device_vma))
 			return -EFAULT;
 
-		/* FIXME support hugetlb fs/dax */
-		if (is_vm_hugetlb_page(vma) || vma_is_dax(vma)) {
+		/* FIXME support dax */
+		if (vma_is_dax(vma)) {
 			hmm_pfns_special(range);
 			return -EINVAL;
 		}
 
+		if (is_vm_hugetlb_page(vma)) {
+			struct hstate *h = hstate_vma(vma);
+
+			if (huge_page_shift(h) != range->page_shift &&
+			    range->page_shift != PAGE_SHIFT)
+				return -EINVAL;
+		} else {
+			if (range->page_shift != PAGE_SHIFT)
+				return -EINVAL;
+		}
+
 		if (!(vma->vm_flags & VM_READ)) {
 			/*
 			 * If vma do not allow read access, then assume that it
@@ -868,6 +953,7 @@ long hmm_range_snapshot(struct hmm_range
 		mm_walk.hugetlb_entry = NULL;
 		mm_walk.pmd_entry = hmm_vma_walk_pmd;
 		mm_walk.pte_hole = hmm_vma_walk_hole;
+		mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
 
 		walk_page_range(start, end, &mm_walk);
 		start = end;
@@ -909,6 +995,7 @@ EXPORT_SYMBOL(hmm_range_snapshot);
  */
 long hmm_range_fault(struct hmm_range *range, bool block)
 {
+	const unsigned long device_vma = VM_IO | VM_PFNMAP | VM_MIXEDMAP;
 	unsigned long start = range->start, end;
 	struct hmm_vma_walk hmm_vma_walk;
 	struct hmm *hmm = range->hmm;
@@ -928,15 +1015,26 @@ long hmm_range_fault(struct hmm_range *r
 		}
 
 		vma = find_vma(hmm->mm, start);
-		if (vma == NULL || (vma->vm_flags & VM_SPECIAL))
+		if (vma == NULL || (vma->vm_flags & device_vma))
 			return -EFAULT;
 
-		/* FIXME support hugetlb fs/dax */
-		if (is_vm_hugetlb_page(vma) || vma_is_dax(vma)) {
+		/* FIXME support dax */
+		if (vma_is_dax(vma)) {
 			hmm_pfns_special(range);
 			return -EINVAL;
 		}
 
+		if (is_vm_hugetlb_page(vma)) {
+			struct hstate *h = hstate_vma(vma);
+
+			if (huge_page_shift(h) != range->page_shift &&
+			    range->page_shift != PAGE_SHIFT)
+				return -EINVAL;
+		} else {
+			if (range->page_shift != PAGE_SHIFT)
+				return -EINVAL;
+		}
+
 		if (!(vma->vm_flags & VM_READ)) {
 			/*
 			 * If vma do not allow read access, then assume that it
@@ -963,6 +1061,7 @@ long hmm_range_fault(struct hmm_range *r
 		mm_walk.hugetlb_entry = NULL;
 		mm_walk.pmd_entry = hmm_vma_walk_pmd;
 		mm_walk.pte_hole = hmm_vma_walk_hole;
+		mm_walk.hugetlb_entry = hmm_vma_walk_hugetlb_entry;
 
 		do {
 			ret = walk_page_range(start, end, &mm_walk);
@@ -1003,14 +1102,15 @@ long hmm_range_dma_map(struct hmm_range
 		       dma_addr_t *daddrs,
 		       bool block)
 {
-	unsigned long i, npages, mapped;
+	unsigned long i, npages, mapped, page_size;
 	long ret;
 
 	ret = hmm_range_fault(range, block);
 	if (ret <= 0)
 		return ret ? ret : -EBUSY;
 
-	npages = (range->end - range->start) >> PAGE_SHIFT;
+	page_size = hmm_range_page_size(range);
+	npages = (range->end - range->start) >> range->page_shift;
 	for (i = 0, mapped = 0; i < npages; ++i) {
 		enum dma_data_direction dir = DMA_FROM_DEVICE;
 		struct page *page;
@@ -1039,7 +1139,7 @@ long hmm_range_dma_map(struct hmm_range
 		if (range->pfns[i] & range->values[HMM_PFN_WRITE])
 			dir = DMA_BIDIRECTIONAL;
 
-		daddrs[i] = dma_map_page(device, page, 0, PAGE_SIZE, dir);
+		daddrs[i] = dma_map_page(device, page, 0, page_size, dir);
 		if (dma_mapping_error(device, daddrs[i])) {
 			ret = -EFAULT;
 			goto unmap;
@@ -1066,7 +1166,7 @@ unmap:
 		if (range->pfns[i] & range->values[HMM_PFN_WRITE])
 			dir = DMA_BIDIRECTIONAL;
 
-		dma_unmap_page(device, daddrs[i], PAGE_SIZE, dir);
+		dma_unmap_page(device, daddrs[i], page_size, dir);
 		mapped--;
 	}
 
@@ -1094,7 +1194,7 @@ long hmm_range_dma_unmap(struct hmm_rang
 			 dma_addr_t *daddrs,
 			 bool dirty)
 {
-	unsigned long i, npages;
+	unsigned long i, npages, page_size;
 	long cpages = 0;
 
 	/* Sanity check. */
@@ -1105,7 +1205,8 @@ long hmm_range_dma_unmap(struct hmm_rang
 	if (!range->pfns)
 		return -EINVAL;
 
-	npages = (range->end - range->start) >> PAGE_SHIFT;
+	page_size = hmm_range_page_size(range);
+	npages = (range->end - range->start) >> range->page_shift;
 	for (i = 0; i < npages; ++i) {
 		enum dma_data_direction dir = DMA_FROM_DEVICE;
 		struct page *page;
@@ -1127,7 +1228,7 @@ long hmm_range_dma_unmap(struct hmm_rang
 		}
 
 		/* Unmap and clear pfns/dma address */
-		dma_unmap_page(device, daddrs[i], PAGE_SIZE, dir);
+		dma_unmap_page(device, daddrs[i], page_size, dir);
 		range->pfns[i] = range->values[HMM_PFN_NONE];
 		/* FIXME see comments in hmm_vma_dma_map() */
 		daddrs[i] = 0;
_

Patches currently in -mm which might be from jglisse@xxxxxxxxxx are

mm-hmm-use-reference-counting-for-hmm-struct.patch
mm-hmm-do-not-erase-snapshot-when-a-range-is-invalidated.patch
mm-hmm-improve-and-rename-hmm_vma_get_pfns-to-hmm_range_snapshot.patch
mm-hmm-improve-and-rename-hmm_vma_fault-to-hmm_range_fault.patch
mm-hmm-improve-driver-api-to-work-and-wait-over-a-range.patch
mm-hmm-add-default-fault-flags-to-avoid-the-need-to-pre-fill-pfns-arrays.patch
mm-hmm-add-an-helper-function-that-fault-pages-and-map-them-to-a-device.patch
mm-hmm-support-hugetlbfs-snap-shoting-faulting-and-dma-mapping.patch
mm-hmm-allow-to-mirror-vma-of-a-file-on-a-dax-backed-filesystem.patch
mm-hmm-add-helpers-for-driver-to-safely-take-the-mmap_sem.patch
mm-mmu_notifier-contextual-information-for-event-triggering-invalidation-v2.patch
mm-mmu_notifier-contextual-information-for-event-triggering-invalidation-v2-fix.patch



