For
DAX, dax_direct_access() could switch from returning pfns to returning
the associated @pgmap and @pgmap_offset. Those changes are saved for
follow-on work.
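As a rough illustration of where that could go (the reworked prototype
below is hypothetical and not part of this series), the idea is for
dax_direct_access() to hand back the @pgmap and @pgmap_offset that
callers then feed straight into pgmap_request_folio():

    /* Hypothetical follow-on interface, for illustration only */
    long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
                           long nr_pages, enum dax_access_mode mode,
                           void **kaddr, struct dev_pagemap **pgmap,
                           pgoff_t *pgmap_offset);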
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Jan Kara <jack@xxxxxxx>
Cc: "Darrick J. Wong" <djwong@xxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: John Hubbard <jhubbard@xxxxxxxxxx>
Cc: Alistair Popple <apopple@xxxxxxxxxx>
Cc: Felix Kuehling <Felix.Kuehling@xxxxxxx>
Cc: Alex Deucher <alexander.deucher@xxxxxxx>
Cc: "Christian König" <christian.koenig@xxxxxxx>
Cc: "Pan, Xinhui" <Xinhui.Pan@xxxxxxx>
Cc: David Airlie <airlied@xxxxxxxx>
Cc: Daniel Vetter <daniel@xxxxxxxx>
Cc: Ben Skeggs <bskeggs@xxxxxxxxxx>
Cc: Karol Herbst <kherbst@xxxxxxxxxx>
Cc: Lyude Paul <lyude@xxxxxxxxxx>
Cc: "Jérôme Glisse" <jglisse@xxxxxxxxxx>
Suggested-by: Jason Gunthorpe <jgg@xxxxxxxxxx>
Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
This builds on the dax reference counting reworks in mm-unstable.
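For reviewers, a minimal sketch of the calling convention introduced
below (the surrounding driver context and error handling are
illustrative only; @pgmap and @pfn are assumed to be in scope):

        struct folio *folio;

        /* translate a pfn into an offset relative to @pgmap's ranges */
        folio = pgmap_request_folio(pgmap,
                                    pfn_to_pgmap_offset(pgmap, pfn), 0);
        if (!folio)
                return -EBUSY; /* pgmap is dying, or order mismatch */

        /* ... operate on folio / &folio->page ... */

        folio_put(folio); /* final put drops the pgmap reference */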
arch/powerpc/kvm/book3s_hv_uvmem.c | 11 ++--
drivers/dax/mapping.c | 10 +++
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 14 +++--
drivers/gpu/drm/nouveau/nouveau_dmem.c | 13 +++-
include/linux/memremap.h | 35 ++++++++---
lib/test_hmm.c | 9 +++
mm/memremap.c | 92 ++++++++++++------------------
7 files changed, 106 insertions(+), 78 deletions(-)
diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 884ec112ad43..2ea59396f608 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -689,12 +689,14 @@ unsigned long kvmppc_h_svm_init_abort(struct kvm *kvm)
*/
static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
{
- struct page *dpage = NULL;
+ struct dev_pagemap *pgmap = &kvmppc_uvmem_pgmap;
unsigned long bit, uvmem_pfn;
struct kvmppc_uvmem_page_pvt *pvt;
unsigned long pfn_last, pfn_first;
+ struct folio *folio;
+ struct page *dpage;
- pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
+ pfn_first = pgmap->range.start >> PAGE_SHIFT;
pfn_last = pfn_first +
(range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT);
@@ -716,9 +718,10 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
pvt->gpa = gpa;
pvt->kvm = kvm;
- dpage = pfn_to_page(uvmem_pfn);
+ folio = pgmap_request_folio(pgmap,
+ pfn_to_pgmap_offset(pgmap, uvmem_pfn), 0);
+ dpage = &folio->page;
dpage->zone_device_data = pvt;
- pgmap_request_folios(dpage->pgmap, page_folio(dpage), 1);
lock_page(dpage);
return dpage;
out_clear:
diff --git a/drivers/dax/mapping.c b/drivers/dax/mapping.c
index ca06f2515644..b885c75e2dfb 100644
--- a/drivers/dax/mapping.c
+++ b/drivers/dax/mapping.c
@@ -376,8 +376,14 @@ static vm_fault_t dax_associate_entry(void *entry,
if (flags & DAX_COW) {
dax_mapping_set_cow(folio);
} else {
+ struct dev_pagemap *pgmap = folio_pgmap(folio);
+ unsigned long pfn = page_to_pfn(&folio->page);
+
WARN_ON_ONCE(folio->mapping);
- if (!pgmap_request_folios(folio_pgmap(folio), folio, 1))
+ if (folio !=
+ pgmap_request_folio(pgmap,
+ pfn_to_pgmap_offset(pgmap, pfn),
+ folio_order(folio)))
return VM_FAULT_SIGBUS;
folio->mapping = mapping;
folio->index = index + i;
@@ -691,7 +697,7 @@ static struct page *dax_zap_pages(struct xa_state *xas, void *entry)
dax_for_each_folio(entry, folio, i) {
if (zap)
- pgmap_release_folios(folio, 1);
+ folio_put(folio);
if (!ret && !dax_folio_idle(folio))
ret = folio_page(folio, 0);
}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 8cf97060122b..1cecee358a9e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -215,15 +215,17 @@ svm_migrate_addr_to_pfn(struct amdgpu_device *adev, unsigned long addr)
return (addr + adev->kfd.dev->pgmap.range.start) >> PAGE_SHIFT;
}
-static void
-svm_migrate_get_vram_page(struct svm_range *prange, unsigned long pfn)
+static void svm_migrate_get_vram_page(struct dev_pagemap *pgmap,
+ struct svm_range *prange,
+ unsigned long pfn)
{
+ struct folio *folio;
struct page *page;
- page = pfn_to_page(pfn);
+ folio = pgmap_request_folio(pgmap, pfn_to_pgmap_offset(pgmap, pfn), 0);
+ page = &folio->page;
svm_range_bo_ref(prange->svm_bo);
page->zone_device_data = prange->svm_bo;
- pgmap_request_folios(page->pgmap, page_folio(page), 1);
lock_page(page);
}
@@ -298,6 +300,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
dma_addr_t *scratch)
{
+ struct kfd_dev *kfddev = adev->kfd.dev;
uint64_t npages = migrate->npages;
struct device *dev = adev->dev;
struct amdgpu_res_cursor cursor;
@@ -327,7 +330,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
if (spage && !is_zone_device_page(spage)) {
dst[i] = cursor.start + (j << PAGE_SHIFT);
migrate->dst[i] = svm_migrate_addr_to_pfn(adev, dst[i]);
- svm_migrate_get_vram_page(prange, migrate->dst[i]);
+ svm_migrate_get_vram_page(&kfddev->pgmap, prange,
+ migrate->dst[i]);
migrate->dst[i] = migrate_pfn(migrate->dst[i]);
src[i] = dma_map_page(dev, spage, 0, PAGE_SIZE,
DMA_TO_DEVICE);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 1482533c7ca0..24208a1d7441 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -307,6 +307,9 @@ static struct page *
nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
{
struct nouveau_dmem_chunk *chunk;
+ struct dev_pagemap *pgmap;
+ struct folio *folio;
+ unsigned long pfn;
struct page *page = NULL;
int ret;
@@ -316,17 +319,21 @@ nouveau_dmem_page_alloc_locked(struct nouveau_drm *drm)
drm->dmem->free_pages = page->zone_device_data;
chunk = nouveau_page_to_chunk(page);
chunk->callocated++;
+ pfn = page_to_pfn(page);
spin_unlock(&drm->dmem->lock);
} else {
spin_unlock(&drm->dmem->lock);
ret = nouveau_dmem_chunk_alloc(drm, &page);
if (ret)
return NULL;
+ chunk = nouveau_page_to_chunk(page);
+ pfn = page_to_pfn(page);
}
- pgmap_request_folios(page->pgmap, page_folio(page), 1);
- lock_page(page);
- return page;
+ pgmap = &chunk->pagemap;
+ folio = pgmap_request_folio(pgmap, pfn_to_pgmap_offset(pgmap, pfn), 0);
+ lock_page(&folio->page);
+ return &folio->page;
}
static void
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index ddb196ae0696..f11f827883bb 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -139,6 +139,28 @@ struct dev_pagemap {
};
};
+/*
+ * Do not use this in new code, this is a transitional helper on the
+ * path to convert all ZONE_DEVICE users to operate in terms of pgmap
+ * offsets, rather than pfns and pfn_to_page(), when putting ZONE_DEVICE
+ * pages into use.
+ */
+static inline pgoff_t pfn_to_pgmap_offset(struct dev_pagemap *pgmap, unsigned long pfn)
+{
+ u64 phys = PFN_PHYS(pfn), sum = 0;
+ int i;
+
+ for (i = 0; i < pgmap->nr_range; i++) {
+ struct range *range = &pgmap->ranges[i];
+
+ if (phys >= range->start && phys <= range->end)
+ return PHYS_PFN(phys - range->start + sum);
+ sum += range_len(range);
+ }
+
+ return -1;
+}
+
static inline bool pgmap_has_memory_failure(struct dev_pagemap *pgmap)
{
return pgmap->ops && pgmap->ops->memory_failure;
@@ -193,9 +215,8 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
-bool pgmap_request_folios(struct dev_pagemap *pgmap, struct folio *folio,
- int nr_folios);
-void pgmap_release_folios(struct folio *folio, int nr_folios);
+struct folio *pgmap_request_folio(struct dev_pagemap *pgmap,
+ pgoff_t pgmap_offset, int order);
bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
@@ -231,16 +252,12 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
return NULL;
}
-static inline bool pgmap_request_folios(struct dev_pagemap *pgmap,
- struct folio *folio, int nr_folios)
+static inline struct folio *pgmap_request_folio(struct dev_pagemap *pgmap,
+ pgoff_t pgmap_offset, int order)
{
- return false;
+ return NULL;
}
-static inline void pgmap_release_folios(struct folio *folio, int nr_folios)
-{
-}
-
static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
{
return false;
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e4f7219ae3bb..1f7e00ae62d5 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -605,8 +605,11 @@ static int dmirror_allocate_chunk(struct dmirror_device *mdevice,
static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
{
+ struct dev_pagemap *pgmap;
struct page *dpage = NULL;
struct page *rpage = NULL;
+ struct folio *folio;
+ unsigned long pfn;
/*
* For ZONE_DEVICE private type, this is a fake device so we allocate
@@ -632,7 +635,11 @@ static struct page *dmirror_devmem_alloc_page(struct dmirror_device *mdevice)
goto error;
}
- pgmap_request_folios(dpage->pgmap, page_folio(dpage), 1);
+ /* FIXME: Rework allocator to be pgmap offset based */
+ pgmap = dpage->pgmap;
+ pfn = page_to_pfn(dpage);
+ folio = pgmap_request_folio(pgmap, pfn_to_pgmap_offset(pgmap, pfn), 0);
+ WARN_ON_ONCE(dpage != &folio->page);
lock_page(dpage);
dpage->zone_device_data = rpage;
return dpage;
diff --git a/mm/memremap.c b/mm/memremap.c
index 02b796749b72..09b20a337db9 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -492,76 +492,60 @@ void free_zone_device_page(struct page *page)
put_dev_pagemap(page->pgmap);
}
-static __maybe_unused bool folio_span_valid(struct dev_pagemap *pgmap,
- struct folio *folio,
- int nr_folios)
+static unsigned long pgmap_offset_to_pfn(struct dev_pagemap *pgmap,
+ pgoff_t pgmap_offset)
{
- unsigned long pfn_start, pfn_end;
-
- pfn_start = page_to_pfn(folio_page(folio, 0));
- pfn_end = pfn_start + (1 << folio_order(folio)) * nr_folios - 1;
+ u64 sum = 0, offset = PFN_PHYS(pgmap_offset);
+ int i;
- if (pgmap != xa_load(&pgmap_array, pfn_start))
- return false;
+ for (i = 0; i < pgmap->nr_range; i++) {
+ struct range *range = &pgmap->ranges[i];
- if (pfn_end > pfn_start && pgmap != xa_load(&pgmap_array, pfn_end))
- return false;
+ if (offset >= sum && offset < (sum + range_len(range)))
+ return PHYS_PFN(range->start + offset - sum);
+ sum += range_len(range);
+ }
- return true;
+ return -1;
}
/**
- * pgmap_request_folios - activate an contiguous span of folios in @pgmap
- * @pgmap: host page map for the folio array
- * @folio: start of the folio list, all subsequent folios have same folio_size()
+ * pgmap_request_folio - activate a folio of a given order in @pgmap
+ * @pgmap: host page map of the folio to activate
+ * @pgmap_offset: page-offset into the pgmap to request
+ * @order: expected folio_order() of the folio
*
* Caller is responsible for @pgmap remaining live for the duration of
- * this call. Caller is also responsible for not racing requests for the
- * same folios.
+ * this call. The order (size) of the folios in the pgmap is assumed
+ * to be stable before this call.
*/
-bool pgmap_request_folios(struct dev_pagemap *pgmap, struct folio *folio,
- int nr_folios)
+struct folio *pgmap_request_folio(struct dev_pagemap *pgmap,
+ pgoff_t pgmap_offset, int order)
{
- struct folio *iter;
- int i;
+ unsigned long pfn = pgmap_offset_to_pfn(pgmap, pgmap_offset);
+ struct page *page = pfn_to_page(pfn);
+ struct folio *folio;
+ int v;
- /*
- * All of the WARNs below are for catching bugs in future
- * development that changes the assumptions of:
- * 1/ uniform folios in @pgmap
- * 2/ @pgmap death does not race this routine.
- */
- VM_WARN_ON_ONCE(!folio_span_valid(pgmap, folio, nr_folios));
+ if (WARN_ON_ONCE(page->pgmap != pgmap))
+ return NULL;
if (WARN_ON_ONCE(percpu_ref_is_dying(&pgmap->ref)))
- return false;
+ return NULL;
- for (iter = folio_next(folio), i = 1; i < nr_folios;
- iter = folio_next(folio), i++)
- if (WARN_ON_ONCE(folio_order(iter) != folio_order(folio)))
- return false;
+ folio = page_folio(page);
+ if (WARN_ON_ONCE(folio_order(folio) != order))
+ return NULL;
- for (iter = folio, i = 0; i < nr_folios; iter = folio_next(iter), i++) {
- folio_ref_inc(iter);
- if (folio_ref_count(iter) == 1)
- percpu_ref_tryget(&pgmap->ref);
- }
-
- return true;
-}
-EXPORT_SYMBOL_GPL(pgmap_request_folios);
+ v = folio_ref_inc_return(folio);
+ if (v > 1)
+ return folio;
-/*
- * A symmetric helper to undo the page references acquired by
- * pgmap_request_folios(), but the caller can also just arrange
- * folio_put() on all the folios it acquired previously for the same
- * effect.
- */
-void pgmap_release_folios(struct folio *folio, int nr_folios)
-{
- struct folio *iter;
- int i;
+ if (WARN_ON_ONCE(!percpu_ref_tryget(&pgmap->ref))) {
+ folio_put(folio);
+ return NULL;
+ }
- for (iter = folio, i = 0; i < nr_folios; iter = folio_next(folio), i++)
- folio_put(iter);
+ return folio;
}
+EXPORT_SYMBOL_GPL(pgmap_request_folio);