From: Yonatan Maman <Ymaman@xxxxxxxxxx>

hmm_range_fault() by default triggers a page fault on device private
pages when the HMM_PFN_REQ_FAULT flag is set, migrating them to RAM. In
some cases, such as with RDMA devices, the migration overhead between
the device (e.g., GPU) and the CPU, and vice versa, significantly
degrades performance. Enabling Peer-to-Peer (P2P) DMA access to device
private pages can therefore be crucial for minimizing data transfer
overhead.

Introduce an API to support P2P DMA for device private pages. It
includes:

 - Leveraging struct dev_pagemap_ops for a P2P page callback. The
   callback maps the page for P2P DMA and returns the corresponding
   PCI P2P page.

 - Utilizing hmm_range_fault() for initializing P2P DMA. The API adds
   the HMM_PFN_ALLOW_P2P flag so the hmm_range_fault() caller can
   request P2P. If set, hmm_range_fault() first attempts to establish
   the P2P connection using the P2P page, provided the owner device
   supports P2P. On failure or lack of support, hmm_range_fault() falls
   back to the regular flow of migrating the page to RAM.

This change does not affect existing users of hmm_range_fault(),
because both the caller and the page owner must explicitly request and
support P2P for the connection to be initialized.

Signed-off-by: Yonatan Maman <Ymaman@xxxxxxxxxx>
Signed-off-by: Gal Shalom <GalShalom@xxxxxxxxxx>
---
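Note for reviewers: below is a minimal, hypothetical page-owner sketch
(not part of this patch) showing how a driver exposing device private
memory might wire up the new dev_pagemap_ops callback. The mydev_*
names, including mydev_private_to_p2p_pfn() and mydev_migrate_to_ram(),
are assumed driver helpers, not existing kernel functions.

/*
 * Hypothetical page-owner sketch (illustrative only): expose private
 * pages over P2P. mydev_private_to_p2p_pfn() stands in for a driver
 * helper that resolves the P2PDMA PFN backing @private_page.
 */
static int mydev_get_dma_pfn_for_device(struct page *private_page,
					unsigned long *dma_pfn)
{
	unsigned long pfn;

	/* Non-zero return: hmm_range_fault() falls back to migration */
	if (!mydev_private_to_p2p_pfn(private_page, &pfn))
		return -EOPNOTSUPP;

	*dma_pfn = pfn;
	return 0;
}

static const struct dev_pagemap_ops mydev_pagemap_ops = {
	.migrate_to_ram		= mydev_migrate_to_ram,
	.get_dma_pfn_for_device	= mydev_get_dma_pfn_for_device,
};
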
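And a similarly hypothetical caller-side sketch of an RDMA-style driver
opting in to P2P PFNs via the new flag; the function name is
illustrative, and notifier sequencing (mmu_interval_read_begin()/retry)
and error handling are elided.

/*
 * Hypothetical caller sketch (illustrative only): fault pages in and
 * allow P2P PFNs for device private pages owned by other devices.
 */
static int example_fault_and_allow_p2p(struct mm_struct *mm,
				       struct mmu_interval_notifier *notifier,
				       unsigned long start,
				       unsigned long npages,
				       unsigned long *pfns)
{
	struct hmm_range range = {
		.notifier = notifier,
		.start = start,
		.end = start + (npages << PAGE_SHIFT),
		.hmm_pfns = pfns,
		/* Fault missing pages; allow P2P PFNs where supported */
		.default_flags = HMM_PFN_REQ_FAULT | HMM_PFN_ALLOW_P2P,
		/* NULL: this caller owns no device private memory itself */
		.dev_private_owner = NULL,
	};
	int ret;

	mmap_read_lock(mm);
	ret = hmm_range_fault(&range);
	mmap_read_unlock(mm);
	return ret;
}

A caller that owns device private memory itself would instead set
.dev_private_owner, so its own pages are reported directly rather than
going through the P2P path.
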
 include/linux/hmm.h      |  3 ++-
 include/linux/memremap.h |  8 ++++++
 mm/hmm.c                 | 57 +++++++++++++++++++++++++++++++++-------
 3 files changed, 57 insertions(+), 11 deletions(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index 62980ca8f3c5..017f22cef893 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -26,6 +26,7 @@ struct mmu_interval_notifier;
  * HMM_PFN_DMA_MAPPED - Flag preserved on input-to-output transformation
  *	to mark that page is already DMA mapped
+ * HMM_PFN_ALLOW_P2P - Allow returning PCI P2PDMA page
  *
  * On input:
  * 0 - Return the current state of the page, do not fault it.
@@ -41,7 +42,7 @@ enum hmm_pfn_flags {
 	HMM_PFN_ERROR = 1UL << (BITS_PER_LONG - 3),
 	/* Sticky flag, carried from Input to Output */
+	HMM_PFN_ALLOW_P2P = 1UL << (BITS_PER_LONG - 6),
 	HMM_PFN_DMA_MAPPED = 1UL << (BITS_PER_LONG - 7),

 	HMM_PFN_ORDER_SHIFT = (BITS_PER_LONG - 8),
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 3f7143ade32c..cdf5189be5e9 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -89,6 +89,14 @@ struct dev_pagemap_ops {
 	 */
 	vm_fault_t (*migrate_to_ram)(struct vm_fault *vmf);

+	/*
+	 * Used for private (un-addressable) device memory only. Return a
+	 * corresponding PFN for a page that can be mapped to the device
+	 * (e.g. using dma_map_page).
+	 */
+	int (*get_dma_pfn_for_device)(struct page *private_page,
+				      unsigned long *dma_pfn);
+
 	/*
 	 * Handle the memory failure happens on a range of pfns. Notify the
 	 * processes who are using these pfns, and try to recover the data on
diff --git a/mm/hmm.c b/mm/hmm.c
index a852d8337c73..1c080bc00ee8 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -226,6 +226,51 @@ static inline unsigned long pte_to_hmm_pfn_flags(struct hmm_range *range,
 	return pte_write(pte) ? (HMM_PFN_VALID | HMM_PFN_WRITE) : HMM_PFN_VALID;
 }

+static bool hmm_handle_device_private(struct hmm_range *range,
+				      unsigned long pfn_req_flags,
+				      swp_entry_t entry,
+				      unsigned long *hmm_pfn)
+{
+	struct page *page = pfn_swap_entry_to_page(entry);
+	struct dev_pagemap *pgmap = page->pgmap;
+	int ret;
+
+	pfn_req_flags &= range->pfn_flags_mask;
+	pfn_req_flags |= range->default_flags;
+
+	/*
+	 * Don't fault in device private pages owned by the caller,
+	 * just report the PFN.
+	 */
+	if (pgmap->owner == range->dev_private_owner) {
+		*hmm_pfn = swp_offset_pfn(entry);
+		goto found;
+	}
+
+	/*
+	 * If the caller requested P2P and the owner supports it, translate
+	 * the private page to the matching P2P page. If that fails,
+	 * continue with the regular flow.
+	 */
+	if (pfn_req_flags & HMM_PFN_ALLOW_P2P &&
+	    pgmap->ops->get_dma_pfn_for_device) {
+		ret = pgmap->ops->get_dma_pfn_for_device(page, hmm_pfn);
+		if (!ret) {
+			*hmm_pfn |= HMM_PFN_ALLOW_P2P;
+			goto found;
+		}
+	}
+
+	return false;
+
+found:
+	*hmm_pfn |= HMM_PFN_VALID;
+	if (is_writable_device_private_entry(entry))
+		*hmm_pfn |= HMM_PFN_WRITE;
+	return true;
+}
+
 static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 		unsigned long end, pmd_t *pmdp, pte_t *ptep,
 		unsigned long *hmm_pfn)
@@ -249,17 +294,9 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr,
 	if (!pte_present(pte)) {
 		swp_entry_t entry = pte_to_swp_entry(pte);

-		/*
-		 * Don't fault in device private pages owned by the caller,
-		 * just report the PFN.
-		 */
 		if (is_device_private_entry(entry) &&
-		    pfn_swap_entry_to_page(entry)->pgmap->owner ==
-		    range->dev_private_owner) {
-			cpu_flags = HMM_PFN_VALID;
-			if (is_writable_device_private_entry(entry))
-				cpu_flags |= HMM_PFN_WRITE;
-			*hmm_pfn = (*hmm_pfn & HMM_PFN_DMA_MAPPED) | swp_offset_pfn(entry) | cpu_flags;
+		    hmm_handle_device_private(range, pfn_req_flags, entry, hmm_pfn)) {
+			*hmm_pfn = *hmm_pfn & HMM_PFN_DMA_MAPPED;
 			return 0;
 		}
-- 
2.34.1