The method of establishing the mmap mapping for memory allocated by dmabuf
through vm_insert_page changes the way dmabuf memory is accounted for,
primarily in the following three aspects:

(1) The memory usage of dmabuf is accounted for in mm->rss.
(2) /proc/self/smaps accounts for the memory usage of dmabuf.
(3) Memory usage of dmabuf after mmap is counted in "Mapped" in
    /proc/meminfo.

Add a VM_DMABUF_DIO flag and use it to address the memory accounting in
the three places above, ensuring that memory allocated by dmabuf with
direct_io support does not change its memory accounting method.

Signed-off-by: Lei Liu <liulei.rjpt@xxxxxxxx>
---
 drivers/dma-buf/heaps/system_heap.c |  2 ++
 fs/proc/task_mmu.c                  |  8 +++++++-
 include/linux/mm.h                  |  1 +
 mm/memory.c                         | 15 ++++++++++-----
 mm/rmap.c                           |  9 +++++----
 5 files changed, 25 insertions(+), 10 deletions(-)

diff --git a/drivers/dma-buf/heaps/system_heap.c b/drivers/dma-buf/heaps/system_heap.c
index 87547791f9e1..1d6f08b1dc5b 100644
--- a/drivers/dma-buf/heaps/system_heap.c
+++ b/drivers/dma-buf/heaps/system_heap.c
@@ -200,6 +200,8 @@ static int system_heap_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma)
 	struct sg_page_iter piter;
 	int ret;
 
+	vm_flags_set(vma, VM_DMABUF_DIO);
+
 	for_each_sgtable_page(table, &piter, vma->vm_pgoff) {
 		struct page *page = sg_page_iter_page(&piter);
 
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 71e5039d940d..8070fdd4ac7b 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -784,7 +784,13 @@ static void smap_gather_stats(struct vm_area_struct *vma,
 	/* Invalid start */
 	if (start >= vma->vm_end)
 		return;
-
+	/*
+	 * DMABUF memory must be mmapped with vm_insert_page to support
+	 * direct_io, so it does not carry VM_PFNMAP; skip VMAs that have
+	 * the VM_DMABUF_DIO flag so this memory is not accounted here.
+	 */
+	if (vma->vm_flags & VM_DMABUF_DIO)
+		return;
 	if (vma->vm_file && shmem_mapping(vma->vm_file->f_mapping)) {
 		/*
 		 * For shared or readonly shmem mappings we know that all
diff --git a/include/linux/mm.h b/include/linux/mm.h
index eb7c96d24ac0..86d23f1a9717 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -283,6 +283,7 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_UFFD_MISSING	0
 #endif /* CONFIG_MMU */
 #define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
+#define VM_DMABUF_DIO	0x00000800	/* Skip memory accounting for dmabuf that supports direct_io */
 #define VM_UFFD_WP	0x00001000	/* wrprotect pages tracking */
 
 #define VM_LOCKED	0x00002000
diff --git a/mm/memory.c b/mm/memory.c
index d10e616d7389..8b126ce0f788 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1003,7 +1003,8 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 			VM_WARN_ON_FOLIO(PageAnonExclusive(page), folio);
 		} else {
 			folio_dup_file_rmap_ptes(folio, page, nr);
-			rss[mm_counter_file(folio)] += nr;
+			if (likely(!(src_vma->vm_flags & VM_DMABUF_DIO)))
+				rss[mm_counter_file(folio)] += nr;
 		}
 		if (any_writable)
 			pte = pte_mkwrite(pte, src_vma);
@@ -1031,7 +1032,8 @@ copy_present_ptes(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
 		VM_WARN_ON_FOLIO(PageAnonExclusive(page), folio);
 	} else {
 		folio_dup_file_rmap_pte(folio, page);
-		rss[mm_counter_file(folio)]++;
+		if (likely(!(src_vma->vm_flags & VM_DMABUF_DIO)))
+			rss[mm_counter_file(folio)]++;
 	}
 
 copy_pte:
@@ -1488,7 +1490,8 @@ static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb,
 		}
 		if (pte_young(ptent) && likely(vma_has_recency(vma)))
 			folio_mark_accessed(folio);
-		rss[mm_counter(folio)] -= nr;
+		if (likely(!(vma->vm_flags & VM_DMABUF_DIO)))
+			rss[mm_counter(folio)] -= nr;
 	} else {
 		/* We don't need up-to-date accessed/dirty bits. */
 		clear_full_ptes(mm, addr, pte, nr, tlb->fullmm);
@@ -1997,7 +2000,8 @@ static int insert_page_into_pte_locked(struct vm_area_struct *vma, pte_t *pte,
 		return -EBUSY;
 	/* Ok, finally just insert the thing.. */
 	folio_get(folio);
-	inc_mm_counter(vma->vm_mm, mm_counter_file(folio));
+	if (likely(!(vma->vm_flags & VM_DMABUF_DIO)))
+		inc_mm_counter(vma->vm_mm, mm_counter_file(folio));
 	folio_add_file_rmap_pte(folio, page, vma);
 	set_pte_at(vma->vm_mm, addr, pte, mk_pte(page, prot));
 	return 0;
@@ -4641,7 +4645,8 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
 	if (write)
 		entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
-	add_mm_counter(vma->vm_mm, mm_counter_file(folio), HPAGE_PMD_NR);
+	if (likely(!(vma->vm_flags & VM_DMABUF_DIO)))
+		add_mm_counter(vma->vm_mm, mm_counter_file(folio), HPAGE_PMD_NR);
 	folio_add_file_rmap_pmd(folio, page, vma);
 
 	/*
diff --git a/mm/rmap.c b/mm/rmap.c
index e8fc5ecb59b2..17cab358acc1 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -1441,10 +1441,10 @@ static __always_inline void __folio_add_file_rmap(struct folio *folio,
 	VM_WARN_ON_FOLIO(folio_test_anon(folio), folio);
 
 	nr = __folio_add_rmap(folio, page, nr_pages, level, &nr_pmdmapped);
-	if (nr_pmdmapped)
+	if (nr_pmdmapped && !(vma->vm_flags & VM_DMABUF_DIO))
 		__mod_node_page_state(pgdat, folio_test_swapbacked(folio) ?
 			NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED, nr_pmdmapped);
-	if (nr)
+	if (nr && !(vma->vm_flags & VM_DMABUF_DIO))
 		__lruvec_stat_mod_folio(folio, NR_FILE_MAPPED, nr);
 
 	/* See comments in folio_add_anon_rmap_*() */
@@ -1545,7 +1545,7 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 		/* NR_{FILE/SHMEM}_PMDMAPPED are not maintained per-memcg */
 		if (folio_test_anon(folio))
 			__lruvec_stat_mod_folio(folio, NR_ANON_THPS, -nr_pmdmapped);
-		else
+		else if (likely(!(vma->vm_flags & VM_DMABUF_DIO)))
 			__mod_node_page_state(pgdat,
 					folio_test_swapbacked(folio) ?
 					NR_SHMEM_PMDMAPPED : NR_FILE_PMDMAPPED,
@@ -1553,7 +1553,8 @@ static __always_inline void __folio_remove_rmap(struct folio *folio,
 	}
 	if (nr) {
 		idx = folio_test_anon(folio) ? NR_ANON_MAPPED : NR_FILE_MAPPED;
-		__lruvec_stat_mod_folio(folio, idx, -nr);
+		if (likely(!(vma->vm_flags & VM_DMABUF_DIO)))
+			__lruvec_stat_mod_folio(folio, idx, -nr);
 
 		/*
 		 * Queue anon large folio for deferred split if at least one
-- 
2.34.1
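
For context, below is a minimal, hypothetical userspace sketch of the workload
this flag targets: allocate from the system dma-heap, mmap the buffer, and read
a file into it with O_DIRECT. It is not part of the patch; "data.bin" is a
placeholder path, and the expectation in the comments (the mapping no longer
showing up in RSS, smaps, or "Mapped" in /proc/meminfo) is taken from the
commit message above, assuming the patched system heap sets VM_DMABUF_DIO.

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
#include <linux/dma-heap.h>

int main(void)
{
	size_t len = 4 * 1024 * 1024;
	struct dma_heap_allocation_data alloc = {
		.len = len,
		.fd_flags = O_RDWR | O_CLOEXEC,
	};
	int heap_fd, file_fd;
	void *buf;

	/* Allocate a 4 MiB dmabuf from the system dma-heap. */
	heap_fd = open("/dev/dma_heap/system", O_RDONLY | O_CLOEXEC);
	if (heap_fd < 0 || ioctl(heap_fd, DMA_HEAP_IOCTL_ALLOC, &alloc) < 0) {
		perror("dma-heap alloc");
		return 1;
	}

	/* With the patch applied, system_heap_mmap() sets VM_DMABUF_DIO on this VMA. */
	buf = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, alloc.fd, 0);
	if (buf == MAP_FAILED) {
		perror("mmap dmabuf");
		return 1;
	}

	/* Direct I/O straight into the dmabuf pages; "data.bin" is a placeholder. */
	file_fd = open("data.bin", O_RDONLY | O_DIRECT);
	if (file_fd >= 0 && read(file_fd, buf, len) < 0)
		perror("read");

	/* Pause to inspect /proc/self/smaps, /proc/self/status (VmRSS) and /proc/meminfo. */
	getchar();

	munmap(buf, len);
	return 0;
}

Without the flag, the 4 MiB mapped by vm_insert_page is charged to the
process's RSS, shown in its smaps entry, and added to "Mapped" in
/proc/meminfo; with VM_DMABUF_DIO those accounting paths are skipped.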