On Sat, 2023-02-11 at 03:39 +0000, Matthew Wilcox (Oracle) wrote:
> Add set_ptes(), update_mmu_cache_range(), flush_dcache_folio()
> and flush_icache_pages().
> 
> This is a fairly deep change. The PG_dc_clean flag changes from being a
> per-page bit to being a per-folio bit (which means it cannot always be set
> as we don't know that all pages in this folio were cleaned). The internal
> flush routines are enhanced to take the number of pages to flush.
> 
> Signed-off-by: Matthew Wilcox (Oracle) <willy@xxxxxxxxxxxxx>
> ---
>  arch/arc/include/asm/cacheflush.h         |  7 ++-
>  arch/arc/include/asm/pgtable-bits-arcv2.h | 20 ++++++--
>  arch/arc/mm/cache.c                       | 61 ++++++++++++++---------
>  arch/arc/mm/tlb.c                         | 18 ++++---
>  4 files changed, 68 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/arc/include/asm/cacheflush.h b/arch/arc/include/asm/cacheflush.h
> index e201b4b1655a..04f65f588510 100644
> --- a/arch/arc/include/asm/cacheflush.h
> +++ b/arch/arc/include/asm/cacheflush.h
> @@ -25,17 +25,20 @@
>   * in update_mmu_cache()
>   */
>  #define flush_icache_page(vma, page)
> +#define flush_icache_pages(vma, page, nr)

Maybe just remove these two definitions, because the generic implementation
is just a no-op?
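For reference, the generic fallback pattern looks roughly like the below (a
sketch from memory of the asm-generic/cacheflush.h style, not the exact
upstream text), so keeping empty arch-specific macros only shadows stubs
that are already no-ops:

/* Generic no-op stubs, only used when the arch does not define its own. */
#ifndef flush_icache_pages
static inline void flush_icache_pages(struct vm_area_struct *vma,
                                      struct page *page, unsigned int nr)
{
}
#endif

#ifndef flush_icache_page
#define flush_icache_page(vma, page)   flush_icache_pages(vma, page, 1)
#endif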
> 
>  void flush_cache_all(void);
> 
>  void flush_icache_range(unsigned long kstart, unsigned long kend);
>  void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len);
> -void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr);
> -void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr);
> +void __inv_icache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr);
> +void __flush_dcache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr);
> 
>  #define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
> 
>  void flush_dcache_page(struct page *page);
> +void flush_dcache_folio(struct folio *folio);
> +#define flush_dcache_folio flush_dcache_folio
> 
>  void dma_cache_wback_inv(phys_addr_t start, unsigned long sz);
>  void dma_cache_inv(phys_addr_t start, unsigned long sz);
> diff --git a/arch/arc/include/asm/pgtable-bits-arcv2.h b/arch/arc/include/asm/pgtable-bits-arcv2.h
> index 6e9f8ca6d6a1..4a1b2ce204c6 100644
> --- a/arch/arc/include/asm/pgtable-bits-arcv2.h
> +++ b/arch/arc/include/asm/pgtable-bits-arcv2.h
> @@ -100,14 +100,24 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
>          return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
>  }
> 
> -static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
> -                              pte_t *ptep, pte_t pteval)
> +static inline void set_ptes(struct mm_struct *mm, unsigned long addr,
> +                            pte_t *ptep, pte_t pte, unsigned int nr)
>  {
> -        set_pte(ptep, pteval);
> +        for (;;) {
> +                set_pte(ptep, pte);
> +                if (--nr == 0)
> +                        break;
> +                ptep++;
> +                pte_val(pte) += PAGE_SIZE;
> +        }
>  }
> +#define set_pte_at(mm, addr, ptep, pte) set_ptes(mm, addr, ptep, pte, 1)
> 
> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
> -                      pte_t *ptep);
> +void update_mmu_cache_range(struct vm_area_struct *vma, unsigned long address,
> +                      pte_t *ptep, unsigned int nr);
> +
> +#define update_mmu_cache(vma, addr, ptep) \
> +        update_mmu_cache_range(vma, addr, ptep, 1)
> 
>  /*
>   * Encode/decode swap entries and swap PTEs. Swap PTEs are all PTEs that
> diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c
> index 55c6de138eae..3c16ee942a5c 100644
> --- a/arch/arc/mm/cache.c
> +++ b/arch/arc/mm/cache.c
> @@ -752,17 +752,17 @@ static inline void arc_slc_enable(void)
>   * There's a corollary case, where kernel READs from a userspace mapped page.
>   * If the U-mapping is not congruent to K-mapping, former needs flushing.
>   */
> -void flush_dcache_page(struct page *page)
> +void flush_dcache_folio(struct folio *folio)
>  {
>          struct address_space *mapping;
> 
>          if (!cache_is_vipt_aliasing()) {
> -                clear_bit(PG_dc_clean, &page->flags);
> +                clear_bit(PG_dc_clean, &folio->flags);
>                  return;
>          }
> 
>          /* don't handle anon pages here */
> -        mapping = page_mapping_file(page);
> +        mapping = folio_flush_mapping(folio);
>          if (!mapping)
>                  return;
> 
> @@ -771,17 +771,27 @@ void flush_dcache_page(struct page *page)
>           * Make a note that K-mapping is dirty
>           */
>          if (!mapping_mapped(mapping)) {
> -                clear_bit(PG_dc_clean, &page->flags);
> -        } else if (page_mapcount(page)) {
> -
> +                clear_bit(PG_dc_clean, &folio->flags);
> +        } else if (folio_mapped(folio)) {
>                  /* kernel reading from page with U-mapping */
> -                phys_addr_t paddr = (unsigned long)page_address(page);
> -                unsigned long vaddr = page->index << PAGE_SHIFT;
> +                phys_addr_t paddr = (unsigned long)folio_address(folio);
> +                unsigned long vaddr = folio_pos(folio);
> 
> +                /*
> +                 * vaddr is not actually the virtual address, but is
> +                 * congruent to every user mapping.
> +                 */
>                  if (addr_not_cache_congruent(paddr, vaddr))
> -                        __flush_dcache_page(paddr, vaddr);
> +                        __flush_dcache_pages(paddr, vaddr,
> +                                        folio_nr_pages(folio));
>          }
>  }
> +EXPORT_SYMBOL(flush_dcache_folio);
> +
> +void flush_dcache_page(struct page *page)
> +{
> +        return flush_dcache_folio(page_folio(page));
> +}

I am wondering whether we should add flush_dcache_folio_range(), because
it's possible that just part of the folio needs to be flushed.

Thanks.
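Something along these lines is what I have in mind; this is only a sketch,
flush_dcache_folio_range() doesn't exist today and the name/signature here
are made up, reusing the helpers this patch introduces (only the
VIPT-aliasing kernel-read path is sketched):

/*
 * Hypothetical helper: flush only 'nr' pages of the folio, starting at
 * page index 'first' within the folio, instead of the whole folio.
 */
static void flush_dcache_folio_range(struct folio *folio,
                                     unsigned int first, unsigned int nr)
{
        phys_addr_t paddr = (unsigned long)folio_address(folio) +
                            first * PAGE_SIZE;
        unsigned long vaddr = folio_pos(folio) + first * PAGE_SIZE;

        if (addr_not_cache_congruent(paddr, vaddr))
                __flush_dcache_pages(paddr, vaddr, nr);
}

Callers that know only a sub-range of a large folio was touched could then
avoid flushing every page in it.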
Regards,
Yin, Fengwei

>  EXPORT_SYMBOL(flush_dcache_page);
> 
>  /*
> @@ -921,18 +931,18 @@ void __sync_icache_dcache(phys_addr_t paddr, unsigned long vaddr, int len)
>  }
> 
>  /* wrapper to compile time eliminate alignment checks in flush loop */
> -void __inv_icache_page(phys_addr_t paddr, unsigned long vaddr)
> +void __inv_icache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr)
>  {
> -        __ic_line_inv_vaddr(paddr, vaddr, PAGE_SIZE);
> +        __ic_line_inv_vaddr(paddr, vaddr, nr * PAGE_SIZE);
>  }
> 
>  /*
>   * wrapper to clearout kernel or userspace mappings of a page
>   * For kernel mappings @vaddr == @paddr
>   */
> -void __flush_dcache_page(phys_addr_t paddr, unsigned long vaddr)
> +void __flush_dcache_pages(phys_addr_t paddr, unsigned long vaddr, unsigned nr)
>  {
> -        __dc_line_op(paddr, vaddr & PAGE_MASK, PAGE_SIZE, OP_FLUSH_N_INV);
> +        __dc_line_op(paddr, vaddr & PAGE_MASK, nr * PAGE_SIZE, OP_FLUSH_N_INV);
>  }
> 
>  noinline void flush_cache_all(void)
> @@ -962,10 +972,10 @@ void flush_cache_page(struct vm_area_struct *vma, unsigned long u_vaddr,
> 
>          u_vaddr &= PAGE_MASK;
> 
> -        __flush_dcache_page(paddr, u_vaddr);
> +        __flush_dcache_pages(paddr, u_vaddr, 1);
> 
>          if (vma->vm_flags & VM_EXEC)
> -                __inv_icache_page(paddr, u_vaddr);
> +                __inv_icache_pages(paddr, u_vaddr, 1);
>  }
> 
>  void flush_cache_range(struct vm_area_struct *vma, unsigned long start,
> @@ -978,9 +988,9 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
>                       unsigned long u_vaddr)
>  {
>          /* TBD: do we really need to clear the kernel mapping */
> -        __flush_dcache_page((phys_addr_t)page_address(page), u_vaddr);
> -        __flush_dcache_page((phys_addr_t)page_address(page),
> -                            (phys_addr_t)page_address(page));
> +        __flush_dcache_pages((phys_addr_t)page_address(page), u_vaddr, 1);
> +        __flush_dcache_pages((phys_addr_t)page_address(page),
> +                            (phys_addr_t)page_address(page), 1);
> 
>  }
> 
> @@ -989,6 +999,8 @@ void flush_anon_page(struct vm_area_struct *vma, struct page *page,
>  void copy_user_highpage(struct page *to, struct page *from,
>          unsigned long u_vaddr, struct vm_area_struct *vma)
>  {
> +        struct folio *src = page_folio(from);
> +        struct folio *dst = page_folio(to);
>          void *kfrom = kmap_atomic(from);
>          void *kto = kmap_atomic(to);
>          int clean_src_k_mappings = 0;
> @@ -1005,7 +1017,7 @@ void copy_user_highpage(struct page *to, struct page *from,
>           * addr_not_cache_congruent() is 0
>           */
>          if (page_mapcount(from) && addr_not_cache_congruent(kfrom, u_vaddr)) {
> -                __flush_dcache_page((unsigned long)kfrom, u_vaddr);
> +                __flush_dcache_pages((unsigned long)kfrom, u_vaddr, 1);
>                  clean_src_k_mappings = 1;
>          }
> 
> @@ -1019,17 +1031,17 @@ void copy_user_highpage(struct page *to, struct page *from,
>           * non copied user pages (e.g. read faults which wire in pagecache page
>           * directly).
>           */
> -        clear_bit(PG_dc_clean, &to->flags);
> +        clear_bit(PG_dc_clean, &dst->flags);
> 
>          /*
>           * if SRC was already usermapped and non-congruent to kernel mapping
>           * sync the kernel mapping back to physical page
>           */
>          if (clean_src_k_mappings) {
> -                __flush_dcache_page((unsigned long)kfrom, (unsigned long)kfrom);
> -                set_bit(PG_dc_clean, &from->flags);
> +                __flush_dcache_pages((unsigned long)kfrom,
> +                                        (unsigned long)kfrom, 1);
>          } else {
> -                clear_bit(PG_dc_clean, &from->flags);
> +                clear_bit(PG_dc_clean, &src->flags);
>          }
> 
>          kunmap_atomic(kto);
> @@ -1038,8 +1050,9 @@ void copy_user_highpage(struct page *to, struct page *from,
> 
>  void clear_user_page(void *to, unsigned long u_vaddr, struct page *page)
>  {
> +        struct folio *folio = page_folio(page);
>          clear_page(to);
> -        clear_bit(PG_dc_clean, &page->flags);
> +        clear_bit(PG_dc_clean, &folio->flags);
>  }
>  EXPORT_SYMBOL(clear_user_page);
> 
> diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c
> index 5f71445f26bd..0a996b65bb4e 100644
> --- a/arch/arc/mm/tlb.c
> +++ b/arch/arc/mm/tlb.c
> @@ -467,8 +467,8 @@ void create_tlb(struct vm_area_struct *vma, unsigned long vaddr, pte_t *ptep)
>   * Note that flush (when done) involves both WBACK - so physical page is
>   * in sync as well as INV - so any non-congruent aliases don't remain
>   */
> -void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
> -                      pte_t *ptep)
> +void update_mmu_cache_range(struct vm_area_struct *vma,
> +                unsigned long vaddr_unaligned, pte_t *ptep, unsigned int nr)
>  {
>          unsigned long vaddr = vaddr_unaligned & PAGE_MASK;
>          phys_addr_t paddr = pte_val(*ptep) & PAGE_MASK_PHYS;
> @@ -491,15 +491,19 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long vaddr_unaligned,
>           */
>          if ((vma->vm_flags & VM_EXEC) ||
>               addr_not_cache_congruent(paddr, vaddr)) {
> -
> -                int dirty = !test_and_set_bit(PG_dc_clean, &page->flags);
> +                struct folio *folio = page_folio(page);
> +                int dirty = !test_and_set_bit(PG_dc_clean, &folio->flags);
>                  if (dirty) {
> +                        unsigned long offset = offset_in_folio(folio, paddr);
> +                        nr = folio_nr_pages(folio);
> +                        paddr -= offset;
> +                        vaddr -= offset;
>                          /* wback + inv dcache lines (K-mapping) */
> -                        __flush_dcache_page(paddr, paddr);
> +                        __flush_dcache_pages(paddr, paddr, nr);
> 
>                          /* invalidate any existing icache lines (U-mapping) */
>                          if (vma->vm_flags & VM_EXEC)
> -                                __inv_icache_page(paddr, vaddr);
> +                                __inv_icache_pages(paddr, vaddr, nr);
>                  }
>          }
>  }
> @@ -531,7 +535,7 @@ void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr,
>                           pmd_t *pmd)
>  {
>          pte_t pte = __pte(pmd_val(*pmd));
> -        update_mmu_cache(vma, addr, &pte);
> +        update_mmu_cache_range(vma, addr, &pte, HPAGE_PMD_NR);
>  }
> 
>  void local_flush_pmd_tlb_range(struct vm_area_struct *vma, unsigned long start,