On 17.07.2018 14:44, Janosch Frank wrote:
> Let's allow huge pmd linking when enabled through the
> KVM_CAP_S390_HPAGE_1M capability. Also we can now restrict gmap
> invalidation and notification to the cases where the capability has
> been activated and save some cycles when that's not the case.
> 
> Signed-off-by: Janosch Frank <frankja@xxxxxxxxxxxxx>
> ---
>  arch/s390/include/asm/mmu.h         |  2 ++
>  arch/s390/include/asm/mmu_context.h |  1 +
>  arch/s390/mm/gmap.c                 |  9 ++++++---
>  arch/s390/mm/pgtable.c              | 12 ++++++------
>  4 files changed, 15 insertions(+), 9 deletions(-)
> 
> diff --git a/arch/s390/include/asm/mmu.h b/arch/s390/include/asm/mmu.h
> index f5ff9dbad8ac..fad2ec2c1fd1 100644
> --- a/arch/s390/include/asm/mmu.h
> +++ b/arch/s390/include/asm/mmu.h
> @@ -24,6 +24,8 @@ typedef struct {
>          unsigned int uses_skeys:1;
>          /* The mmu context uses CMM. */
>          unsigned int uses_cmm:1;
> +        /* The gmap associated with this context uses huge pages. */

... are allowed to use ...

> +        unsigned int allow_gmap_hpage_1m:1;
>  } mm_context_t;
> 
>  #define INIT_MM_CONTEXT(name)                                             \
> diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h
> index d16bc79c30bb..0717ee76885d 100644
> --- a/arch/s390/include/asm/mmu_context.h
> +++ b/arch/s390/include/asm/mmu_context.h
> @@ -32,6 +32,7 @@ static inline int init_new_context(struct task_struct *tsk,
>          mm->context.has_pgste = 0;
>          mm->context.uses_skeys = 0;
>          mm->context.uses_cmm = 0;
> +        mm->context.allow_gmap_hpage_1m = 0;
>  #endif
>          switch (mm->context.asce_limit) {
>          case _REGION2_SIZE:
> diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
> index a5a60afae8df..16878dacaeab 100644
> --- a/arch/s390/mm/gmap.c
> +++ b/arch/s390/mm/gmap.c
> @@ -2,8 +2,10 @@
>  /*
>   * KVM guest address space mapping code
>   *
> - * Copyright IBM Corp. 2007, 2016
> + * Copyright IBM Corp. 2007, 2016, 2018
>   * Author(s): Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
> + *            David Hildenbrand <david@xxxxxxxxxx>
> + *            Janosch Frank <frankja@xxxxxxxxxxxxxxxxxx>
>   */
> 
>  #include <linux/kernel.h>
> @@ -589,8 +591,8 @@ int __gmap_link(struct gmap *gmap, unsigned long gaddr, unsigned long vmaddr)
>                  return -EFAULT;
>          pmd = pmd_offset(pud, vmaddr);
>          VM_BUG_ON(pmd_none(*pmd));
> -        /* large pmds cannot yet be handled */
> -        if (pmd_large(*pmd))
> +        /* Are we allowed to use huge pages? */
> +        if (pmd_large(*pmd) && !gmap->mm->context.allow_gmap_hpage_1m)
>                  return -EFAULT;
>          /* Link gmap segment table entry location to page table. */
>          rc = radix_tree_preload(GFP_KERNEL);
> @@ -1634,6 +1636,7 @@ struct gmap *gmap_shadow(struct gmap *parent, unsigned long asce,
>          unsigned long limit;
>          int rc;
> 
> +        BUG_ON(parent->mm->context.allow_gmap_hpage_1m);
>          BUG_ON(gmap_is_shadow(parent));
>          spin_lock(&parent->shadow_lock);
>          sg = gmap_find_shadow(parent, asce, edat_level);
> diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
> index 147380c0b5d3..8595c37e0370 100644
> --- a/arch/s390/mm/pgtable.c
> +++ b/arch/s390/mm/pgtable.c
> @@ -348,7 +348,7 @@ static inline void pmdp_idte_local(struct mm_struct *mm,
>                              mm->context.asce, IDTE_LOCAL);
>          else
>                  __pmdp_idte(addr, pmdp, 0, 0, IDTE_LOCAL);
> -        if (mm_has_pgste(mm))
> +        if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>                  gmap_pmdp_idte_local(mm, addr);
>  }
> 
> @@ -358,15 +358,15 @@ static inline void pmdp_idte_global(struct mm_struct *mm,
>          if (MACHINE_HAS_TLB_GUEST) {
>                  __pmdp_idte(addr, pmdp, IDTE_NODAT | IDTE_GUEST_ASCE,
>                              mm->context.asce, IDTE_GLOBAL);
> -                if (mm_has_pgste(mm))
> +                if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>                          gmap_pmdp_idte_global(mm, addr);
>          } else if (MACHINE_HAS_IDTE) {
>                  __pmdp_idte(addr, pmdp, 0, 0, IDTE_GLOBAL);
> -                if (mm_has_pgste(mm))
> +                if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>                          gmap_pmdp_idte_global(mm, addr);
>          } else {
>                  __pmdp_csp(pmdp);
> -                if (mm_has_pgste(mm))
> +                if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>                          gmap_pmdp_csp(mm, addr);
>          }
>  }
> @@ -435,7 +435,7 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr,
>          pmd_t old;
> 
>          preempt_disable();
> -        if (mm_has_pgste(mm))
> +        if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>                  pmdp_clear_skeys(mm, pmdp, new);
>          old = pmdp_flush_direct(mm, addr, pmdp);
>          *pmdp = new;
> @@ -450,7 +450,7 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr,
>          pmd_t old;
> 
>          preempt_disable();
> -        if (mm_has_pgste(mm))
> +        if (mm_has_pgste(mm) && mm->context.allow_gmap_hpage_1m)
>                  pmdp_clear_skeys(mm, pmdp, new);
>          old = pmdp_flush_lazy(mm, addr, pmdp);
>          *pmdp = new;
> 

Not sure if all of these extra checks really make sense (performance-wise),
but I guess it would be helpful if some parts of e.g. QEMU (shared
libraries?) are backed by huge pages while the GMAP does not contain huge
pages.

-- 
Thanks,

David / dhildenb
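
For completeness, the userspace side of all this is just the new capability.
A minimal sketch of the enablement path (my own illustration, not taken from
QEMU; the function name and vm_fd are made up, it assumes the
KVM_CAP_S390_HPAGE_1M uapi bit from this series and a VM fd obtained via
KVM_CREATE_VM, and error reporting is omitted):

#include <string.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

/* Opt a VM in to 1M (pmd-level) gmap backing; 0 on success, -1 otherwise. */
static int enable_hpage_1m(int vm_fd)
{
        struct kvm_enable_cap cap;

        /* Kernels without the feature simply do not advertise the cap. */
        if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_S390_HPAGE_1M) <= 0)
                return -1;

        memset(&cap, 0, sizeof(cap));
        cap.cap = KVM_CAP_S390_HPAGE_1M;

        /* Per the rest of the series, this sets allow_gmap_hpage_1m. */
        return ioctl(vm_fd, KVM_ENABLE_CAP, &cap) ? -1 : 0;
}

If I read the series correctly, a VM that never issues this ioctl keeps
allow_gmap_hpage_1m == 0, so none of the new pmd invalidation/notification
paths above are ever taken for it.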