On 06.07.2018 15:55, Janosch Frank wrote: > Like for ptes, we also need invalidation notification for pmds, to > make sure the guest lowcore pages are always accessible and later > addition of shadowed pmds. > > With PMDs we do not have PGSTEs or some other bits we could use in the > host PMD. Instead we pick one of the free bits in the gmap PMD. Every > time a host pmd will be invalidated, we will check if the respective > gmap PMD has the bit set and in that case fire up the notifier. > > Signed-off-by: Janosch Frank <frankja@xxxxxxxxxxxxx> > --- > arch/s390/include/asm/gmap.h | 3 ++ > arch/s390/include/asm/pgtable.h | 1 + > arch/s390/mm/gmap.c | 89 ++++++++++++++++++++++++++++++++++++++--- > arch/s390/mm/pgtable.c | 4 ++ > 4 files changed, 91 insertions(+), 6 deletions(-) > > diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h > index c1bc5633fc6e..276268b48aff 100644 > --- a/arch/s390/include/asm/gmap.h > +++ b/arch/s390/include/asm/gmap.h > @@ -13,6 +13,9 @@ > #define GMAP_NOTIFY_SHADOW 0x2 > #define GMAP_NOTIFY_MPROT 0x1 > > +/* Status bits only for huge segment entries */ > +#define _SEGMENT_ENTRY_GMAP_IN 0x8000 /* invalidation notify bit */ > + > /** > * struct gmap_struct - guest address space > * @list: list head for the mm->context gmap list > diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h > index fe36b3bb2afd..7c9ccd180e75 100644 > --- a/arch/s390/include/asm/pgtable.h > +++ b/arch/s390/include/asm/pgtable.h > @@ -1094,6 +1094,7 @@ void ptep_set_pte_at(struct mm_struct *mm, unsigned long addr, > void ptep_set_notify(struct mm_struct *mm, unsigned long addr, pte_t *ptep); > void ptep_notify(struct mm_struct *mm, unsigned long addr, > pte_t *ptep, unsigned long bits); > +void pmdp_notify(struct mm_struct *mm, unsigned long addr); > int ptep_force_prot(struct mm_struct *mm, unsigned long gaddr, > pte_t *ptep, int prot, unsigned long bit); > void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c > index e31c365d4a2f..3140f9084c8b 100644 > --- a/arch/s390/mm/gmap.c > +++ b/arch/s390/mm/gmap.c > @@ -914,6 +914,34 @@ static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) > spin_unlock(&gmap->guest_table_lock); > } > > +/* > + * gmap_protect_pmd - remove access rights to memory and set pmd notification bits > + * @pmdp: pointer to the pmd to be protected > + * @prot: indicates access rights: PROT_NONE, PROT_READ or PROT_WRITE > + * @bits: notification bits to set > + * > + * Returns 0 if successfully protected, -ENOMEM if out of memory and > + * -EAGAIN if a fixup is needed. > + * > + * Expected to be called with sg->mm->mmap_sem in read and > + * guest_table_lock held. > + */ > +static int gmap_protect_pmd(struct gmap *gmap, unsigned long gaddr, > + pmd_t *pmdp, int prot, unsigned long bits) > +{ > + int pmd_i = pmd_val(*pmdp) & _SEGMENT_ENTRY_INVALID; > + int pmd_p = pmd_val(*pmdp) & _SEGMENT_ENTRY_PROTECT; > + pmd_t new = *pmdp; > + > + /* Fixup needed */ > + if ((pmd_i && (prot != PROT_NONE)) || (pmd_p && (prot == PROT_WRITE))) > + return -EAGAIN; > + Can you add something like this (so it is directly clear what is missing): /* Shadow GMAP protection needs split PMDs */ if (bits & GMAP_NOTIFY_SHADOW) return -EINVAL; (might require the caller to forward the rc instead of fixing up) > + if (bits & GMAP_NOTIFY_MPROT) > + pmd_val(*pmdp) |= _SEGMENT_ENTRY_GMAP_IN; > + return 0; > +} > + > /* > * gmap_protect_pte - remove access rights to memory and set pgste bits > * @gmap: pointer to guest mapping meta data structure > @@ -966,7 +994,7 @@ static int gmap_protect_pte(struct gmap *gmap, unsigned long gaddr, > static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, > unsigned long len, int prot, unsigned long bits) > { > - unsigned long vmaddr; > + unsigned long vmaddr, dist; > pmd_t *pmdp; > int rc; > > @@ -975,11 +1003,21 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, > rc = -EAGAIN; > pmdp = gmap_pmd_op_walk(gmap, gaddr); > if (pmdp) { > - rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, > - bits); > - if (!rc) { > - len -= PAGE_SIZE; > - gaddr += PAGE_SIZE; > + if (!pmd_large(*pmdp)) { > + rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, > + bits); > + if (!rc) { > + len -= PAGE_SIZE; > + gaddr += PAGE_SIZE; > + } > + } else { > + rc = gmap_protect_pmd(gmap, gaddr, pmdp, prot, > + bits); > + if (!rc) { > + dist = HPAGE_SIZE - (gaddr & ~HPAGE_MASK); len -= MIN(len, HPAGE_SIZE - gaddr & ~HPAGE_MASK) or of course len -= MIN(len, dist); > + len = len < dist ? 0 : len - dist; > + gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; > + } > } > gmap_pmd_op_end(gmap, pmdp); > } > @@ -2176,6 +2214,43 @@ void ptep_notify(struct mm_struct *mm, unsigned long vmaddr, > } > EXPORT_SYMBOL_GPL(ptep_notify); > > +static void pmdp_notify_gmap(struct gmap *gmap, pmd_t *pmdp, > + unsigned long gaddr) > +{ > + pmd_val(*pmdp) &= ~_SEGMENT_ENTRY_GMAP_IN; > + gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); Does it make sense to inline that? > +} > + > +/** > + * pmdp_notify - call all invalidation callbacks for a specific pmd > + * @mm: pointer to the process mm_struct > + * @vmaddr: virtual address in the process address space > + * > + * This function is expected to be called with mmap_sem held in read. > + */ > +void pmdp_notify(struct mm_struct *mm, unsigned long vmaddr) > +{ > + unsigned long *table, gaddr; > + struct gmap *gmap; > + > + rcu_read_lock(); > + list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) { > + spin_lock(&gmap->guest_table_lock); > + table = radix_tree_lookup(&gmap->host_to_guest, > + vmaddr >> PMD_SHIFT); > + if (!table || !(*table & _SEGMENT_ENTRY_GMAP_IN)) { > + spin_unlock(&gmap->guest_table_lock); > + continue; > + } > + *table &= ~_SEGMENT_ENTRY_GMAP_IN; > + gaddr = __gmap_segment_gaddr(table); > + gmap_call_notifier(gmap, gaddr, gaddr + HPAGE_SIZE - 1); > + spin_unlock(&gmap->guest_table_lock); > + } > + rcu_read_unlock(); > +} > +EXPORT_SYMBOL_GPL(pmdp_notify); > + > /** > * gmap_pmdp_xchg - exchange a gmap pmd with another > * @gmap: pointer to the guest address space structure > @@ -2189,6 +2264,8 @@ EXPORT_SYMBOL_GPL(ptep_notify); > static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new, > unsigned long gaddr) > { > + gaddr &= HPAGE_MASK; > + pmdp_notify_gmap(gmap, pmdp, gaddr); > if (MACHINE_HAS_TLB_GUEST) > __pmdp_idte(gaddr, (pmd_t *)pmdp, IDTE_GUEST_ASCE, gmap->asce, > IDTE_GLOBAL); > diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c > index 301e466e4263..7e1c17b1a24a 100644 > --- a/arch/s390/mm/pgtable.c > +++ b/arch/s390/mm/pgtable.c > @@ -405,6 +405,8 @@ pmd_t pmdp_xchg_direct(struct mm_struct *mm, unsigned long addr, > pmd_t old; > > preempt_disable(); > + if (mm_has_pgste(mm)) > + pmdp_notify(mm, addr); > old = pmdp_flush_direct(mm, addr, pmdp); > *pmdp = new; > preempt_enable(); > @@ -418,6 +420,8 @@ pmd_t pmdp_xchg_lazy(struct mm_struct *mm, unsigned long addr, > pmd_t old; > > preempt_disable(); > + if (mm_has_pgste(mm)) > + pmdp_notify(mm, addr); > old = pmdp_flush_lazy(mm, addr, pmdp); > *pmdp = new; > preempt_enable(); > Looks good to me. -- Thanks, David / dhildenb