On 19/09/2018 10:47, Janosch Frank wrote: > The gmap guest_table_lock is used to protect changes to the guest's > DAT tables from region 1 to segments. Therefore it also protects the > host to guest radix tree where each new segment mapping by gmap_link() > is tracked. Changes to ptes are synchronized through the pte lock, > which is easily retrievable, because the gmap shares the page tables > with userspace. > > With huge pages the story changes. PMD tables are not shared and we're > left with the pmd lock on userspace side and the guest_table_lock on > the gmap side. Having two locks for an object is a guarantee for > locking problems. > > Therefore the guest_table_lock will only be used for population of the > gmap tables and hence protecting the host_to_guest tree. While the pmd > lock will be used for all changes to the pmd from both userspace and > the gmap. > > This means we need to retrieve the vmaddr to retrieve a gmap pmd, > which takes a bit longer than before. But we can now operate on > multiple pmds which are in disjoint segment tables instead of having a > global lock. > Has the time come to document how locking works? 
(especially also for gmap shadows) > Signed-off-by: Janosch Frank <frankja@xxxxxxxxxxxxx> > --- > arch/s390/include/asm/pgtable.h | 1 + > arch/s390/mm/gmap.c | 70 +++++++++++++++++++++++++---------------- > arch/s390/mm/pgtable.c | 2 +- > 3 files changed, 45 insertions(+), 28 deletions(-) > > diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h > index 0e7cb0dc9c33..c0abd57c5a21 100644 > --- a/arch/s390/include/asm/pgtable.h > +++ b/arch/s390/include/asm/pgtable.h > @@ -1420,6 +1420,7 @@ static inline void __pudp_idte(unsigned long addr, pud_t *pudp, > } > } > > +pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr); > pmd_t pmdp_xchg_direct(struct mm_struct *, unsigned long, pmd_t *, pmd_t); > pmd_t pmdp_xchg_lazy(struct mm_struct *, unsigned long, pmd_t *, pmd_t); > pud_t pudp_xchg_direct(struct mm_struct *, unsigned long, pud_t *, pud_t); > diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c > index 9ccd62cc7f37..04c24a284113 100644 > --- a/arch/s390/mm/gmap.c > +++ b/arch/s390/mm/gmap.c > @@ -895,47 +895,62 @@ static void gmap_pte_op_end(spinlock_t *ptl) > } > > /** > - * gmap_pmd_op_walk - walk the gmap tables, get the guest table lock > - * and return the pmd pointer > + * gmap_pmd_op_walk - walk the gmap tables, get the pmd_lock if needed > + * and return the pmd pointer or NULL > * @gmap: pointer to guest mapping meta data structure > * @gaddr: virtual address in the guest address space > * > * Returns a pointer to the pmd for a guest address, or NULL > */ > -static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr) > +static inline pmd_t *gmap_pmd_op_walk(struct gmap *gmap, unsigned long gaddr, > + spinlock_t **ptl) > { > - pmd_t *pmdp; > + pmd_t *pmdp, *hpmdp; > + unsigned long vmaddr; > + > > BUG_ON(gmap_is_shadow(gmap)); > - pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1); > - if (!pmdp) > - return NULL; > > - /* without huge pages, there is no need to take the table lock */ > - if 
(!gmap->mm->context.allow_gmap_hpage_1m) > - return pmd_none(*pmdp) ? NULL : pmdp; > - > - spin_lock(&gmap->guest_table_lock); > - if (pmd_none(*pmdp)) { > - spin_unlock(&gmap->guest_table_lock); > - return NULL; > + *ptl = NULL; > + if (gmap->mm->context.allow_gmap_hpage_1m) { > + vmaddr = __gmap_translate(gmap, gaddr); > + if (IS_ERR_VALUE(vmaddr)) > + return NULL; > + hpmdp = pmd_alloc_map(gmap->mm, vmaddr); > + if (!hpmdp) > + return NULL; > + *ptl = pmd_lock(gmap->mm, hpmdp); > + if (pmd_none(*hpmdp)) { > + spin_unlock(*ptl); > + *ptl = NULL; > + return NULL; > + } > + if (!pmd_large(*hpmdp)) { > + spin_unlock(*ptl); > + *ptl = NULL; > + } > + } > + > + pmdp = (pmd_t *) gmap_table_walk(gmap, gaddr, 1); > + if (!pmdp || pmd_none(*pmdp)) { > + if (*ptl) > + spin_unlock(*ptl); > + pmdp = NULL; > + *ptl = NULL; > } > > - /* 4k page table entries are locked via the pte (pte_alloc_map_lock). */ > - if (!pmd_large(*pmdp)) > - spin_unlock(&gmap->guest_table_lock); > return pmdp; > } > > /** > - * gmap_pmd_op_end - release the guest_table_lock if needed > + * gmap_pmd_op_end - release the pmd lock if needed > * @gmap: pointer to the guest mapping meta data structure > * @pmdp: pointer to the pmd > */ > -static inline void gmap_pmd_op_end(struct gmap *gmap, pmd_t *pmdp) > +static inline void gmap_pmd_op_end(spinlock_t *ptl) > { > - if (pmd_large(*pmdp)) > - spin_unlock(&gmap->guest_table_lock); > + if (ptl) > + spin_unlock(ptl); > } > > /* > @@ -1037,13 +1052,14 @@ static int gmap_protect_range(struct gmap *gmap, unsigned long gaddr, > unsigned long len, int prot, unsigned long bits) > { > unsigned long vmaddr, dist; > + spinlock_t *ptl = NULL; > pmd_t *pmdp; > int rc; > > BUG_ON(gmap_is_shadow(gmap)); > while (len) { > rc = -EAGAIN; > - pmdp = gmap_pmd_op_walk(gmap, gaddr); > + pmdp = gmap_pmd_op_walk(gmap, gaddr, &ptl); > if (pmdp) { > if (!pmd_large(*pmdp)) { > rc = gmap_protect_pte(gmap, gaddr, pmdp, prot, > @@ -1061,7 +1077,7 @@ static int gmap_protect_range(struct 
gmap *gmap, unsigned long gaddr, > gaddr = (gaddr & HPAGE_MASK) + HPAGE_SIZE; > } > } > - gmap_pmd_op_end(gmap, pmdp); > + gmap_pmd_op_end(ptl); > } > if (rc) { > if (rc == -EINVAL) > @@ -2457,9 +2473,9 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], > int i; > pmd_t *pmdp; > pte_t *ptep; > - spinlock_t *ptl; > + spinlock_t *ptl = NULL; > > - pmdp = gmap_pmd_op_walk(gmap, gaddr); > + pmdp = gmap_pmd_op_walk(gmap, gaddr, &ptl); > if (!pmdp) > return; > > @@ -2476,7 +2492,7 @@ void gmap_sync_dirty_log_pmd(struct gmap *gmap, unsigned long bitmap[4], > spin_unlock(ptl); > } > } > - gmap_pmd_op_end(gmap, pmdp); > + gmap_pmd_op_end(ptl); > } > EXPORT_SYMBOL_GPL(gmap_sync_dirty_log_pmd); > > diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c > index 16d35b881a11..4b184744350b 100644 > --- a/arch/s390/mm/pgtable.c > +++ b/arch/s390/mm/pgtable.c > @@ -410,7 +410,7 @@ static inline pmd_t pmdp_flush_lazy(struct mm_struct *mm, > return old; > } > > -static pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) > +pmd_t *pmd_alloc_map(struct mm_struct *mm, unsigned long addr) > { > pgd_t *pgd; > p4d_t *p4d; > -- Thanks, David / dhildenb