[RFC/PATCH v3 09/16] s390/mm: Add shadow segment code

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The VSIE code does not yet support shadowing large hosts. Let's add
large to large shadowing.

Signed-off-by: Janosch Frank <frankja@xxxxxxxxxxxxxxxxxx>
---
 arch/s390/include/asm/gmap.h |   7 +-
 arch/s390/kvm/gaccess.c      |  35 ++++-
 arch/s390/mm/gmap.c          | 332 ++++++++++++++++++++++++++++++++++++-------
 3 files changed, 314 insertions(+), 60 deletions(-)

diff --git a/arch/s390/include/asm/gmap.h b/arch/s390/include/asm/gmap.h
index e7592e1..8387fdc 100644
--- a/arch/s390/include/asm/gmap.h
+++ b/arch/s390/include/asm/gmap.h
@@ -17,6 +17,7 @@
 #define _SEGMENT_ENTRY_GMAP_SPLIT	0x0001  /* split huge pmd */
 /* Status bits only for huge segment entries */
 #define _SEGMENT_ENTRY_GMAP_UC		0x4000	/* user dirty (migration) */
+#define _SEGMENT_ENTRY_GMAP_VSIE	0x8000	/* vsie bit */
 
 /**
  * struct gmap_struct - guest address space
@@ -141,9 +142,11 @@ int gmap_shadow_sgt(struct gmap *sg, unsigned long saddr, unsigned long sgt,
 		    int fake);
 int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 		    int fake);
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
-			   unsigned long *pgt, int *dat_protection, int *fake);
+int gmap_shadow_sgt_lookup(struct gmap *sg, unsigned long saddr,
+			   unsigned long *pgt, int *dat_protection,
+			   int *fake);
 int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte);
+int gmap_shadow_segment(struct gmap *sg, unsigned long saddr, pmd_t pmd);
 
 void gmap_register_pte_notifier(struct gmap_notifier *);
 void gmap_unregister_pte_notifier(struct gmap_notifier *);
diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c
index f9c5cc0..045d12e 100644
--- a/arch/s390/kvm/gaccess.c
+++ b/arch/s390/kvm/gaccess.c
@@ -981,7 +981,7 @@ int kvm_s390_check_low_addr_prot_real(struct kvm_vcpu *vcpu, unsigned long gra)
  */
 static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 				  unsigned long *pgt, int *dat_protection,
-				  int *fake)
+				  int *fake, int *lvl)
 {
 	struct gmap *parent;
 	union asce asce;
@@ -1136,6 +1136,17 @@ static int kvm_s390_shadow_tables(struct gmap *sg, unsigned long saddr,
 		if (ste.cs && asce.p)
 			return PGM_TRANSLATION_SPEC;
 		*dat_protection |= ste.fc0.p;
+
+		/* Parent is mapped by huge pages. */
+		if (fc) {
+			/* Guest is also huge, easy case. */
+			if (ste.fc && sg->edat_level >= 1) {
+				*lvl = 1;
+				*pgt = ptr;
+				return 0;
+			}
+		}
+		/* Small to small and small to huge case */
 		if (ste.fc && sg->edat_level >= 1) {
 			*fake = 1;
 			ptr = ste.fc1.sfaa * _SEGMENT_SIZE;
@@ -1172,8 +1183,9 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
 {
 	union vaddress vaddr;
 	union page_table_entry pte;
+	union segment_table_entry ste;
 	unsigned long pgt;
-	int dat_protection, fake, fc;
+	int dat_protection, fake, lvl, fc;
 	int rc;
 
 	down_read(&sg->mm->mmap_sem);
@@ -1184,12 +1196,26 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
 	 */
 	ipte_lock(vcpu);
 
-	rc = gmap_shadow_pgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
+	rc = gmap_shadow_sgt_lookup(sg, saddr, &pgt, &dat_protection, &fake);
 	if (rc)
 		rc = kvm_s390_shadow_tables(sg, saddr, &pgt, &dat_protection,
-					    &fake);
+					    &fake, &lvl);
 
 	vaddr.addr = saddr;
+
+	/* Shadow stopped at segment level, we map pmd to pmd */
+	if (lvl) {
+		if (!rc)
+			rc = gmap_read_table(sg->parent, pgt + vaddr.sx * 8,
+					     &ste.val, &fc);
+		if (!rc && ste.i)
+			rc = PGM_PAGE_TRANSLATION;
+		ste.fc1.p |= dat_protection;
+		if (!rc)
+			rc = gmap_shadow_segment(sg, saddr, __pmd(ste.val));
+		goto out;
+	}
+
 	if (fake) {
 		pte.val = pgt + vaddr.px * PAGE_SIZE;
 		goto shadow_page;
@@ -1204,6 +1230,7 @@ int kvm_s390_shadow_fault(struct kvm_vcpu *vcpu, struct gmap *sg,
 	pte.p |= dat_protection;
 	if (!rc)
 		rc = gmap_shadow_page(sg, saddr, __pte(pte.val));
+out:
 	ipte_unlock(vcpu);
 	up_read(&sg->mm->mmap_sem);
 	return rc;
diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c
index 66789e2..f805ec9 100644
--- a/arch/s390/mm/gmap.c
+++ b/arch/s390/mm/gmap.c
@@ -1405,6 +1405,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
 		if (IS_ERR_VALUE(vmaddr))
 			return vmaddr;
 		hpmdp = (pmd_t *)huge_pte_offset(parent->mm, vmaddr, HPAGE_SIZE);
+		/* Do we need tests here? */
 		ptl = pmd_lock(parent->mm, hpmdp);
 		rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
 		if (!rmap) {
@@ -1450,6 +1451,7 @@ static int gmap_protect_rmap(struct gmap *sg, unsigned long raddr,
 }
 
 #define _SHADOW_RMAP_MASK	0x7
+#define _SHADOW_RMAP_SEGMENT_LP	0x6
 #define _SHADOW_RMAP_REGION1	0x5
 #define _SHADOW_RMAP_REGION2	0x4
 #define _SHADOW_RMAP_REGION3	0x3
@@ -1557,13 +1559,16 @@ static void __gmap_unshadow_sgt(struct gmap *sg, unsigned long raddr,
 	for (i = 0; i < _CRST_ENTRIES; i++, raddr += _SEGMENT_SIZE) {
 		if (!(sgt[i] & _SEGMENT_ENTRY_ORIGIN))
 			continue;
-		pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
+
+		if (!(sgt[i] & _SEGMENT_ENTRY_LARGE)) {
+			pgt = (unsigned long *)(sgt[i] & _REGION_ENTRY_ORIGIN);
+			__gmap_unshadow_pgt(sg, raddr, pgt);
+			/* Free page table */
+			page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
+			list_del(&page->lru);
+			page_table_free_pgste(page);
+		}
 		sgt[i] = _SEGMENT_ENTRY_EMPTY;
-		__gmap_unshadow_pgt(sg, raddr, pgt);
-		/* Free page table */
-		page = pfn_to_page(__pa(pgt) >> PAGE_SHIFT);
-		list_del(&page->lru);
-		page_table_free_pgste(page);
 	}
 }
 
@@ -2178,32 +2183,62 @@ EXPORT_SYMBOL_GPL(gmap_shadow_sgt);
  *
  * Called with sg->mm->mmap_sem in read.
  */
-int gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long saddr,
+void gmap_shadow_pgt_lookup(struct gmap *sg, unsigned long *sge,
+			    unsigned long saddr, unsigned long *pgt,
+			    int *dat_protection, int *fake)
+{
+	struct page *page;
+
+	/* Shadow page tables are full pages (pte+pgste) */
+	page = pfn_to_page(*sge >> PAGE_SHIFT);
+	*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
+	*dat_protection = !!(*sge & _SEGMENT_ENTRY_PROTECT);
+	*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
+
+int gmap_shadow_sgt_lookup(struct gmap *sg, unsigned long saddr,
 			   unsigned long *pgt, int *dat_protection,
 			   int *fake)
 {
-	unsigned long *table;
+	unsigned long *sge, *r3e = NULL;
 	struct page *page;
-	int rc;
+	int rc = -EAGAIN;
 
 	BUG_ON(!gmap_is_shadow(sg));
 	spin_lock(&sg->guest_table_lock);
-	table = gmap_table_walk(sg, saddr, 1); /* get segment pointer */
-	if (table && !(*table & _SEGMENT_ENTRY_INVALID)) {
-		/* Shadow page tables are full pages (pte+pgste) */
-		page = pfn_to_page(*table >> PAGE_SHIFT);
-		*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
-		*dat_protection = !!(*table & _SEGMENT_ENTRY_PROTECT);
-		*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
-		rc = 0;
-	} else  {
-		rc = -EAGAIN;
+	if (sg->asce & _ASCE_TYPE_MASK) {
+		/* >2 GB guest */
+		r3e = (unsigned long *) gmap_table_walk(sg, saddr, 2);
+		if (!r3e || (*r3e & _REGION_ENTRY_INVALID))
+			goto out;
+		sge = (unsigned long *)(*r3e & _REGION_ENTRY_ORIGIN) + ((saddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT);
+	} else {
+		sge = (unsigned long *)(sg->asce & PAGE_MASK) + ((saddr & _SEGMENT_INDEX) >> _SEGMENT_SHIFT);
 	}
+	if (*sge & _SEGMENT_ENTRY_INVALID)
+		goto out;
+	rc = 0;
+	if (*sge & _SEGMENT_ENTRY_LARGE) {
+		if (r3e) {
+			page = pfn_to_page(*r3e >> PAGE_SHIFT);
+			*pgt = page->index & ~GMAP_SHADOW_FAKE_TABLE;
+			*dat_protection = !!(*r3e & _SEGMENT_ENTRY_PROTECT);
+			*fake = !!(page->index & GMAP_SHADOW_FAKE_TABLE);
+		} else {
+			*pgt = sg->orig_asce & PAGE_MASK;
+			*dat_protection = 0;
+			*fake = 0;
+		}
+	} else {
+		gmap_shadow_pgt_lookup(sg, sge, saddr, pgt,
+				       dat_protection, fake);
+	}
+out:
 	spin_unlock(&sg->guest_table_lock);
 	return rc;
-
 }
-EXPORT_SYMBOL_GPL(gmap_shadow_pgt_lookup);
+EXPORT_SYMBOL_GPL(gmap_shadow_sgt_lookup);
 
 /**
  * gmap_shadow_pgt - instantiate a shadow page table
@@ -2285,6 +2320,89 @@ int gmap_shadow_pgt(struct gmap *sg, unsigned long saddr, unsigned long pgt,
 }
 EXPORT_SYMBOL_GPL(gmap_shadow_pgt);
 
+int gmap_shadow_segment(struct gmap *sg, unsigned long saddr, pmd_t pmd)
+{
+	struct gmap *parent;
+	struct gmap_rmap *rmap;
+	unsigned long vmaddr, paddr;
+	pmd_t spmd, tpmd, *spmdp = NULL, *tpmdp;
+	int prot;
+	int rc;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	parent = sg->parent;
+
+	prot = (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT) ? PROT_READ : PROT_WRITE;
+	rmap = kzalloc(sizeof(*rmap), GFP_KERNEL);
+	if (!rmap)
+		return -ENOMEM;
+	rmap->raddr = (saddr & HPAGE_MASK) | _SHADOW_RMAP_SEGMENT_LP;
+
+	while (1) {
+		paddr = pmd_val(pmd) & HPAGE_MASK;
+		vmaddr = __gmap_translate(parent, paddr);
+		if (IS_ERR_VALUE(vmaddr)) {
+			rc = vmaddr;
+			break;
+		}
+		rc = radix_tree_preload(GFP_KERNEL);
+		if (rc)
+			break;
+		rc = -EAGAIN;
+
+		/* Let's look up the parent's mapping */
+		spmdp = gmap_pmd_op_walk(parent, paddr);
+		if (spmdp) {
+			spin_lock(&sg->guest_table_lock);
+			/* Get shadow segment table pointer */
+			tpmdp = (pmd_t *) gmap_table_walk(sg, saddr, 1);
+			if (!tpmdp) {
+				spin_unlock(&sg->guest_table_lock);
+				gmap_pmd_op_end(parent, spmdp);
+				radix_tree_preload_end();
+				break;
+			}
+			/* Shadowing magic happens here. */
+			if (!(pmd_val(*tpmdp) & _SEGMENT_ENTRY_INVALID)) {
+				rc = 0;	/* already shadowed */
+				spin_unlock(&sg->guest_table_lock);
+				gmap_pmd_op_end(parent, spmdp);
+				radix_tree_preload_end();
+				break;
+			}
+			spmd = *spmdp;
+			if (!(pmd_val(spmd) & _SEGMENT_ENTRY_INVALID) &&
+			    !((pmd_val(spmd) & _SEGMENT_ENTRY_PROTECT) &&
+			      !(pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT))) {
+
+				*spmdp = __pmd(pmd_val(spmd)
+					       | _SEGMENT_ENTRY_GMAP_VSIE);
+
+				/* Insert shadow ste */
+				pmd_val(tpmd) = ((pmd_val(spmd) & HPAGE_MASK) |
+						 _SEGMENT_ENTRY_LARGE |
+						 (pmd_val(pmd) & _SEGMENT_ENTRY_PROTECT));
+				*tpmdp = tpmd;
+
+				gmap_insert_rmap(sg, vmaddr, rmap);
+				rc = 0;
+			}
+			spin_unlock(&sg->guest_table_lock);
+			gmap_pmd_op_end(parent, spmdp);
+		}
+		radix_tree_preload_end();
+		if (!rc)
+			break;
+		rc = gmap_pte_op_fixup(parent, paddr, vmaddr, prot);
+		if (rc)
+			break;
+	}
+	if (rc)
+		kfree(rmap);
+	return rc;
+}
+EXPORT_SYMBOL_GPL(gmap_shadow_segment);
+
 /**
  * gmap_shadow_page - create a shadow page mapping
  * @sg: pointer to the shadow guest address space structure
@@ -2361,6 +2479,65 @@ int gmap_shadow_page(struct gmap *sg, unsigned long saddr, pte_t pte)
 EXPORT_SYMBOL_GPL(gmap_shadow_page);
 
 /**
+ * gmap_unshadow_segment - remove a huge segment from a shadow segment table
+ * @sg: pointer to the shadow guest address space structure
+ * @raddr: rmap address in the shadow guest address space
+ *
+ * Called with the sg->guest_table_lock
+ */
+static void gmap_unshadow_segment(struct gmap *sg, unsigned long raddr)
+{
+	unsigned long *table;
+
+	BUG_ON(!gmap_is_shadow(sg));
+	/* We already have the lock */
+	table = gmap_table_walk(sg, raddr, 1); /* get segment table pointer */
+	if (!table || *table & _SEGMENT_ENTRY_INVALID ||
+	    !(*table & _SEGMENT_ENTRY_LARGE))
+		return;
+	gmap_call_notifier(sg, raddr, raddr + (1UL << 20) - 1);
+	gmap_pmdp_xchg(sg, (pmd_t *)table, __pmd(_SEGMENT_ENTRY_EMPTY), raddr);
+}
+
+static void gmap_shadow_notify_pmd(struct gmap *sg, unsigned long vmaddr,
+				   unsigned long gaddr)
+{
+	struct gmap_rmap *rmap, *rnext, *head;
+	unsigned long start, end, bits, raddr;
+
+
+	BUG_ON(!gmap_is_shadow(sg));
+
+	spin_lock(&sg->guest_table_lock);
+	if (sg->removed) {
+		spin_unlock(&sg->guest_table_lock);
+		return;
+	}
+	/* Check for top level table */
+	start = sg->orig_asce & _ASCE_ORIGIN;
+	end = start + ((sg->orig_asce & _ASCE_TABLE_LENGTH) + 1) * 4096;
+	if (!(sg->orig_asce & _ASCE_REAL_SPACE) && gaddr >= start &&
+	    gaddr < ((end & HPAGE_MASK) + HPAGE_SIZE - 1)) {
+		/* The complete shadow table has to go */
+		gmap_unshadow(sg);
+		spin_unlock(&sg->guest_table_lock);
+		list_del(&sg->list);
+		gmap_put(sg);
+		return;
+	}
+	/* Remove the page table tree from on specific entry */
+	head = radix_tree_delete(&sg->host_to_rmap, (vmaddr & HPAGE_MASK) >> PAGE_SHIFT);
+	gmap_for_each_rmap_safe(rmap, rnext, head) {
+		bits = rmap->raddr & _SHADOW_RMAP_MASK;
+		raddr = rmap->raddr ^ bits;
+		gmap_unshadow_segment(sg, raddr);
+		kfree(rmap);
+	}
+	spin_unlock(&sg->guest_table_lock);
+}
+
+
+/**
  * gmap_shadow_notify - handle notifications for shadow gmap
  *
  * Called with sg->parent->shadow_lock.
@@ -2512,6 +2689,85 @@ static void pmdp_notify_split(struct mm_struct *mm, unsigned long vmaddr,
 }
 
 /**
+ * pmdp_notify_gmap - call all invalidation callbacks for a specific pmd
+ * @gmap: pointer to the guest address space structure
+ * @gaddr: guest address which is affected
+ *
+ * This function is expected to be called with a locked
+ * guest_table_lock.
+ */
+static void pmdp_notify_gmap(struct gmap *gmap, unsigned long gaddr)
+{
+	unsigned long *table;
+	unsigned long vmaddr, bits;
+	struct gmap *sg, *next;
+
+	gaddr &= HPAGE_MASK;
+	table = gmap_table_walk(gmap, gaddr, 1);
+	if (!table)
+		return;
+	if (pmd_large(__pmd(*table)) && (*table & _SEGMENT_ENTRY_GMAP_VSIE))
+		bits = _SEGMENT_ENTRY_GMAP_VSIE;
+	if (!bits)
+		return;
+	*table &= ~bits;
+	vmaddr = __gmap_translate(gmap, gaddr);
+	if (!list_empty(&gmap->children) && (bits & _SEGMENT_ENTRY_GMAP_VSIE)
+	    && (*table & _SEGMENT_ENTRY_PROTECT)) {
+		spin_lock(&gmap->shadow_lock);
+		list_for_each_entry_safe(sg, next,
+					 &gmap->children, list)
+			gmap_shadow_notify_pmd(sg, vmaddr, gaddr);
+		spin_unlock(&gmap->shadow_lock);
+	}
+}
+
+/**
+ * pmdp_notify - call all invalidation callbacks for a specific pmd
+ * @mm: pointer to the process mm_struct
+ * @vmaddr: virtual address in the process address space
+ *
+ * This function is expected to be called with mmap_sem held in read.
+ */
+void pmdp_notify(struct mm_struct *mm, unsigned long vmaddr)
+{
+	unsigned long *table, gaddr, bits;
+	struct gmap *gmap, *sg, *next;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
+		spin_lock(&gmap->guest_table_lock);
+		table = radix_tree_lookup(&gmap->host_to_guest,
+					  vmaddr >> PMD_SHIFT);
+		if (!table) {
+			spin_unlock(&gmap->guest_table_lock);
+			continue;
+		}
+
+		if (gmap_pmd_is_split((pmd_t *)table)) {
+			pmdp_notify_split(mm, vmaddr, table);
+			spin_unlock(&gmap->guest_table_lock);
+			continue;
+		}
+
+		if (pmd_large(__pmd(*table)) && (*table & _SEGMENT_ENTRY_GMAP_VSIE))
+			bits = _SEGMENT_ENTRY_GMAP_VSIE;
+		*table &= ~bits;
+		gaddr = __gmap_segment_gaddr(table);
+		spin_unlock(&gmap->guest_table_lock);
+		if (!list_empty(&gmap->children) && (bits & _SEGMENT_ENTRY_GMAP_VSIE)) {
+			spin_lock(&gmap->shadow_lock);
+			list_for_each_entry_safe(sg, next,
+						 &gmap->children, list)
+				gmap_shadow_notify_pmd(sg, vmaddr, gaddr);
+			spin_unlock(&gmap->shadow_lock);
+		}
+	}
+	rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(pmdp_notify);
+
+/**
  * gmap_pmdp_xchg - exchange a gmap pmd with another and notify
  * @gmap: pointer to the guest address space structure
  * @pmdp: pointer to the pmd entry
@@ -2524,6 +2780,7 @@ static void pmdp_notify_split(struct mm_struct *mm, unsigned long vmaddr,
 static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
 			   unsigned long gaddr)
 {
+	pmdp_notify_gmap(gmap, gaddr);
 	if (MACHINE_HAS_TLB_GUEST)
 		__pmdp_idte(gaddr, (pmd_t *)pmdp,
 			    IDTE_GUEST_ASCE, gmap->asce,
@@ -2536,39 +2793,6 @@ static void gmap_pmdp_xchg(struct gmap *gmap, pmd_t *pmdp, pmd_t new,
 	*pmdp = new;
 }
 
-/**
- * pmdp_notify - call all invalidation callbacks for a specific pmd
- * @mm: pointer to the process mm_struct
- * @vmaddr: virtual address in the process address space
- *
- * This function is expected to be called with mmap_sem held in read.
- */
-void pmdp_notify(struct mm_struct *mm, unsigned long vmaddr)
-{
-	unsigned long *table, gaddr;
-	struct gmap *gmap;
-
-	rcu_read_lock();
-	list_for_each_entry_rcu(gmap, &mm->context.gmap_list, list) {
-		spin_lock(&gmap->guest_table_lock);
-		table = radix_tree_lookup(&gmap->host_to_guest,
-					  vmaddr >> PMD_SHIFT);
-		if (!table) {
-			spin_unlock(&gmap->guest_table_lock);
-			continue;
-		}
-		gaddr = __gmap_segment_gaddr(table);
-		if (gmap_pmd_is_split((pmd_t *)table)) {
-			pmdp_notify_split(mm, vmaddr, table);
-			spin_unlock(&gmap->guest_table_lock);
-			continue;
-		}
-		spin_unlock(&gmap->guest_table_lock);
-	}
-	rcu_read_unlock();
-}
-EXPORT_SYMBOL_GPL(pmdp_notify);
-
 static void gmap_pmdp_clear(struct mm_struct *mm, unsigned long vmaddr,
 			    int purge)
 {
-- 
2.7.4




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux