Move gmap related functions from kernel/uv into kvm.
Create a new file to collect gmap-related functions.
Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
---
arch/s390/include/asm/uv.h | 7 +-
arch/s390/kernel/uv.c | 293 ++++++-------------------------------
arch/s390/kvm/Makefile | 2 +-
arch/s390/kvm/gmap.c | 183 +++++++++++++++++++++++
arch/s390/kvm/gmap.h | 17 +++
arch/s390/kvm/intercept.c | 1 +
arch/s390/kvm/kvm-s390.c | 1 +
arch/s390/kvm/pv.c | 1 +
8 files changed, 251 insertions(+), 254 deletions(-)
create mode 100644 arch/s390/kvm/gmap.c
create mode 100644 arch/s390/kvm/gmap.h
diff --git a/arch/s390/include/asm/uv.h b/arch/s390/include/asm/uv.h
index dc332609f2c3..22ec1a24c291 100644
--- a/arch/s390/include/asm/uv.h
+++ b/arch/s390/include/asm/uv.h
@@ -628,12 +628,13 @@ static inline int is_prot_virt_host(void)
}
int uv_pin_shared(unsigned long paddr);
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb);
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr);
int uv_destroy_folio(struct folio *folio);
int uv_destroy_pte(pte_t pte);
int uv_convert_from_secure_pte(pte_t pte);
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr);
+int uv_wiggle_folio(struct folio *folio, bool split);
+int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb);
+int uv_convert_from_secure(unsigned long paddr);
+int uv_convert_from_secure_folio(struct folio *folio);
void setup_uv(void);
diff --git a/arch/s390/kernel/uv.c b/arch/s390/kernel/uv.c
index 6f9654a191ad..832c39c9ccfa 100644
--- a/arch/s390/kernel/uv.c
+++ b/arch/s390/kernel/uv.c
@@ -19,19 +19,6 @@
#include <asm/sections.h>
#include <asm/uv.h>
-#if !IS_ENABLED(CONFIG_KVM)
-unsigned long __gmap_translate(struct gmap *gmap, unsigned long gaddr)
-{
- return 0;
-}
-
-int gmap_fault(struct gmap *gmap, unsigned long gaddr,
- unsigned int fault_flags)
-{
- return 0;
-}
-#endif
-
/* the bootdata_preserved fields come from ones in arch/s390/boot/uv.c */
int __bootdata_preserved(prot_virt_guest);
EXPORT_SYMBOL(prot_virt_guest);
@@ -159,6 +146,7 @@ int uv_destroy_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL(uv_destroy_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -175,7 +163,7 @@ int uv_destroy_pte(pte_t pte)
*
* @paddr: Absolute host address of page to be exported
*/
-static int uv_convert_from_secure(unsigned long paddr)
+int uv_convert_from_secure(unsigned long paddr)
{
struct uv_cb_cfs uvcb = {
.header.cmd = UVC_CMD_CONV_FROM_SEC_STOR,
@@ -187,11 +175,12 @@ static int uv_convert_from_secure(unsigned long paddr)
return -EINVAL;
return 0;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure);
/*
* The caller must already hold a reference to the folio.
*/
-static int uv_convert_from_secure_folio(struct folio *folio)
+int uv_convert_from_secure_folio(struct folio *folio)
{
int rc;
@@ -206,6 +195,7 @@ static int uv_convert_from_secure_folio(struct folio *folio)
folio_put(folio);
return rc;
}
+EXPORT_SYMBOL_GPL(uv_convert_from_secure_folio);
/*
* The present PTE still indirectly holds a folio reference through the mapping.
@@ -237,13 +227,32 @@ static int expected_folio_refs(struct folio *folio)
return res;
}
-static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
+/**
+ * make_folio_secure() - make a folio secure
+ * @folio: the folio to make secure
+ * @uvcb: the uvcb that describes the UVC to be used
+ *
+ * The folio @folio will be made secure if possible, @uvcb will be passed
+ * as-is to the UVC.
+ *
+ * Return: 0 on success;
+ * -EBUSY if the folio is in writeback, has too many references, or is large;
+ * -EAGAIN if the UVC needs to be attempted again;
+ * -ENXIO if the address is not mapped;
+ * -EINVAL if the UVC failed for other reasons.
+ *
+ * Context: The caller must hold exactly one extra reference on the folio
+ * (it's the same logic as split_folio())
+ */
+int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
{
int expected, cc = 0;
+ if (folio_test_large(folio))
+ return -EBUSY;
if (folio_test_writeback(folio))
- return -EAGAIN;
- expected = expected_folio_refs(folio);
+ return -EBUSY;
+ expected = expected_folio_refs(folio) + 1;
if (!folio_ref_freeze(folio, expected))
return -EBUSY;
set_bit(PG_arch_1, &folio->flags);
@@ -267,251 +276,35 @@ static int make_folio_secure(struct folio *folio, struct uv_cb_header *uvcb)
return -EAGAIN;
return uvcb->rc == 0x10a ? -ENXIO : -EINVAL;
}
+EXPORT_SYMBOL_GPL(make_folio_secure);
/**
- * should_export_before_import - Determine whether an export is needed
- * before an import-like operation
- * @uvcb: the Ultravisor control block of the UVC to be performed
- * @mm: the mm of the process
- *
- * Returns whether an export is needed before every import-like operation.
- * This is needed for shared pages, which don't trigger a secure storage
- * exception when accessed from a different guest.
- *
- * Although considered as one, the Unpin Page UVC is not an actual import,
- * so it is not affected.
+ * uv_wiggle_folio() - try to drain extra references to a folio
+ * @folio: the folio
+ * @split: whether to split a large folio
*
- * No export is needed also when there is only one protected VM, because the
- * page cannot belong to the wrong VM in that case (there is no "other VM"
- * it can belong to).
- *
- * Return: true if an export is needed before every import, otherwise false.
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
*/
-static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+int uv_wiggle_folio(struct folio *folio, bool split)
{
- /*
- * The misc feature indicates, among other things, that importing a
- * shared page from a different protected VM will automatically also
- * transfer its ownership.
- */
- if (uv_has_feature(BIT_UV_FEAT_MISC))
- return false;
- if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
- return false;
- return atomic_read(&mm->context.protected_count) > 1;
-}
-
-/*
- * Drain LRU caches: the local one on first invocation and the ones of all
- * CPUs on successive invocations. Returns "true" on the first invocation.
- */
-static bool drain_lru(bool *drain_lru_called)
-{
- /*
- * If we have tried a local drain and the folio refcount
- * still does not match our expected safe value, try with a
- * system wide drain. This is needed if the pagevecs holding
- * the page are on a different CPU.
- */
- if (*drain_lru_called) {
- lru_add_drain_all();
- /* We give up here, don't retry immediately. */
- return false;
- }
- /*
- * We are here if the folio refcount does not match the
- * expected safe value. The main culprits are usually
- * pagevecs. With lru_add_drain() we drain the pagevecs
- * on the local CPU so that hopefully the refcount will
- * reach the expected safe value.
- */
- lru_add_drain();
- *drain_lru_called = true;
- /* The caller should try again immediately */
- return true;
-}
-
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
-{
- struct vm_area_struct *vma;
- bool drain_lru_called = false;
- spinlock_t *ptelock;
- unsigned long uaddr;
- struct folio *folio;
- pte_t *ptep;
int rc;
-again:
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode. If
- * userspace is playing dirty tricky with mapping huge pages later
- * on this will result in a segmentation fault.
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = -ENXIO;
- ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
- if (!ptep)
- goto out;
- if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- folio = page_folio(pte_page(*ptep));
- rc = -EAGAIN;
- if (folio_test_large(folio)) {
- rc = -E2BIG;
- } else if (folio_trylock(folio)) {
- if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
- rc = make_folio_secure(folio, uvcb);
- folio_unlock(folio);
- }
-
- /*
- * Once we drop the PTL, the folio may get unmapped and
- * freed immediately. We need a temporary reference.
- */
- if (rc == -EAGAIN || rc == -E2BIG)
- folio_get(folio);
- }
- pte_unmap_unlock(ptep, ptelock);
-out:
- mmap_read_unlock(gmap->mm);
-
- switch (rc) {
- case -E2BIG:
+ folio_wait_writeback(folio);
+ if (split) {
folio_lock(folio);
rc = split_folio(folio);
folio_unlock(folio);
- folio_put(folio);
-
- switch (rc) {
- case 0:
- /* Splitting succeeded, try again immediately. */
- goto again;
- case -EAGAIN:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -EBUSY:
- /* Unexpected race. */
+
+ if (rc == -EBUSY)
return -EAGAIN;
- }
- WARN_ON_ONCE(1);
- return -ENXIO;
- case -EAGAIN:
- /*
- * If we are here because the UVC returned busy or partial
- * completion, this is just a useless check, but it is safe.
- */
- folio_wait_writeback(folio);
- folio_put(folio);
- return -EAGAIN;
- case -EBUSY:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -ENXIO:
- if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
- return -EFAULT;
- return -EAGAIN;
+ if (rc != -EAGAIN)
+ return rc;
}
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_make_secure);
-
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
-
- return gmap_make_secure(gmap, gaddr, &uvcb);
-}
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
-
-/**
- * gmap_destroy_page - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
-{
- struct vm_area_struct *vma;
- struct folio_walk fw;
- unsigned long uaddr;
- struct folio *folio;
- int rc;
-
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Huge pages should not be able to become secure
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = 0;
- folio = folio_walk_start(&fw, vma, uaddr, 0);
- if (!folio)
- goto out;
- /*
- * See gmap_make_secure(): large folios cannot be secure. Small
- * folio implies FW_LEVEL_PTE.
- */
- if (folio_test_large(folio) || !pte_write(fw.pte))
- goto out_walk_end;
- rc = uv_destroy_folio(folio);
- /*
- * Fault handlers can race; it is possible that two CPUs will fault
- * on the same secure page. One CPU can destroy the page, reboot,
- * re-enter secure mode and import it, while the second CPU was
- * stuck at the beginning of the handler. At some point the second
- * CPU will be able to progress, and it will not be able to destroy
- * the page. In that case we do not want to terminate the process,
- * we instead try to export the page.
- */
- if (rc)
- rc = uv_convert_from_secure_folio(folio);
-out_walk_end:
- folio_walk_end(&fw, vma);
-out:
- mmap_read_unlock(gmap->mm);
- return rc;
+ lru_add_drain_all();
+ return -EAGAIN;
}
-EXPORT_SYMBOL_GPL(gmap_destroy_page);
+EXPORT_SYMBOL_GPL(uv_wiggle_folio);
/*
* To be called with the folio locked or with an extra reference! This will
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 02217fb4ae10..d972dea657fd 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
new file mode 100644
index 000000000000..a142bbbddc25
--- /dev/null
+++ b/arch/s390/kvm/gmap.c
@@ -0,0 +1,183 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
+ * Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
+ * David Hildenbrand <david@xxxxxxxxxx>
+ * Janosch Frank <frankja@xxxxxxxxxxxxxxxxxx>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "gmap.h"
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
+static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode. If
+ * userspace is playing dirty tricky with mapping huge pages later