On 1/16/25 12:33 PM, Claudio Imbrenda wrote:
Move gmap related functions from kernel/uv into kvm.
Create a new file to collect gmap-related functions.
Signed-off-by: Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
---
Thanks git, this is close to unreadable
/**
- * should_export_before_import - Determine whether an export is needed
- * before an import-like operation
- * @uvcb: the Ultravisor control block of the UVC to be performed
- * @mm: the mm of the process
- *
- * Returns whether an export is needed before every import-like operation.
- * This is needed for shared pages, which don't trigger a secure storage
- * exception when accessed from a different guest.
- *
- * Although considered as one, the Unpin Page UVC is not an actual import,
- * so it is not affected.
+ * uv_wiggle_folio() - try to drain extra references to a folio
+ * @folio: the folio
+ * @split: whether to split a large folio
*
- * No export is needed also when there is only one protected VM, because the
- * page cannot belong to the wrong VM in that case (there is no "other VM"
- * it can belong to).
- *
- * Return: true if an export is needed before every import, otherwise false.
+ * Context: Must be called while holding an extra reference to the folio;
+ * the mm lock should not be held.
*/
-static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+int uv_wiggle_folio(struct folio *folio, bool split)
Should I expect a drop of references to also split THPs?
Seems a bit odd to me but I do not know a lot about folios.
{
- /*
- * The misc feature indicates, among other things, that importing a
- * shared page from a different protected VM will automatically also
- * transfer its ownership.
- */
- if (uv_has_feature(BIT_UV_FEAT_MISC))
- return false;
- if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
- return false;
- return atomic_read(&mm->context.protected_count) > 1;
-}
-
[...]
-/*
- * Requests the Ultravisor to make a page accessible to a guest.
- * If it's brought in the first time, it will be cleared. If
- * it has been exported before, it will be decrypted and integrity
- * checked.
- */
-int gmap_make_secure(struct gmap *gmap, unsigned long gaddr, void *uvcb)
-{
- struct vm_area_struct *vma;
- bool drain_lru_called = false;
- spinlock_t *ptelock;
- unsigned long uaddr;
- struct folio *folio;
- pte_t *ptep;
int rc;
-again:
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Secure pages cannot be huge and userspace should not combine both.
- * In case userspace does it anyway this will result in an -EFAULT for
- * the unpack. The guest is thus never reaching secure mode. If
- * userspace is playing dirty tricky with mapping huge pages later
- * on this will result in a segmentation fault.
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = -ENXIO;
- ptep = get_locked_pte(gmap->mm, uaddr, &ptelock);
- if (!ptep)
- goto out;
- if (pte_present(*ptep) && !(pte_val(*ptep) & _PAGE_INVALID) && pte_write(*ptep)) {
- folio = page_folio(pte_page(*ptep));
- rc = -EAGAIN;
- if (folio_test_large(folio)) {
- rc = -E2BIG;
- } else if (folio_trylock(folio)) {
- if (should_export_before_import(uvcb, gmap->mm))
- uv_convert_from_secure(PFN_PHYS(folio_pfn(folio)));
- rc = make_folio_secure(folio, uvcb);
- folio_unlock(folio);
- }
-
- /*
- * Once we drop the PTL, the folio may get unmapped and
- * freed immediately. We need a temporary reference.
- */
- if (rc == -EAGAIN || rc == -E2BIG)
- folio_get(folio);
- }
- pte_unmap_unlock(ptep, ptelock);
-out:
- mmap_read_unlock(gmap->mm);
-
- switch (rc) {
- case -E2BIG:
+ folio_wait_writeback(folio);
Add an assert_not_held for the mm mutex above
"folio_wait_writeback(folio);"?
+ if (split) {
folio_lock(folio);
rc = split_folio(folio);
folio_unlock(folio);
- folio_put(folio);
-
- switch (rc) {
- case 0:
- /* Splitting succeeded, try again immediately. */
- goto again;
- case -EAGAIN:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -EBUSY:
- /* Unexpected race. */
+
+ if (rc == -EBUSY)
return -EAGAIN;
- }
- WARN_ON_ONCE(1);
- return -ENXIO;
- case -EAGAIN:
- /*
- * If we are here because the UVC returned busy or partial
- * completion, this is just a useless check, but it is safe.
- */
- folio_wait_writeback(folio);
- folio_put(folio);
- return -EAGAIN;
- case -EBUSY:
- /* Additional folio references. */
- if (drain_lru(&drain_lru_called))
- goto again;
- return -EAGAIN;
- case -ENXIO:
- if (gmap_fault(gmap, gaddr, FAULT_FLAG_WRITE))
- return -EFAULT;
- return -EAGAIN;
+ if (rc != -EAGAIN)
+ return rc;
}
- return rc;
-}
-EXPORT_SYMBOL_GPL(gmap_make_secure);
-
-int gmap_convert_to_secure(struct gmap *gmap, unsigned long gaddr)
-{
- struct uv_cb_cts uvcb = {
- .header.cmd = UVC_CMD_CONV_TO_SEC_STOR,
- .header.len = sizeof(uvcb),
- .guest_handle = gmap->guest_handle,
- .gaddr = gaddr,
- };
-
- return gmap_make_secure(gmap, gaddr, &uvcb);
-}
-EXPORT_SYMBOL_GPL(gmap_convert_to_secure);
-
-/**
- * gmap_destroy_page - Destroy a guest page.
- * @gmap: the gmap of the guest
- * @gaddr: the guest address to destroy
- *
- * An attempt will be made to destroy the given guest page. If the attempt
- * fails, an attempt is made to export the page. If both attempts fail, an
- * appropriate error is returned.
- */
-int gmap_destroy_page(struct gmap *gmap, unsigned long gaddr)
-{
- struct vm_area_struct *vma;
- struct folio_walk fw;
- unsigned long uaddr;
- struct folio *folio;
- int rc;
-
- rc = -EFAULT;
- mmap_read_lock(gmap->mm);
-
- uaddr = __gmap_translate(gmap, gaddr);
- if (IS_ERR_VALUE(uaddr))
- goto out;
- vma = vma_lookup(gmap->mm, uaddr);
- if (!vma)
- goto out;
- /*
- * Huge pages should not be able to become secure
- */
- if (is_vm_hugetlb_page(vma))
- goto out;
-
- rc = 0;
- folio = folio_walk_start(&fw, vma, uaddr, 0);
- if (!folio)
- goto out;
- /*
- * See gmap_make_secure(): large folios cannot be secure. Small
- * folio implies FW_LEVEL_PTE.
- */
- if (folio_test_large(folio) || !pte_write(fw.pte))
- goto out_walk_end;
- rc = uv_destroy_folio(folio);
- /*
- * Fault handlers can race; it is possible that two CPUs will fault
- * on the same secure page. One CPU can destroy the page, reboot,
- * re-enter secure mode and import it, while the second CPU was
- * stuck at the beginning of the handler. At some point the second
- * CPU will be able to progress, and it will not be able to destroy
- * the page. In that case we do not want to terminate the process,
- * we instead try to export the page.
- */
- if (rc)
- rc = uv_convert_from_secure_folio(folio);
-out_walk_end:
- folio_walk_end(&fw, vma);
-out:
- mmap_read_unlock(gmap->mm);
- return rc;
+ lru_add_drain_all();
+ return -EAGAIN;
}
-EXPORT_SYMBOL_GPL(gmap_destroy_page);
+EXPORT_SYMBOL_GPL(uv_wiggle_folio);
/*
* To be called with the folio locked or with an extra reference! This will
diff --git a/arch/s390/kvm/Makefile b/arch/s390/kvm/Makefile
index 02217fb4ae10..d972dea657fd 100644
--- a/arch/s390/kvm/Makefile
+++ b/arch/s390/kvm/Makefile
@@ -8,7 +8,7 @@ include $(srctree)/virt/kvm/Makefile.kvm
ccflags-y := -Ivirt/kvm -Iarch/s390/kvm
kvm-y += kvm-s390.o intercept.o interrupt.o priv.o sigp.o
-kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o
+kvm-y += diag.o gaccess.o guestdbg.o vsie.o pv.o gmap.o
kvm-$(CONFIG_VFIO_PCI_ZDEV_KVM) += pci.o
obj-$(CONFIG_KVM) += kvm.o
diff --git a/arch/s390/kvm/gmap.c b/arch/s390/kvm/gmap.c
new file mode 100644
index 000000000000..c0911a863902
--- /dev/null
+++ b/arch/s390/kvm/gmap.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Guest memory management for KVM/s390
+ *
+ * Copyright IBM Corp. 2008, 2020, 2024
+ *
+ * Author(s): Claudio Imbrenda <imbrenda@xxxxxxxxxxxxx>
+ * Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
+ * David Hildenbrand <david@xxxxxxxxxx>
+ * Janosch Frank <frankja@xxxxxxxxxxxxxxxxxx>
+ */
+
+#include <linux/compiler.h>
+#include <linux/kvm.h>
+#include <linux/kvm_host.h>
+#include <linux/pgtable.h>
+#include <linux/pagemap.h>
+
+#include <asm/lowcore.h>
+#include <asm/gmap.h>
+#include <asm/uv.h>
+
+#include "gmap.h"
+
+/**
+ * should_export_before_import - Determine whether an export is needed
+ * before an import-like operation
+ * @uvcb: the Ultravisor control block of the UVC to be performed
+ * @mm: the mm of the process
+ *
+ * Returns whether an export is needed before every import-like operation.
+ * This is needed for shared pages, which don't trigger a secure storage
+ * exception when accessed from a different guest.
+ *
+ * Although considered as one, the Unpin Page UVC is not an actual import,
+ * so it is not affected.
+ *
+ * No export is needed also when there is only one protected VM, because the
+ * page cannot belong to the wrong VM in that case (there is no "other VM"
+ * it can belong to).
+ *
+ * Return: true if an export is needed before every import, otherwise false.
+ */
+static bool should_export_before_import(struct uv_cb_header *uvcb, struct mm_struct *mm)
+{
+ /*
+ * The misc feature indicates, among other things, that importing a
+ * shared page from a different protected VM will automatically also
+ * transfer its ownership.
+ */
+ if (uv_has_feature(BIT_UV_FEAT_MISC))
+ return false;
+ if (uvcb->cmd == UVC_CMD_UNPIN_PAGE_SHARED)
+ return false;
+ return atomic_read(&mm->context.protected_count) > 1;
+}
+
+static int __gmap_make_secure(struct gmap *gmap, struct page *page, void *uvcb)
+{
+ struct folio *folio = page_folio(page);
+ int rc;
+
+ /*
+ * Secure pages cannot be huge and userspace should not combine both.
+ * In case userspace does it anyway this will result in an -EFAULT for
+ * the unpack. The guest is thus never reaching secure mode.
+ * If userspace plays dirty tricks and decides to map huge pages at a
+ * later point in time, it will receive a segmentation fault or
+ * KVM_RUN will return -EFAULT.
+ */
+ if (folio_test_hugetlb(folio))
+ return -EFAULT;
+ if (folio_test_large(folio)) {
+ mmap_read_unlock(gmap->mm);
+ rc = uv_wiggle_folio(folio, true);
+ mmap_read_lock(gmap->mm);
+ if (rc)
+ return rc;
+ folio = page_folio(page);
+ }
+
+ rc = -EAGAIN;
+ if (folio_trylock(folio)) {
If you test for !folio_trylock() you could do an early return, no?
+ if (should_export_before_import(uvcb, gmap->mm))
+ uv_convert_from_secure(folio_to_phys(folio));
+ rc = make_folio_secure(folio, uvcb);
+ folio_unlock(folio);
+ }
+
+ /*
+ * Unlikely case: the page is not mapped anymore. Return success
+ * and let the proper fault handler fault in the page again.
+ */
+ if (rc == -ENXIO)
+ return 0;
+ /* The folio has too many references, try to shake some off */
+ if (rc == -EBUSY) {
+ mmap_read_unlock(gmap->mm);
+ uv_wiggle_folio(folio, false);
+ mmap_read_lock(gmap->mm);
+ return -EAGAIN;
+ }
+
+ return rc;
+}