On 1/25/22 8:29 AM, Pierre Morel wrote:
On 1/14/22 21:31, Matthew Rosato wrote:
These routines will be wired into the vfio_pci_zdev ioctl handlers to
respond to requests to enable / disable a device for PCI I/O Address
Translation assistance.
Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
---
arch/s390/include/asm/kvm_pci.h | 15 ++++
arch/s390/include/asm/pci_dma.h | 2 +
arch/s390/kvm/pci.c | 139 ++++++++++++++++++++++++++++++++
arch/s390/kvm/pci.h | 2 +
4 files changed, 158 insertions(+)
diff --git a/arch/s390/include/asm/kvm_pci.h
b/arch/s390/include/asm/kvm_pci.h
index 01fe14fffd7a..770849f13a70 100644
--- a/arch/s390/include/asm/kvm_pci.h
+++ b/arch/s390/include/asm/kvm_pci.h
@@ -16,11 +16,21 @@
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/pci.h>
+#include <linux/mutex.h>
#include <asm/pci_insn.h>
+#include <asm/pci_dma.h>
+
+struct kvm_zdev_ioat {
+ unsigned long *head[ZPCI_TABLE_PAGES];
+ unsigned long **seg;
+ unsigned long ***pt;
+ struct mutex lock;
Can we please rename the mutex ioat_lock to have a unique name easy to
follow for maintenance.
Can you please add a description about when the lock should be used?
OK. The lock is meant to protect the contents of kvm_zdev_ioat -- I'll
think of something to describe it.
+};
struct kvm_zdev {
struct zpci_dev *zdev;
struct kvm *kvm;
+ struct kvm_zdev_ioat ioat;
struct zpci_fib fib;
};
@@ -33,6 +43,11 @@ int kvm_s390_pci_aif_enable(struct zpci_dev *zdev,
struct zpci_fib *fib,
bool assist);
int kvm_s390_pci_aif_disable(struct zpci_dev *zdev);
+int kvm_s390_pci_ioat_probe(struct zpci_dev *zdev);
+int kvm_s390_pci_ioat_enable(struct zpci_dev *zdev, u64 iota);
+int kvm_s390_pci_ioat_disable(struct zpci_dev *zdev);
+u8 kvm_s390_pci_get_dtsm(struct zpci_dev *zdev);
+
int kvm_s390_pci_interp_probe(struct zpci_dev *zdev);
int kvm_s390_pci_interp_enable(struct zpci_dev *zdev);
int kvm_s390_pci_interp_disable(struct zpci_dev *zdev);
diff --git a/arch/s390/include/asm/pci_dma.h
b/arch/s390/include/asm/pci_dma.h
index 91e63426bdc5..69e616d0712c 100644
--- a/arch/s390/include/asm/pci_dma.h
+++ b/arch/s390/include/asm/pci_dma.h
@@ -50,6 +50,8 @@ enum zpci_ioat_dtype {
#define ZPCI_TABLE_ALIGN ZPCI_TABLE_SIZE
#define ZPCI_TABLE_ENTRY_SIZE (sizeof(unsigned long))
#define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE /
ZPCI_TABLE_ENTRY_SIZE)
+#define ZPCI_TABLE_PAGES (ZPCI_TABLE_SIZE >> PAGE_SHIFT)
+#define ZPCI_TABLE_ENTRIES_PAGES (ZPCI_TABLE_ENTRIES *
ZPCI_TABLE_PAGES)
#define ZPCI_TABLE_BITS 11
#define ZPCI_PT_BITS 8
diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
index 7ed9abc476b6..39c13c25a700 100644
--- a/arch/s390/kvm/pci.c
+++ b/arch/s390/kvm/pci.c
@@ -13,12 +13,15 @@
#include <asm/pci.h>
#include <asm/pci_insn.h>
#include <asm/pci_io.h>
+#include <asm/pci_dma.h>
#include <asm/sclp.h>
#include "pci.h"
#include "kvm-s390.h"
struct zpci_aift *aift;
+#define shadow_ioat_init zdev->kzdev->ioat.head[0]
+
static inline int __set_irq_noiib(u16 ctl, u8 isc)
{
union zpci_sic_iib iib = {{0}};
@@ -344,6 +347,135 @@ int kvm_s390_pci_aif_disable(struct zpci_dev *zdev)
}
EXPORT_SYMBOL_GPL(kvm_s390_pci_aif_disable);
+int kvm_s390_pci_ioat_probe(struct zpci_dev *zdev)
+{
+ /* Must have a KVM association registered */
may be add something like : "The ioat structure is embeded in kzdev"
+ if (!zdev->kzdev || !zdev->kzdev->kvm)
Why do we need to check for kvm ?
Having kzdev is already tested by the unique caller.
We probably don't need to check for the kzdev because the caller already
did this, agreed there.
But as for checking the kvm association, Alex asked for this in a
comment to v1 (comment was against one of the vfio patches that call
these routines) -- The reason being the probe comes from a userspace
request and can be against any vfio-pci(-zdev) device at any time, and
there's no point in proceeding if this device is not associated with a
KVM guest -- It's possible for the KVM notifier to also pass a null KVM
address -- so I think it's better to just be sure here. In a
well-behaved environment we would never see this (so, another case for
an s390dbf entry)
+ return -EINVAL;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(kvm_s390_pci_ioat_probe);
+
+int kvm_s390_pci_ioat_enable(struct zpci_dev *zdev, u64 iota)
+{
+ gpa_t gpa = (gpa_t)(iota & ZPCI_RTE_ADDR_MASK);
+ struct kvm_zdev_ioat *ioat;
+ struct page *page;
+ struct kvm *kvm;
+ unsigned int idx;
+ void *iaddr;
+ int i, rc = 0;
no need to initialize rc
Agree based on the changes below
+
+ if (shadow_ioat_init)
+ return -EINVAL;
+
+ /* Ensure supported type specified */
+ if ((iota & ZPCI_IOTA_RTTO_FLAG) != ZPCI_IOTA_RTTO_FLAG)
+ return -EINVAL;
+
+ kvm = zdev->kzdev->kvm;
+ ioat = &zdev->kzdev->ioat;
+ mutex_lock(&ioat->lock);
+ idx = srcu_read_lock(&kvm->srcu);
+ for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+ page = gfn_to_page(kvm, gpa_to_gfn(gpa));
+ if (is_error_page(page)) {
+ srcu_read_unlock(&kvm->srcu, idx);
+ rc = -EIO;
+ goto out;
goto unpin ?
Ah, right, in case we hit this error somewhere in the middle of the loop.
+ }
+ iaddr = page_to_virt(page) + (gpa & ~PAGE_MASK);
+ ioat->head[i] = (unsigned long *)iaddr;
+ gpa += PAGE_SIZE;
+ }
+ srcu_read_unlock(&kvm->srcu, idx);
+
+ zdev->kzdev->ioat.seg = kcalloc(ZPCI_TABLE_ENTRIES_PAGES,
+ sizeof(unsigned long *), GFP_KERNEL);
What about:
ioat->seg = kcalloc(ZPCI_TABLE_ENTRIES_PAGES,
sizeof(*ioat->seg), GFP_KERNEL);
if (!ioat->seg)
...
ioat->pt = ...
?
Yep, would be fine (seems I forgot about the local *ioat here)
+ if (!zdev->kzdev->ioat.seg)
+ goto unpin;
+ zdev->kzdev->ioat.pt = kcalloc(ZPCI_TABLE_ENTRIES,
+ sizeof(unsigned long **), GFP_KERNEL);
+ if (!zdev->kzdev->ioat.pt)
+ goto free_seg;
+
+out:
+ mutex_unlock(&ioat->lock);
+ return rc;
return 0 ?
Yes, we can do that now that we don't goto out: after is_error_page
+
+free_seg:
+ kfree(zdev->kzdev->ioat.seg);
kfree(ioat->seg) ?
rc = -ENOMEM;
+unpin:
+ for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
+ kvm_release_pfn_dirty((u64)ioat->head[i] >> PAGE_SHIFT);
+ ioat->head[i] = 0;
+ }
+ mutex_unlock(&ioat->lock);
+ return -ENOMEM;
return rc;
And yes, agreed, now that we come here for other reasons (-EIO) we must
return rc here and also set rc=-ENOMEM as you say for the kfree case above.