Re: [PATCH 12/12] s390x/pci: let intercept devices have separate PCI groups

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 





On 12/7/21 22:04, Matthew Rosato wrote:
Let's use the reserved pool of simulated PCI groups to allow intercept
devices to have separate groups from interpreted devices as some group
values may be different. If we run out of simulated PCI groups, subsequent
intercept devices just get the default group.
Furthermore, if we encounter any PCI groups from hostdevs that are marked
as simulated, let's just assign them to the default group to avoid
conflicts between host simulated groups and our own simulated groups.

I have a problem here.
We will have the same hardware, viewed through two different VFIO implementations (interpretation vs. interception), reporting different group IDs.

The alternative is to have them report the same group ID with different values.

I fear both are wrong.

On the other hand, should there be a difference in the QEMU command line between intercepted and interpreted devices for default values? If not, why not give up the host values, so that in a hypothetical future migration we are clean with regard to the GID?

I am not sure of this, just want to open a little discussion on this.

For example, what could go wrong if we kept the host values returned by the CAP?



Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
---
  hw/s390x/s390-pci-bus.c         | 19 ++++++++++++++--
  hw/s390x/s390-pci-vfio.c        | 40 ++++++++++++++++++++++++++++++---
  include/hw/s390x/s390-pci-bus.h |  6 ++++-
  3 files changed, 59 insertions(+), 6 deletions(-)

diff --git a/hw/s390x/s390-pci-bus.c b/hw/s390x/s390-pci-bus.c
index ab442f17fb..8b0f3ef120 100644
--- a/hw/s390x/s390-pci-bus.c
+++ b/hw/s390x/s390-pci-bus.c
@@ -747,13 +747,14 @@ static void s390_pci_iommu_free(S390pciState *s, PCIBus *bus, int32_t devfn)
      object_unref(OBJECT(iommu));
  }
-S390PCIGroup *s390_group_create(int id)
+S390PCIGroup *s390_group_create(int id, int host_id)
  {
      S390PCIGroup *group;
      S390pciState *s = s390_get_phb();
group = g_new0(S390PCIGroup, 1);
      group->id = id;
+    group->host_id = host_id;
      QTAILQ_INSERT_TAIL(&s->zpci_groups, group, link);
      return group;
  }
@@ -771,12 +772,25 @@ S390PCIGroup *s390_group_find(int id)
      return NULL;
  }
+S390PCIGroup *s390_group_find_host_sim(int host_id)
+{
+    S390PCIGroup *group;
+    S390pciState *s = s390_get_phb();
+
+    QTAILQ_FOREACH(group, &s->zpci_groups, link) {
+        if (group->id >= ZPCI_SIM_GRP_START && group->host_id == host_id) {
+            return group;
+        }
+    }
+    return NULL;
+}
+
  static void s390_pci_init_default_group(void)
  {
      S390PCIGroup *group;
      ClpRspQueryPciGrp *resgrp;
- group = s390_group_create(ZPCI_DEFAULT_FN_GRP);
+    group = s390_group_create(ZPCI_DEFAULT_FN_GRP, ZPCI_DEFAULT_FN_GRP);
      resgrp = &group->zpci_group;
      resgrp->fr = 1;
      resgrp->dasm = 0;
@@ -824,6 +838,7 @@ static void s390_pcihost_realize(DeviceState *dev, Error **errp)
                                             NULL, g_free);
      s->zpci_table = g_hash_table_new_full(g_int_hash, g_int_equal, NULL, NULL);
      s->bus_no = 0;
+    s->next_sim_grp = ZPCI_SIM_GRP_START;
      QTAILQ_INIT(&s->pending_sei);
      QTAILQ_INIT(&s->zpci_devs);
      QTAILQ_INIT(&s->zpci_dma_limit);
diff --git a/hw/s390x/s390-pci-vfio.c b/hw/s390x/s390-pci-vfio.c
index c9269683f5..bdc5892287 100644
--- a/hw/s390x/s390-pci-vfio.c
+++ b/hw/s390x/s390-pci-vfio.c
@@ -305,13 +305,17 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev,
  {
      struct vfio_info_cap_header *hdr;
      struct vfio_device_info_cap_zpci_group *cap;
+    S390pciState *s = s390_get_phb();
      ClpRspQueryPciGrp *resgrp;
      VFIOPCIDevice *vpci =  container_of(pbdev->pdev, VFIOPCIDevice, pdev);
hdr = vfio_get_device_info_cap(info, VFIO_DEVICE_INFO_CAP_ZPCI_GROUP); - /* If capability not provided, just use the default group */
-    if (hdr == NULL) {
+    /*
+     * If capability not provided or the underlying hostdev is simulated, just
+     * use the default group.
+     */
+    if (hdr == NULL || pbdev->zpci_fn.pfgid >= ZPCI_SIM_GRP_START) {
          trace_s390_pci_clp_cap(vpci->vbasedev.name,
                                 VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
          pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
@@ -320,11 +324,41 @@ static void s390_pci_read_group(S390PCIBusDevice *pbdev,
      }
      cap = (void *) hdr;
+ /*
+     * For an intercept device, let's use an existing simulated group if one
+     * was already created for other intercept devices in this group.
+     * If not, create a new simulated group if any are still available.
+     * If all else fails, just fall back on the default group.
+     */
+    if (!pbdev->interp) {
+        pbdev->pci_group = s390_group_find_host_sim(pbdev->zpci_fn.pfgid);
+        if (pbdev->pci_group) {
+            /* Use existing simulated group */
+            pbdev->zpci_fn.pfgid = pbdev->pci_group->id;
+            return;
+        } else {
+            if (s->next_sim_grp == ZPCI_DEFAULT_FN_GRP) {
+                /* All out of simulated groups, use default */
+                trace_s390_pci_clp_cap(vpci->vbasedev.name,
+                                       VFIO_DEVICE_INFO_CAP_ZPCI_GROUP);
+                pbdev->zpci_fn.pfgid = ZPCI_DEFAULT_FN_GRP;
+                pbdev->pci_group = s390_group_find(ZPCI_DEFAULT_FN_GRP);
+                return;
+            } else {
+                /* We can assign a new simulated group */
+                pbdev->zpci_fn.pfgid = s->next_sim_grp;
+                s->next_sim_grp++;
+                /* Fall through to create the new sim group using CLP info */
+            }
+        }
+    }
+
      /* See if the PCI group is already defined, create if not */
      pbdev->pci_group = s390_group_find(pbdev->zpci_fn.pfgid);
if (!pbdev->pci_group) {
-        pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid);
+        pbdev->pci_group = s390_group_create(pbdev->zpci_fn.pfgid,
+                                             pbdev->zpci_fn.pfgid);
resgrp = &pbdev->pci_group->zpci_group;
          if (cap->flags & VFIO_DEVICE_INFO_ZPCI_FLAG_REFRESH) {
diff --git a/include/hw/s390x/s390-pci-bus.h b/include/hw/s390x/s390-pci-bus.h
index 9941ca0084..8664023d5d 100644
--- a/include/hw/s390x/s390-pci-bus.h
+++ b/include/hw/s390x/s390-pci-bus.h
@@ -315,13 +315,16 @@ typedef struct ZpciFmb {
  QEMU_BUILD_BUG_MSG(offsetof(ZpciFmb, fmt0) != 48, "padding in ZpciFmb");
#define ZPCI_DEFAULT_FN_GRP 0xFF
+#define ZPCI_SIM_GRP_START 0xF0
  typedef struct S390PCIGroup {
      ClpRspQueryPciGrp zpci_group;
      int id;
+    int host_id;
      QTAILQ_ENTRY(S390PCIGroup) link;
  } S390PCIGroup;
-S390PCIGroup *s390_group_create(int id);
+S390PCIGroup *s390_group_create(int id, int host_id);
  S390PCIGroup *s390_group_find(int id);
+S390PCIGroup *s390_group_find_host_sim(int host_id);
struct S390PCIBusDevice {
      DeviceState qdev;
@@ -370,6 +373,7 @@ struct S390pciState {
      QTAILQ_HEAD(, S390PCIBusDevice) zpci_devs;
      QTAILQ_HEAD(, S390PCIDMACount) zpci_dma_limit;
      QTAILQ_HEAD(, S390PCIGroup) zpci_groups;
+    uint8_t next_sim_grp;
  };
S390pciState *s390_get_phb(void);


--
Pierre Morel
IBM Lab Boeblingen



[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux