Am 20.09.24 um 00:35 schrieb Michał Winiarski:
VF MMIO resource reservation, either created by system firmware and
inherited by Linux PCI subsystem or created by the subsystem itself,
contains enough space to fit the BAR of all SR-IOV Virtual Functions
that can potentially be created (total VFs supported by the device).
This can be leveraged when the device is exposing lower than optimal BAR
size as a default, allowing access to the entire resource when lower
number of VFs are created.
It is achieved by dynamically resizing the BAR to largest possible value
that allows to fit all newly created VFs within the original resource
boundary.
Signed-off-by: Michał Winiarski <michal.winiarski@xxxxxxxxx>
---
drivers/pci/iov.c | 92 ++++++++++++++++++++++++++++++++++++++++++++-
drivers/pci/pci.h | 1 +
include/linux/pci.h | 3 ++
3 files changed, 95 insertions(+), 1 deletion(-)
diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c
index e8ccd2ae0f024..d88efbfa70e42 100644
--- a/drivers/pci/iov.c
+++ b/drivers/pci/iov.c
@@ -181,6 +181,86 @@ bool pci_iov_memory_decoding_enabled(struct pci_dev *dev)
return cmd & PCI_SRIOV_CTRL_MSE;
}
+static void pci_iov_resource_do_extend(struct pci_dev *dev, int resno, u16 num_vfs)
+{
+ resource_size_t size;
+ int ret, old, i;
+ u32 sizes;
+
+ pci_config_pm_runtime_get(dev);
+
+ if (pci_iov_memory_decoding_enabled(dev)) {
+ ret = -EBUSY;
+ goto err;
+ }
+
+ sizes = pci_rebar_get_possible_sizes(dev, resno);
+ if (!sizes) {
+ ret = -ENOTSUPP;
+ goto err;
+ }
+
+ old = pci_rebar_get_current_size(dev, resno);
+ if (old < 0) {
+ ret = old;
+ goto err;
+ }
+
+ while (sizes > 0) {
+ i = __fls(sizes);
+ size = pci_rebar_size_to_bytes(i);
+ if (size * num_vfs <= pci_resource_len(dev, resno)) {
+ if (i != old) {
+ ret = pci_rebar_set_size(dev, resno, size);
+ if (ret)
+ goto err;
+
+ pci_iov_resource_set_size(dev, resno, size);
+ pci_iov_update_resource(dev, resno);
+ }
+ break;
+ }
+ sizes &= ~BIT(i);
+ }
+
+ pci_config_pm_runtime_put(dev);
+
+ return;
+
+err:
+ dev_WARN(&dev->dev, "Failed to extend %s: %d\n",
+ pci_resource_name(dev, resno), ret);
+
+ pci_config_pm_runtime_put(dev);
+}
+
+static void pci_iov_resource_do_restore(struct pci_dev *dev, int resno)
+{
+ if (dev->sriov->rebar_extend[resno - PCI_IOV_RESOURCES])
+ pci_iov_resource_do_extend(dev, resno, dev->sriov->total_VFs);
+}
+
+int pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable)
+{
+ if (!pci_resource_is_iov(dev, resno)) {
+ dev_WARN(&dev->dev, "%s is not an IOV resource\n",
+ pci_resource_name(dev, resno));
+
+ return -ENODEV;
+ }
+
+ if (!pci_rebar_get_possible_sizes(dev, resno))
+ return -ENOTSUPP;
+
+ if (!enable)
+ pci_iov_resource_do_restore(dev, resno);
+
+ dev->sriov->rebar_extend[resno - PCI_IOV_RESOURCES] = enable;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(pci_iov_resource_extend);
+
static void pci_read_vf_config_common(struct pci_dev *virtfn)
{
struct pci_dev *physfn = virtfn->physfn;
@@ -445,7 +525,7 @@ static ssize_t sriov_numvfs_store(struct device *dev,
const char *buf, size_t count)
{
struct pci_dev *pdev = to_pci_dev(dev);
- int ret = 0;
+ int i, ret = 0;
u16 num_vfs;
if (kstrtou16(buf, 0, &num_vfs) < 0)
@@ -487,6 +567,11 @@ static ssize_t sriov_numvfs_store(struct device *dev,
goto exit;
}
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
+ if (pdev->sriov->rebar_extend[i])
+ pci_iov_resource_do_extend(pdev, i + PCI_IOV_RESOURCES, num_vfs);
+ }
+
That sounds like a really bad idea to me.
Basically the suggestion is here that the PCI subsystem should silently
extend and shrink the VF BARs when the number of VFs change?
Bjorn has the last word on that but I would say that instead the driver
owning the PCIe device as hypervisor should resize the VF BARs to a
desired size and that in turn restricts the number of VFs you can enable.
Regards,
Christian.
ret = pdev->driver->sriov_configure(pdev, num_vfs);
if (ret < 0)
goto exit;
@@ -881,8 +966,13 @@ static int sriov_init(struct pci_dev *dev, int pos)
static void sriov_release(struct pci_dev *dev)
{
+ int i;
+
BUG_ON(dev->sriov->num_VFs);
+ for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
+ pci_iov_resource_do_restore(dev, i + PCI_IOV_RESOURCES);
+
if (dev != dev->sriov->dev)
pci_dev_put(dev->sriov->dev);
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index e763b3fd4c7a2..47ed2633232aa 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -385,6 +385,7 @@ struct pci_sriov {
u16 subsystem_vendor; /* VF subsystem vendor */
u16 subsystem_device; /* VF subsystem device */
resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */
+ bool rebar_extend[PCI_SRIOV_NUM_BARS]; /* Resize VF BAR */
bool drivers_autoprobe; /* Auto probing of VFs by driver */
};
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 4cf89a4b4cbcf..c007119da7b3d 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -2364,6 +2364,7 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs);
int pci_sriov_get_totalvfs(struct pci_dev *dev);
int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn);
resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno);
+int pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable);
void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe);
/* Arch may override these (weak) */
@@ -2416,6 +2417,8 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev)
#define pci_sriov_configure_simple NULL
static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno)
{ return 0; }
+static inline void pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable)
+{ return -ENODEV; }
static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { }
#endif