On Fri, Sep 20, 2024 at 12:07:34PM +0200, Christian König wrote: > Am 20.09.24 um 00:35 schrieb Michał Winiarski: > > VF MMIO resource reservation, either created by system firmware and > > inherited by Linux PCI subsystem or created by the subsystem itself, > > contains enough space to fit the BAR of all SR-IOV Virtual Functions > > that can potentially be created (total VFs supported by the device). > > This can be leveraged when the device is exposing lower than optimal BAR > > size as a default, allowing access to the entire resource when lower > > number of VFs are created. > > It is achieved by dynamically resizing the BAR to largest possible value > > that allows to fit all newly created VFs within the original resource > > boundary. > > > > Signed-off-by: Michał Winiarski <michal.winiarski@xxxxxxxxx> > > --- > > drivers/pci/iov.c | 92 ++++++++++++++++++++++++++++++++++++++++++++- > > drivers/pci/pci.h | 1 + > > include/linux/pci.h | 3 ++ > > 3 files changed, 95 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c > > index e8ccd2ae0f024..d88efbfa70e42 100644 > > --- a/drivers/pci/iov.c > > +++ b/drivers/pci/iov.c > > @@ -181,6 +181,86 @@ bool pci_iov_memory_decoding_enabled(struct pci_dev *dev) > > return cmd & PCI_SRIOV_CTRL_MSE; > > } > > +static void pci_iov_resource_do_extend(struct pci_dev *dev, int resno, u16 num_vfs) > > +{ > > + resource_size_t size; > > + int ret, old, i; > > + u32 sizes; > > + > > + pci_config_pm_runtime_get(dev); > > + > > + if (pci_iov_memory_decoding_enabled(dev)) { > > + ret = -EBUSY; > > + goto err; > > + } > > + > > + sizes = pci_rebar_get_possible_sizes(dev, resno); > > + if (!sizes) { > > + ret = -ENOTSUPP; > > + goto err; > > + } > > + > > + old = pci_rebar_get_current_size(dev, resno); > > + if (old < 0) { > > + ret = old; > > + goto err; > > + } > > + > > + while (sizes > 0) { > > + i = __fls(sizes); > > + size = pci_rebar_size_to_bytes(i); > > + if (size * num_vfs <= pci_resource_len(dev, resno)) { > > + if (i != old) { > > + ret = pci_rebar_set_size(dev, resno, size); > > + if (ret) > > + goto err; > > + > > + pci_iov_resource_set_size(dev, resno, size); > > + pci_iov_update_resource(dev, resno); > > + } > > + break; > > + } > > + sizes &= ~BIT(i); > > + } > > + > > + pci_config_pm_runtime_put(dev); > > + > > + return; > > + > > +err: > > + dev_WARN(&dev->dev, "Failed to extend %s: %d\n", > > + pci_resource_name(dev, resno), ret); > > + > > + pci_config_pm_runtime_put(dev); > > +} > > + > > +static void pci_iov_resource_do_restore(struct pci_dev *dev, int resno) > > +{ > > + if (dev->sriov->rebar_extend[resno - PCI_IOV_RESOURCES]) > > + pci_iov_resource_do_extend(dev, resno, dev->sriov->total_VFs); > > +} > > + > > +int pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable) > > +{ > > + if (!pci_resource_is_iov(dev, resno)) { > > + dev_WARN(&dev->dev, "%s is not an IOV resource\n", > > + pci_resource_name(dev, resno)); > > + > > + return -ENODEV; > > + } > > + > > + if (!pci_rebar_get_possible_sizes(dev, resno)) > > + return -ENOTSUPP; > > + > > + if (!enable) > > + pci_iov_resource_do_restore(dev, resno); > > + > > + dev->sriov->rebar_extend[resno - PCI_IOV_RESOURCES] = enable; > > + > > + return 0; > > +} > > +EXPORT_SYMBOL_GPL(pci_iov_resource_extend); > > + > > static void pci_read_vf_config_common(struct pci_dev *virtfn) > > { > > struct pci_dev *physfn = virtfn->physfn; > > @@ -445,7 +525,7 @@ static ssize_t sriov_numvfs_store(struct device *dev, > > const char *buf, size_t count) > > { > > struct pci_dev *pdev = to_pci_dev(dev); > > - int ret = 0; > > + int i, ret = 0; > > u16 num_vfs; > > if (kstrtou16(buf, 0, &num_vfs) < 0) > > @@ -487,6 +567,11 @@ static ssize_t sriov_numvfs_store(struct device *dev, > > goto exit; > > } > > + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { > > + if (pdev->sriov->rebar_extend[i]) > > + pci_iov_resource_do_extend(pdev, i + PCI_IOV_RESOURCES, num_vfs); > > + } > > + > > That sounds like a really bad idea to me. > > Basically the suggestion is here that the PCI subsystem should silently > extend and shrink the VF BARs when the number of VFs change? Why do you think it's a bad idea? Everything is under PCI subsystem control and the driver in charge has to explicitly opt-in to this behavior on a per-BAR basis. > Bjorn has the last word on that but I would say that instead the driver > owning the PCIe device as hypervisor should resize the VF BARs to a desired > size and that in turn restricts the number of VFs you can enable. Then the PCI subsystem would silently change the driver_max_VFs (or new variable, as driver_max_VFs is under PF control, so it's either new var or checking VF BAR size in pci_sriov_set_totalvfs). It also means that we have to do the maths to calculate the new VF limit in both PCI subsystem and the caller. We can go this route as well - I just think it's cleaner to keep this all under PCI subsystem control. I'll keep the current behavior in v3, but I'm open to changing it. Thanks, -Michał > > Regards, > Christian. > > > ret = pdev->driver->sriov_configure(pdev, num_vfs); > > if (ret < 0) > > goto exit; > > @@ -881,8 +966,13 @@ static int sriov_init(struct pci_dev *dev, int pos) > > static void sriov_release(struct pci_dev *dev) > > { > > + int i; > > + > > BUG_ON(dev->sriov->num_VFs); > > + for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) > > + pci_iov_resource_do_restore(dev, i + PCI_IOV_RESOURCES); > > + > > if (dev != dev->sriov->dev) > > pci_dev_put(dev->sriov->dev); > > diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h > > index e763b3fd4c7a2..47ed2633232aa 100644 > > --- a/drivers/pci/pci.h > > +++ b/drivers/pci/pci.h > > @@ -385,6 +385,7 @@ struct pci_sriov { > > u16 subsystem_vendor; /* VF subsystem vendor */ > > u16 subsystem_device; /* VF subsystem device */ > > resource_size_t barsz[PCI_SRIOV_NUM_BARS]; /* VF BAR size */ > > + bool rebar_extend[PCI_SRIOV_NUM_BARS]; /* Resize VF BAR */ > > bool drivers_autoprobe; /* Auto probing of VFs by driver */ > > }; > > diff --git a/include/linux/pci.h b/include/linux/pci.h > > index 4cf89a4b4cbcf..c007119da7b3d 100644 > > --- a/include/linux/pci.h > > +++ b/include/linux/pci.h > > @@ -2364,6 +2364,7 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); > > int pci_sriov_get_totalvfs(struct pci_dev *dev); > > int pci_sriov_configure_simple(struct pci_dev *dev, int nr_virtfn); > > resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno); > > +int pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable); > > void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe); > > /* Arch may override these (weak) */ > > @@ -2416,6 +2417,8 @@ static inline int pci_sriov_get_totalvfs(struct pci_dev *dev) > > #define pci_sriov_configure_simple NULL > > static inline resource_size_t pci_iov_resource_size(struct pci_dev *dev, int resno) > > { return 0; } > > +static inline void pci_iov_resource_extend(struct pci_dev *dev, int resno, bool enable) > > +{ return -ENODEV; } > > static inline void pci_vf_drivers_autoprobe(struct pci_dev *dev, bool probe) { } > > #endif >