On Fri, Aug 2, 2024 at 6:00 PM Louis Peens <louis.peens@xxxxxxxxxxxx> wrote: > > From: Kyle Xu <zhenbing.xu@xxxxxxxxxxxx> > > Add a new kernel module ‘nfp_vdpa’ for the NFP vDPA networking driver. > > The vDPA driver initializes the necessary resources on the VF and the > data path will be offloaded. It also implements the ‘vdpa_config_ops’ > and the corresponding callback interfaces according to the requirement > of kernel vDPA framework. > > Signed-off-by: Kyle Xu <zhenbing.xu@xxxxxxxxxxxx> > Signed-off-by: Louis Peens <louis.peens@xxxxxxxxxxxx> Are there performance numbers for this card? > --- > MAINTAINERS | 1 + > drivers/vdpa/Kconfig | 10 + > drivers/vdpa/Makefile | 1 + > drivers/vdpa/netronome/Makefile | 5 + > drivers/vdpa/netronome/nfp_vdpa_main.c | 821 +++++++++++++++++++++++++ > 5 files changed, 838 insertions(+) > create mode 100644 drivers/vdpa/netronome/Makefile > create mode 100644 drivers/vdpa/netronome/nfp_vdpa_main.c > > diff --git a/MAINTAINERS b/MAINTAINERS > index c0a3d9e93689..3231b80af331 100644 > --- a/MAINTAINERS > +++ b/MAINTAINERS > @@ -15836,6 +15836,7 @@ R: Jakub Kicinski <kuba@xxxxxxxxxx> > L: oss-drivers@xxxxxxxxxxxx > S: Maintained > F: drivers/net/ethernet/netronome/ > +F: drivers/vdpa/netronome/ > > NETWORK BLOCK DEVICE (NBD) > M: Josef Bacik <josef@xxxxxxxxxxxxxx> > diff --git a/drivers/vdpa/Kconfig b/drivers/vdpa/Kconfig > index 5265d09fc1c4..da5a8461359e 100644 > --- a/drivers/vdpa/Kconfig > +++ b/drivers/vdpa/Kconfig > @@ -137,4 +137,14 @@ config OCTEONEP_VDPA > Please note that this driver must be built as a module and it > cannot be loaded until the Octeon emulation software is running. > > +config NFP_VDPA > + tristate "vDPA driver for NFP devices" > + depends on NFP > + help > + VDPA network driver for NFP4000 NFP5000 NFP6000 and newer. Provides > + offloading of virtio_net datapath such that descriptors put on the > + ring will be executed by the hardware. It also supports a variety > + of stateless offloads depending on the actual device used and > + firmware version. > + > endif # VDPA > diff --git a/drivers/vdpa/Makefile b/drivers/vdpa/Makefile > index 5654d36707af..a8e335756829 100644 > --- a/drivers/vdpa/Makefile > +++ b/drivers/vdpa/Makefile > @@ -9,3 +9,4 @@ obj-$(CONFIG_ALIBABA_ENI_VDPA) += alibaba/ > obj-$(CONFIG_SNET_VDPA) += solidrun/ > obj-$(CONFIG_PDS_VDPA) += pds/ > obj-$(CONFIG_OCTEONEP_VDPA) += octeon_ep/ > +obj-$(CONFIG_NFP_VDPA) += netronome/ > diff --git a/drivers/vdpa/netronome/Makefile b/drivers/vdpa/netronome/Makefile > new file mode 100644 > index 000000000000..ccba4ead3e4f > --- /dev/null > +++ b/drivers/vdpa/netronome/Makefile > @@ -0,0 +1,5 @@ > +# SPDX-License-Identifier: GPL-2.0 > +ccflags-y += -I$(srctree)/drivers/net/ethernet/netronome/nfp > +ccflags-y += -I$(srctree)/drivers/net/ethernet/netronome/nfp/nfpcore > +obj-$(CONFIG_NFP_VDPA) += nfp_vdpa.o > +nfp_vdpa-$(CONFIG_NFP_VDPA) += nfp_vdpa_main.o > diff --git a/drivers/vdpa/netronome/nfp_vdpa_main.c b/drivers/vdpa/netronome/nfp_vdpa_main.c > new file mode 100644 > index 000000000000..a60905848094 > --- /dev/null > +++ b/drivers/vdpa/netronome/nfp_vdpa_main.c > @@ -0,0 +1,821 @@ > +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) > +/* Copyright (C) 2023 Corigine, Inc. */ > +/* > + * nfp_vdpa_main.c > + * Main entry point for vDPA device driver. 
> + * Author: Xinying Yu <xinying.yu@xxxxxxxxxxxx>
> + *         Zhenbing Xu <zhenbing.xu@xxxxxxxxxxxx>
> + */
> +#include <linux/module.h>
> +#include <linux/pci.h>
> +#include <linux/vdpa.h>
> +
> +#include <uapi/linux/virtio_config.h>
> +#include <uapi/linux/virtio_ids.h>
> +#include <uapi/linux/virtio_net.h>
> +#include <uapi/linux/virtio_ring.h>
> +
> +#include "nfp_net.h"
> +#include "nfp_dev.h"
> +
> +/* Only one queue pair for now. */
> +#define NFP_VDPA_NUM_QUEUES 2
> +
> +/* RX queue index in queue pair */
> +#define NFP_VDPA_RX_QUEUE 0
> +
> +/* TX queue index in queue pair */
> +#define NFP_VDPA_TX_QUEUE 1
> +
> +/* Max MTU supported */
> +#define NFP_VDPA_MTU_MAX 9216
> +
> +/* Default freelist buffer size */
> +#define NFP_VDPA_FL_BUF_SZ 10240
> +
> +/* Max queue supported */
> +#define NFP_VDPA_QUEUE_MAX 256
> +
> +/* Queue space stride */
> +#define NFP_VDPA_QUEUE_SPACE_STRIDE 4
> +
> +/* Notification area base on VF CFG BAR */
> +#define NFP_VDPA_NOTIFY_AREA_BASE 0x4000
> +
> +/* Notification area offset of each queue */
> +#define NFP_VDPA_QUEUE_NOTIFY_OFFSET 0x1000
> +
> +/* Maximum number of rings supported */
> +#define NFP_VDPA_QUEUE_RING_MAX 1
> +
> +/* VF auxiliary device name */
> +#define NFP_NET_VF_ADEV_NAME "nfp"
> +
> +#define NFP_NET_SUPPORTED_FEATURES \
> +	((1ULL << VIRTIO_F_ANY_LAYOUT) | \
> +	 (1ULL << VIRTIO_F_VERSION_1) | \
> +	 (1ULL << VIRTIO_F_ACCESS_PLATFORM) | \
> +	 (1ULL << VIRTIO_NET_F_MTU) | \
> +	 (1ULL << VIRTIO_NET_F_MAC) | \
> +	 (1ULL << VIRTIO_NET_F_STATUS))
> +
> +struct nfp_vdpa_virtqueue {
> +	u64 desc;
> +	u64 avail;
> +	u64 used;
> +	u16 size;
> +	u16 last_avail_idx;
> +	u16 last_used_idx;
> +	bool ready;
> +
> +	void __iomem *kick_addr;
> +	struct vdpa_callback cb;
> +};
> +
> +struct nfp_vdpa_net {
> +	struct vdpa_device vdpa;
> +
> +	void __iomem *ctrl_bar;
> +	void __iomem *q_bar;
> +	void __iomem *qcp_cfg;
> +
> +	struct nfp_vdpa_virtqueue vring[NFP_VDPA_NUM_QUEUES];
> +
> +	u32 ctrl;
> +	u32 ctrl_w1;
> +
> +	u32 reconfig_in_progress_update;
> +	struct semaphore bar_lock;
> +
> +	u8 status;
> +	u64 features;
> +	struct virtio_net_config config;
> +
> +	struct msix_entry vdpa_rx_irq;
> +	struct nfp_net_r_vector vdpa_rx_vec;
> +
> +	struct msix_entry vdpa_tx_irq;
> +	struct nfp_net_r_vector vdpa_tx_vec;

Does this mean the config interrupt is not supported? (Or that MSI-X
cannot be used for the config interrupt?)
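If it simply is not wired up yet, please say so in the commit message.
For reference, a minimal sketch of what it could look like (the
vdpa_cfg_irq/config_cb fields and the handler below are hypothetical,
not something in this patch):

	struct msix_entry vdpa_cfg_irq;	/* hypothetical config vector */
	struct vdpa_callback config_cb;	/* stored by .set_config_cb */

	static irqreturn_t nfp_vdpa_irq_cfg(int irq, void *data)
	{
		struct nfp_vdpa_net *ndev = data;

		/* Propagate a config space change to the virtio layer */
		if (ndev->config_cb.callback)
			ndev->config_cb.callback(ndev->config_cb.private);

		return IRQ_HANDLED;
	}

with nfp_vdpa_set_config_cb() storing the callback instead of being an
empty stub.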
> +}; > + > +struct nfp_vdpa_mgmt_dev { > + struct vdpa_mgmt_dev mdev; > + struct nfp_vdpa_net *ndev; > + struct pci_dev *pdev; > + const struct nfp_dev_info *dev_info; > +}; > + > +static uint16_t vdpa_cfg_readw(struct nfp_vdpa_net *ndev, int off) > +{ > + return readw(ndev->ctrl_bar + off); > +} > + > +static u32 vdpa_cfg_readl(struct nfp_vdpa_net *ndev, int off) > +{ > + return readl(ndev->ctrl_bar + off); > +} > + > +static void vdpa_cfg_writeb(struct nfp_vdpa_net *ndev, int off, uint8_t val) > +{ > + writeb(val, ndev->ctrl_bar + off); > +} > + > +static void vdpa_cfg_writel(struct nfp_vdpa_net *ndev, int off, u32 val) > +{ > + writel(val, ndev->ctrl_bar + off); > +} > + > +static void vdpa_cfg_writeq(struct nfp_vdpa_net *ndev, int off, u64 val) > +{ > + writeq(val, ndev->ctrl_bar + off); > +} > + > +static bool nfp_vdpa_is_little_endian(struct nfp_vdpa_net *ndev) > +{ > + return virtio_legacy_is_little_endian() || > + (ndev->features & BIT_ULL(VIRTIO_F_VERSION_1)); > +} > + > +static __virtio16 cpu_to_nfpvdpa16(struct nfp_vdpa_net *ndev, u16 val) > +{ > + return __cpu_to_virtio16(nfp_vdpa_is_little_endian(ndev), val); > +} > + > +static void nfp_vdpa_net_reconfig_start(struct nfp_vdpa_net *ndev, u32 update) > +{ > + vdpa_cfg_writel(ndev, NFP_NET_CFG_UPDATE, update); > + /* Flush posted PCI writes by reading something without side effects */ > + vdpa_cfg_readl(ndev, NFP_NET_CFG_VERSION); > + /* Write a none-zero value to the QCP pointer for configuration notification */ > + writel(1, ndev->qcp_cfg + NFP_QCP_QUEUE_ADD_WPTR); Macro is preferred over magic numbers. > + ndev->reconfig_in_progress_update |= update; > +} > + > +static bool nfp_vdpa_net_reconfig_check_done(struct nfp_vdpa_net *ndev, bool last_check) > +{ > + u32 reg; > + > + reg = vdpa_cfg_readl(ndev, NFP_NET_CFG_UPDATE); > + if (reg == 0) > + return true; > + if (reg & NFP_NET_CFG_UPDATE_ERR) { > + dev_err(ndev->vdpa.dma_dev, "Reconfig error (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n", > + reg, ndev->reconfig_in_progress_update, > + vdpa_cfg_readl(ndev, NFP_NET_CFG_CTRL)); Any reason for this retry here? > + return true; > + } else if (last_check) { > + dev_err(ndev->vdpa.dma_dev, "Reconfig timeout (status: 0x%08x update: 0x%08x ctrl: 0x%08x)\n", > + reg, ndev->reconfig_in_progress_update, > + vdpa_cfg_readl(ndev, NFP_NET_CFG_CTRL)); > + return true; > + } > + > + return false; > +} > + > +static bool __nfp_vdpa_net_reconfig_wait(struct nfp_vdpa_net *ndev, unsigned long deadline) > +{ > + bool timed_out = false; > + int i; > + > + /* Poll update field, waiting for NFP to ack the config. > + * Do an opportunistic wait-busy loop, afterward sleep. 
> + */
> +	for (i = 0; i < 50; i++) {
> +		if (nfp_vdpa_net_reconfig_check_done(ndev, false))
> +			return false;
> +		udelay(4);
> +	}
> +
> +	while (!nfp_vdpa_net_reconfig_check_done(ndev, timed_out)) {
> +		usleep_range(250, 500);
> +		timed_out = time_is_before_eq_jiffies(deadline);
> +	}
> +
> +	return timed_out;
> +}
> +
> +static int nfp_vdpa_net_reconfig_wait(struct nfp_vdpa_net *ndev, unsigned long deadline)
> +{
> +	if (__nfp_vdpa_net_reconfig_wait(ndev, deadline))
> +		return -EIO;
> +
> +	if (vdpa_cfg_readl(ndev, NFP_NET_CFG_UPDATE) & NFP_NET_CFG_UPDATE_ERR)
> +		return -EIO;
> +
> +	return 0;
> +}
> +
> +static int nfp_vdpa_net_reconfig(struct nfp_vdpa_net *ndev, u32 update)
> +{
> +	int ret;
> +
> +	down(&ndev->bar_lock);
> +
> +	nfp_vdpa_net_reconfig_start(ndev, update);
> +	ret = nfp_vdpa_net_reconfig_wait(ndev, jiffies + HZ * NFP_NET_POLL_TIMEOUT);
> +	ndev->reconfig_in_progress_update = 0;
> +
> +	up(&ndev->bar_lock);
> +	return ret;
> +}
> +
> +static irqreturn_t nfp_vdpa_irq_rx(int irq, void *data)
> +{
> +	struct nfp_net_r_vector *r_vec = data;
> +	struct nfp_vdpa_net *ndev;
> +
> +	ndev = container_of(r_vec, struct nfp_vdpa_net, vdpa_rx_vec);
> +
> +	ndev->vring[NFP_VDPA_RX_QUEUE].cb.callback(ndev->vring[NFP_VDPA_RX_QUEUE].cb.private);
> +
> +	vdpa_cfg_writeq(ndev, NFP_NET_CFG_ICR(ndev->vdpa_rx_irq.entry), NFP_NET_CFG_ICR_UNMASKED);

This seems to be a hack; is there any reason that prevents you from
using a common IRQ handler? (Or why do we need to know whether it is a
TX or an RX queue here?)

> +
> +	/* The FW auto-masks any interrupt, either via the MASK bit in
> +	 * the MSI-X table or via the per entry ICR field. So there
> +	 * is no need to disable interrupts here.
> +	 */
> +	return IRQ_HANDLED;
> +}
> +
> +static irqreturn_t nfp_vdpa_irq_tx(int irq, void *data)
> +{
> +	struct nfp_net_r_vector *r_vec = data;
> +	struct nfp_vdpa_net *ndev;
> +
> +	ndev = container_of(r_vec, struct nfp_vdpa_net, vdpa_tx_vec);
> +
> +	/* This memory barrier is needed to make sure the used ring and index
> +	 * has been written back before we notify the frontend driver.
> +	 */
> +	dma_rmb();

Why do we need it here? I guess the issue is that you actually want a
strong barrier, but vDPA currently lacks an interface to report that to
the virtio layer:

	vq = vring_create_virtqueue_dma(index, max_num, align, vdev,
					true, may_reduce_num, ctx,
					notify, callback, name, dma_dev);

Here weak_barrier is always true. If that is the case, we may introduce
a boolean in the vdpa device and use it when creating the virtqueues.

> +
> +	ndev->vring[NFP_VDPA_TX_QUEUE].cb.callback(ndev->vring[NFP_VDPA_TX_QUEUE].cb.private);
> +
> +	vdpa_cfg_writeq(ndev, NFP_NET_CFG_ICR(ndev->vdpa_tx_irq.entry), NFP_NET_CFG_ICR_UNMASKED);

So it seems the IRQ handlers could be unified, unless I'm missing
something.

> +
> +	/* The FW auto-masks any interrupt, either via the MASK bit in
> +	 * the MSI-X table or via the per entry ICR field. So there
> +	 * is no need to disable interrupts here.
> + */ > + return IRQ_HANDLED; > +} > + > +static struct nfp_vdpa_net *vdpa_to_ndev(struct vdpa_device *vdpa_dev) > +{ > + return container_of(vdpa_dev, struct nfp_vdpa_net, vdpa); > +} > + > +static void nfp_vdpa_ring_addr_cfg(struct nfp_vdpa_net *ndev) > +{ > + vdpa_cfg_writeq(ndev, NFP_NET_CFG_TXR_ADDR(0), ndev->vring[NFP_VDPA_TX_QUEUE].desc); > + vdpa_cfg_writeb(ndev, NFP_NET_CFG_TXR_SZ(0), ilog2(ndev->vring[NFP_VDPA_TX_QUEUE].size)); > + vdpa_cfg_writeq(ndev, NFP_NET_CFG_TXR_ADDR(1), ndev->vring[NFP_VDPA_TX_QUEUE].avail); > + vdpa_cfg_writeq(ndev, NFP_NET_CFG_TXR_ADDR(2), ndev->vring[NFP_VDPA_TX_QUEUE].used); > + > + vdpa_cfg_writeq(ndev, NFP_NET_CFG_RXR_ADDR(0), ndev->vring[NFP_VDPA_RX_QUEUE].desc); > + vdpa_cfg_writeb(ndev, NFP_NET_CFG_RXR_SZ(0), ilog2(ndev->vring[NFP_VDPA_RX_QUEUE].size)); > + vdpa_cfg_writeq(ndev, NFP_NET_CFG_RXR_ADDR(1), ndev->vring[NFP_VDPA_RX_QUEUE].avail); > + vdpa_cfg_writeq(ndev, NFP_NET_CFG_RXR_ADDR(2), ndev->vring[NFP_VDPA_RX_QUEUE].used); > +} > + > +static int nfp_vdpa_setup_driver(struct vdpa_device *vdpa_dev) > +{ > + struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev); > + u32 new_ctrl, new_ctrl_w1, update = 0; > + > + nfp_vdpa_ring_addr_cfg(ndev); > + > + vdpa_cfg_writeb(ndev, NFP_NET_CFG_TXR_VEC(1), ndev->vdpa_tx_vec.irq_entry); > + vdpa_cfg_writeb(ndev, NFP_NET_CFG_RXR_VEC(0), ndev->vdpa_rx_vec.irq_entry); > + > + vdpa_cfg_writeq(ndev, NFP_NET_CFG_TXRS_ENABLE, 1); > + vdpa_cfg_writeq(ndev, NFP_NET_CFG_RXRS_ENABLE, 1); > + > + vdpa_cfg_writel(ndev, NFP_NET_CFG_MTU, NFP_VDPA_MTU_MAX); > + vdpa_cfg_writel(ndev, NFP_NET_CFG_FLBUFSZ, NFP_VDPA_FL_BUF_SZ); > + > + /* Enable device */ > + new_ctrl = NFP_NET_CFG_CTRL_ENABLE; > + new_ctrl_w1 = NFP_NET_CFG_CTRL_VIRTIO | NFP_NET_CFG_CTRL_ENABLE_VNET; > + update |= NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING | NFP_NET_CFG_UPDATE_MSIX; > + > + vdpa_cfg_writel(ndev, NFP_NET_CFG_CTRL, new_ctrl); > + vdpa_cfg_writel(ndev, NFP_NET_CFG_CTRL_WORD1, new_ctrl_w1); > + if (nfp_vdpa_net_reconfig(ndev, update) < 0) > + return -EINVAL; > + > + ndev->ctrl = new_ctrl; > + ndev->ctrl_w1 = new_ctrl_w1; > + return 0; > +} > + > +static void nfp_reset_vring(struct nfp_vdpa_net *ndev) > +{ > + unsigned int i; > + > + for (i = 0; i < NFP_VDPA_NUM_QUEUES; i++) { > + ndev->vring[i].last_avail_idx = 0; > + ndev->vring[i].desc = 0; > + ndev->vring[i].avail = 0; > + ndev->vring[i].used = 0; > + ndev->vring[i].ready = 0; > + ndev->vring[i].cb.callback = NULL; > + ndev->vring[i].cb.private = NULL; > + } > +} > + > +static int nfp_vdpa_set_vq_address(struct vdpa_device *vdpa_dev, u16 qid, > + u64 desc_area, u64 driver_area, > + u64 device_area) > +{ > + struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev); > + > + ndev->vring[qid].desc = desc_area; > + ndev->vring[qid].avail = driver_area; > + ndev->vring[qid].used = device_area; > + > + return 0; > +} > + > +static void nfp_vdpa_set_vq_num(struct vdpa_device *vdpa_dev, u16 qid, u32 num) > +{ > + struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev); > + > + ndev->vring[qid].size = num; > +} > + > +static void nfp_vdpa_kick_vq(struct vdpa_device *vdpa_dev, u16 qid) > +{ > + struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev); > + > + if (!ndev->vring[qid].ready) > + return; > + > + writel(qid, ndev->vring[qid].kick_addr); > +} > + > +static void nfp_vdpa_set_vq_cb(struct vdpa_device *vdpa_dev, u16 qid, > + struct vdpa_callback *cb) > +{ > + struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev); > + > + ndev->vring[qid].cb = *cb; > +} > + > +static void nfp_vdpa_set_vq_ready(struct vdpa_device 
*vdpa_dev, u16 qid,
> +				  bool ready)
> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev);
> +
> +	ndev->vring[qid].ready = ready;
> +}
> +
> +static bool nfp_vdpa_get_vq_ready(struct vdpa_device *vdpa_dev, u16 qid)
> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev);
> +
> +	return ndev->vring[qid].ready;
> +}
> +
> +static int nfp_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
> +				 const struct vdpa_vq_state *state)
> +{
> +	/* Required by live migration, leave for future work */
> +	return 0;
> +}
> +
> +static int nfp_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx,
> +				 struct vdpa_vq_state *state)
> +{
> +	/* Required by live migration, leave for future work */
> +	return 0;
> +}
> +
> +static u32 nfp_vdpa_get_vq_align(struct vdpa_device *vdpa_dev)
> +{
> +	return PAGE_SIZE;
> +}
> +
> +static u64 nfp_vdpa_get_features(struct vdpa_device *vdpa_dev)
> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev);
> +
> +	return ndev->features;
> +}
> +
> +static int nfp_vdpa_set_features(struct vdpa_device *vdpa_dev, u64 features)

Let's use nfp_vdpa_set_driver_features here.

> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev);
> +
> +	/* DMA mapping must be done by driver */
> +	if (!(features & BIT_ULL(VIRTIO_F_ACCESS_PLATFORM)))
> +		return -EINVAL;
> +
> +	ndev->features = features & NFP_NET_SUPPORTED_FEATURES;
> +
> +	return 0;
> +}
> +
> +static void nfp_vdpa_set_config_cb(struct vdpa_device *vdpa_dev,
> +				   struct vdpa_callback *cb)
> +{
> +	/* Don't support config interrupt yet */
> +}
> +
> +static u16 nfp_vdpa_get_vq_num_max(struct vdpa_device *vdpa)
> +{
> +	/* Currently the firmware for kernel vDPA only support ring size 256 */
> +	return NFP_VDPA_QUEUE_MAX;
> +}
> +
> +static u32 nfp_vdpa_get_device_id(struct vdpa_device *vdpa_dev)
> +{
> +	return VIRTIO_ID_NET;
> +}
> +
> +static u32 nfp_vdpa_get_vendor_id(struct vdpa_device *vdpa_dev)
> +{
> +	struct nfp_vdpa_mgmt_dev *mgmt;
> +
> +	mgmt = container_of(vdpa_dev->mdev, struct nfp_vdpa_mgmt_dev, mdev);
> +	return mgmt->pdev->vendor;
> +}
> +
> +static u8 nfp_vdpa_get_status(struct vdpa_device *vdpa_dev)
> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev);
> +
> +	return ndev->status;

I think this should read the status from the hardware rather than from
a cached copy?

> +}
> +
> +static void nfp_vdpa_set_status(struct vdpa_device *vdpa_dev, u8 status)
> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev);
> +
> +	if ((status ^ ndev->status) & VIRTIO_CONFIG_S_DRIVER_OK) {
> +		if ((status & VIRTIO_CONFIG_S_DRIVER_OK) == 0) {
> +			dev_err(ndev->vdpa.dma_dev,
> +				"Did not expect DRIVER_OK to be cleared\n");
> +			return;
> +		}
> +
> +		if (nfp_vdpa_setup_driver(vdpa_dev)) {
> +			ndev->status |= VIRTIO_CONFIG_S_FAILED;
> +			dev_err(ndev->vdpa.dma_dev,
> +				"Failed to setup driver\n");
> +			return;
> +		}
> +	}
> +
> +	ndev->status = status;
> +}
> +
> +static int nfp_vdpa_reset(struct vdpa_device *vdpa_dev)
> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev);
> +	u32 new_ctrl, new_ctrl_w1, update = 0;
> +
> +	if (ndev->status == 0)
> +		return 0;
> +
> +	vdpa_cfg_writeb(ndev, NFP_NET_CFG_TXR_VEC(1), 0);
> +	vdpa_cfg_writeb(ndev, NFP_NET_CFG_RXR_VEC(0), 0);
> +
> +	nfp_vdpa_ring_addr_cfg(ndev);
> +
> +	vdpa_cfg_writeq(ndev, NFP_NET_CFG_TXRS_ENABLE, 0);
> +	vdpa_cfg_writeq(ndev, NFP_NET_CFG_RXRS_ENABLE, 0);

Just wondering: from the name it looks like these writes are meant to
enable TX/RX, so is writing zero intended here?
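If writing zero here is what disables the rings, a short comment would
make the reset path self-documenting, e.g. (assuming these registers
take a ring bitmask):

	/* A zero ring bitmask disables all TX/RX rings */
	vdpa_cfg_writeq(ndev, NFP_NET_CFG_TXRS_ENABLE, 0);
	vdpa_cfg_writeq(ndev, NFP_NET_CFG_RXRS_ENABLE, 0);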
> +
> +	new_ctrl = ndev->ctrl & ~NFP_NET_CFG_CTRL_ENABLE;
> +	update = NFP_NET_CFG_UPDATE_GEN | NFP_NET_CFG_UPDATE_RING | NFP_NET_CFG_UPDATE_MSIX;
> +	vdpa_cfg_writel(ndev, NFP_NET_CFG_CTRL, new_ctrl);
> +
> +	new_ctrl_w1 = ndev->ctrl_w1 & ~NFP_NET_CFG_CTRL_VIRTIO;
> +	vdpa_cfg_writel(ndev, NFP_NET_CFG_CTRL_WORD1, new_ctrl_w1);
> +
> +	if (nfp_vdpa_net_reconfig(ndev, update) < 0)
> +		return -EINVAL;
> +
> +	nfp_reset_vring(ndev);
> +
> +	ndev->ctrl = new_ctrl;
> +	ndev->ctrl_w1 = new_ctrl_w1;
> +
> +	ndev->status = 0;
> +	return 0;
> +}
> +
> +static size_t nfp_vdpa_get_config_size(struct vdpa_device *vdev)
> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdev);
> +
> +	return sizeof(ndev->config);
> +}
> +
> +static void nfp_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
> +				void *buf, unsigned int len)
> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdev);
> +
> +	if (offset + len > sizeof(ndev->config))
> +		return;
> +
> +	memcpy(buf, (void *)&ndev->config + offset, len);

Even though the config interrupt isn't supported, it's still better to
read the config from the device?

> +}
> +
> +static void nfp_vdpa_set_config(struct vdpa_device *vdev, unsigned int offset,
> +				const void *buf, unsigned int len)
> +{
> +	struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdev);
> +
> +	if (offset + len > sizeof(ndev->config))
> +		return;
> +
> +	memcpy((void *)&ndev->config + offset, buf, len);

And I think this needs to write the config through to the device as
well?

> +}
> +
> +static const struct vdpa_config_ops nfp_vdpa_ops = {
> +	.set_vq_address = nfp_vdpa_set_vq_address,
> +	.set_vq_num = nfp_vdpa_set_vq_num,
> +	.kick_vq = nfp_vdpa_kick_vq,
> +	.set_vq_cb = nfp_vdpa_set_vq_cb,
> +	.set_vq_ready = nfp_vdpa_set_vq_ready,
> +	.get_vq_ready = nfp_vdpa_get_vq_ready,
> +	.set_vq_state = nfp_vdpa_set_vq_state,
> +	.get_vq_state = nfp_vdpa_get_vq_state,
> +	.get_vq_align = nfp_vdpa_get_vq_align,
> +	.get_device_features = nfp_vdpa_get_features,

Please use nfp_vdpa_get_device_features.

> +	.get_driver_features = nfp_vdpa_get_features,
> +	.set_driver_features = nfp_vdpa_set_features,

Please use nfp_vdpa_set/get_driver_features.
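I.e., something along these lines (a sketch only; the point being that
device features and driver features are different things, the former
being what the device supports and the latter what the guest driver has
negotiated):

	static u64 nfp_vdpa_get_device_features(struct vdpa_device *vdpa_dev)
	{
		return NFP_NET_SUPPORTED_FEATURES;
	}

	static u64 nfp_vdpa_get_driver_features(struct vdpa_device *vdpa_dev)
	{
		struct nfp_vdpa_net *ndev = vdpa_to_ndev(vdpa_dev);

		return ndev->features;
	}

with the ops table pointing at the matching names.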
> +	.set_config_cb = nfp_vdpa_set_config_cb,
> +	.get_vq_num_max = nfp_vdpa_get_vq_num_max,
> +	.get_device_id = nfp_vdpa_get_device_id,
> +	.get_vendor_id = nfp_vdpa_get_vendor_id,
> +	.get_status = nfp_vdpa_get_status,
> +	.set_status = nfp_vdpa_set_status,
> +	.reset = nfp_vdpa_reset,
> +	.get_config_size = nfp_vdpa_get_config_size,
> +	.get_config = nfp_vdpa_get_config,
> +	.set_config = nfp_vdpa_set_config,
> +};
> +
> +static int nfp_vdpa_map_resources(struct nfp_vdpa_net *ndev,
> +				  struct pci_dev *pdev,
> +				  const struct nfp_dev_info *dev_info)
> +{
> +	unsigned int bar_off, bar_sz, tx_bar_sz, rx_bar_sz;
> +	unsigned int max_tx_rings, max_rx_rings, txq, rxq;
> +	u64 tx_bar_off, rx_bar_off;
> +	resource_size_t map_addr;
> +	void __iomem *tx_bar;
> +	void __iomem *rx_bar;
> +	int err;
> +
> +	/* Map CTRL BAR */
> +	ndev->ctrl_bar = ioremap(pci_resource_start(pdev, NFP_NET_CTRL_BAR),
> +				 NFP_NET_CFG_BAR_SZ);
> +	if (!ndev->ctrl_bar)
> +		return -EIO;
> +
> +	/* Find out how many rings are supported */
> +	max_tx_rings = readl(ndev->ctrl_bar + NFP_NET_CFG_MAX_TXRINGS);
> +	max_rx_rings = readl(ndev->ctrl_bar + NFP_NET_CFG_MAX_RXRINGS);
> +	/* Currently, only one ring is supported */
> +	if (max_tx_rings != NFP_VDPA_QUEUE_RING_MAX || max_rx_rings != NFP_VDPA_QUEUE_RING_MAX) {
> +		err = -EINVAL;
> +		goto ctrl_bar_unmap;
> +	}
> +
> +	/* Map Q0_BAR as a single overlapping BAR mapping */
> +	tx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_tx_rings * NFP_VDPA_QUEUE_SPACE_STRIDE;
> +	rx_bar_sz = NFP_QCP_QUEUE_ADDR_SZ * max_rx_rings * NFP_VDPA_QUEUE_SPACE_STRIDE;
> +
> +	txq = readl(ndev->ctrl_bar + NFP_NET_CFG_START_TXQ);
> +	tx_bar_off = nfp_qcp_queue_offset(dev_info, txq);
> +	rxq = readl(ndev->ctrl_bar + NFP_NET_CFG_START_RXQ);
> +	rx_bar_off = nfp_qcp_queue_offset(dev_info, rxq);
> +
> +	bar_off = min(tx_bar_off, rx_bar_off);
> +	bar_sz = max(tx_bar_off + tx_bar_sz, rx_bar_off + rx_bar_sz);
> +	bar_sz -= bar_off;
> +
> +	map_addr = pci_resource_start(pdev, NFP_NET_Q0_BAR) + bar_off;
> +	ndev->q_bar = ioremap(map_addr, bar_sz);
> +	if (!ndev->q_bar) {
> +		err = -EIO;
> +		goto ctrl_bar_unmap;
> +	}
> +
> +	tx_bar = ndev->q_bar + (tx_bar_off - bar_off);
> +	rx_bar = ndev->q_bar + (rx_bar_off - bar_off);
> +
> +	/* TX queues */
> +	ndev->vring[txq].kick_addr = ndev->ctrl_bar + NFP_VDPA_NOTIFY_AREA_BASE
> +				     + txq * NFP_VDPA_QUEUE_NOTIFY_OFFSET;
> +	/* RX queues */
> +	ndev->vring[rxq].kick_addr = ndev->ctrl_bar + NFP_VDPA_NOTIFY_AREA_BASE
> +				     + rxq * NFP_VDPA_QUEUE_NOTIFY_OFFSET;
> +	/* Stash the re-configuration queue away. First odd queue in TX Bar */
> +	ndev->qcp_cfg = tx_bar + NFP_QCP_QUEUE_ADDR_SZ;

What does "re-configuration queue" mean here?
> + > + return 0; > + > +ctrl_bar_unmap: > + iounmap(ndev->ctrl_bar); > + return err; > +} > + > +static int nfp_vdpa_init_ndev(struct nfp_vdpa_net *ndev) > +{ > + ndev->features = NFP_NET_SUPPORTED_FEATURES; > + > + ndev->config.mtu = cpu_to_nfpvdpa16(ndev, NFP_NET_DEFAULT_MTU); > + ndev->config.status = cpu_to_nfpvdpa16(ndev, VIRTIO_NET_S_LINK_UP); > + > + put_unaligned_be32(vdpa_cfg_readl(ndev, NFP_NET_CFG_MACADDR + 0), &ndev->config.mac[0]); > + put_unaligned_be16(vdpa_cfg_readw(ndev, NFP_NET_CFG_MACADDR + 6), &ndev->config.mac[4]); > + > + return 0; > +} > + > +static int nfp_vdpa_mgmt_dev_add(struct vdpa_mgmt_dev *mdev, > + const char *name, > + const struct vdpa_dev_set_config *add_config) > +{ > + struct nfp_vdpa_mgmt_dev *mgmt = container_of(mdev, struct nfp_vdpa_mgmt_dev, mdev); > + struct msix_entry vdpa_irq[NFP_VDPA_NUM_QUEUES]; > + struct device *dev = &mgmt->pdev->dev; > + struct nfp_vdpa_net *ndev; > + int ret; > + > + /* Only allow one ndev at a time. */ > + if (mgmt->ndev) > + return -EOPNOTSUPP; > + > + ndev = vdpa_alloc_device(struct nfp_vdpa_net, vdpa, dev, &nfp_vdpa_ops, 1, 1, name, false); > + > + if (IS_ERR(ndev)) > + return PTR_ERR(ndev); > + > + mgmt->ndev = ndev; > + > + ret = nfp_net_irqs_alloc(mgmt->pdev, (struct msix_entry *)&vdpa_irq, 2, 2); > + if (!ret) { > + ret = -ENOMEM; > + goto free_dev; > + } > + > + ndev->vdpa_rx_irq.entry = vdpa_irq[NFP_VDPA_RX_QUEUE].entry; > + ndev->vdpa_rx_irq.vector = vdpa_irq[NFP_VDPA_RX_QUEUE].vector; > + > + snprintf(ndev->vdpa_rx_vec.name, sizeof(ndev->vdpa_rx_vec.name), "nfp-vdpa-rx0"); > + ndev->vdpa_rx_vec.irq_entry = ndev->vdpa_rx_irq.entry; > + ndev->vdpa_rx_vec.irq_vector = ndev->vdpa_rx_irq.vector; > + > + ndev->vdpa_tx_irq.entry = vdpa_irq[NFP_VDPA_TX_QUEUE].entry; > + ndev->vdpa_tx_irq.vector = vdpa_irq[NFP_VDPA_TX_QUEUE].vector; > + > + snprintf(ndev->vdpa_tx_vec.name, sizeof(ndev->vdpa_tx_vec.name), "nfp-vdpa-tx0"); > + ndev->vdpa_tx_vec.irq_entry = ndev->vdpa_tx_irq.entry; > + ndev->vdpa_tx_vec.irq_vector = ndev->vdpa_tx_irq.vector; > + > + ret = request_irq(ndev->vdpa_tx_vec.irq_vector, nfp_vdpa_irq_tx, > + 0, ndev->vdpa_tx_vec.name, &ndev->vdpa_tx_vec); > + if (ret) > + goto disable_irq; > + > + ret = request_irq(ndev->vdpa_rx_vec.irq_vector, nfp_vdpa_irq_rx, > + 0, ndev->vdpa_rx_vec.name, &ndev->vdpa_rx_vec); > + if (ret) > + goto free_tx_irq; This is fine, but to save resources it's better to allocate the irq during DRIVER_OK. 
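Roughly (a sketch with a hypothetical helper, not a drop-in patch):

	static int nfp_vdpa_request_irqs(struct nfp_vdpa_net *ndev)
	{
		int ret;

		ret = request_irq(ndev->vdpa_tx_vec.irq_vector, nfp_vdpa_irq_tx,
				  0, ndev->vdpa_tx_vec.name, &ndev->vdpa_tx_vec);
		if (ret)
			return ret;

		ret = request_irq(ndev->vdpa_rx_vec.irq_vector, nfp_vdpa_irq_rx,
				  0, ndev->vdpa_rx_vec.name, &ndev->vdpa_rx_vec);
		if (ret)
			free_irq(ndev->vdpa_tx_vec.irq_vector, &ndev->vdpa_tx_vec);

		return ret;
	}

called from the DRIVER_OK transition in nfp_vdpa_set_status(), with the
matching free_irq() calls on reset, instead of holding the vectors for
the whole lifetime of the device.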
> + > + ret = nfp_vdpa_map_resources(mgmt->ndev, mgmt->pdev, mgmt->dev_info); > + if (ret) > + goto free_rx_irq; > + > + ret = nfp_vdpa_init_ndev(mgmt->ndev); > + if (ret) > + goto unmap_resources; > + > + sema_init(&ndev->bar_lock, 1); > + > + ndev->vdpa.dma_dev = dev; > + ndev->vdpa.mdev = &mgmt->mdev; > + > + mdev->supported_features = NFP_NET_SUPPORTED_FEATURES; > + mdev->max_supported_vqs = NFP_VDPA_QUEUE_MAX; > + > + ret = _vdpa_register_device(&ndev->vdpa, NFP_VDPA_NUM_QUEUES); > + if (ret) > + goto unmap_resources; > + > + return 0; > + > +unmap_resources: > + iounmap(ndev->ctrl_bar); > + iounmap(ndev->q_bar); > +free_rx_irq: > + free_irq(ndev->vdpa_rx_vec.irq_vector, &ndev->vdpa_rx_vec); > +free_tx_irq: > + free_irq(ndev->vdpa_tx_vec.irq_vector, &ndev->vdpa_tx_vec); > +disable_irq: > + nfp_net_irqs_disable(mgmt->pdev); > +free_dev: > + put_device(&ndev->vdpa.dev); > + return ret; > +} > + > +static void nfp_vdpa_mgmt_dev_del(struct vdpa_mgmt_dev *mdev, > + struct vdpa_device *dev) > +{ > + struct nfp_vdpa_mgmt_dev *mgmt = container_of(mdev, struct nfp_vdpa_mgmt_dev, mdev); > + struct nfp_vdpa_net *ndev = vdpa_to_ndev(dev); > + > + free_irq(ndev->vdpa_rx_vec.irq_vector, &ndev->vdpa_rx_vec); > + free_irq(ndev->vdpa_tx_vec.irq_vector, &ndev->vdpa_tx_vec); > + nfp_net_irqs_disable(mgmt->pdev); > + _vdpa_unregister_device(dev); > + > + iounmap(ndev->ctrl_bar); > + iounmap(ndev->q_bar); > + > + mgmt->ndev = NULL; > +} > + > +static const struct vdpa_mgmtdev_ops nfp_vdpa_mgmt_dev_ops = { > + .dev_add = nfp_vdpa_mgmt_dev_add, > + .dev_del = nfp_vdpa_mgmt_dev_del, > +}; > + > +static struct virtio_device_id nfp_vdpa_mgmt_id_table[] = { > + { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID }, > + { 0 }, > +}; > + > +static int nfp_vdpa_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) > +{ > + struct nfp_net_vf_aux_dev *nfp_vf_aux_dev; > + struct nfp_vdpa_mgmt_dev *mgmt; > + int ret; > + > + nfp_vf_aux_dev = container_of(adev, struct nfp_net_vf_aux_dev, aux_dev); > + > + mgmt = kzalloc(sizeof(*mgmt), GFP_KERNEL); > + if (!mgmt) > + return -ENOMEM; > + > + mgmt->pdev = nfp_vf_aux_dev->pdev; > + > + mgmt->mdev.device = &nfp_vf_aux_dev->pdev->dev; > + mgmt->mdev.ops = &nfp_vdpa_mgmt_dev_ops; > + mgmt->mdev.id_table = nfp_vdpa_mgmt_id_table; > + mgmt->dev_info = nfp_vf_aux_dev->dev_info; > + > + ret = vdpa_mgmtdev_register(&mgmt->mdev); > + if (ret) > + goto err_free_mgmt; > + > + auxiliary_set_drvdata(adev, mgmt); > + > + return 0; > + > +err_free_mgmt: > + kfree(mgmt); > + > + return ret; > +} > + > +static void nfp_vdpa_remove(struct auxiliary_device *adev) > +{ > + struct nfp_vdpa_mgmt_dev *mgmt; > + > + mgmt = auxiliary_get_drvdata(adev); > + if (!mgmt) > + return; > + > + vdpa_mgmtdev_unregister(&mgmt->mdev); > + kfree(mgmt); > + > + auxiliary_set_drvdata(adev, NULL); > +} > + > +static const struct auxiliary_device_id nfp_vdpa_id_table[] = { > + { .name = NFP_NET_VF_ADEV_NAME "." NFP_NET_VF_ADEV_DRV_MATCH_NAME, }, > + {}, > +}; > + > +MODULE_DEVICE_TABLE(auxiliary, nfp_vdpa_id_table); > + > +static struct auxiliary_driver nfp_vdpa_driver = { > + .name = NFP_NET_VF_ADEV_DRV_MATCH_NAME, > + .probe = nfp_vdpa_probe, > + .remove = nfp_vdpa_remove, > + .id_table = nfp_vdpa_id_table, > +}; > + > +module_auxiliary_driver(nfp_vdpa_driver); > + > +MODULE_AUTHOR("Corigine, Inc. <oss-drivers@xxxxxxxxxxxx>"); > +MODULE_DESCRIPTION("NFP vDPA driver"); > +MODULE_LICENSE("GPL"); > -- > 2.34.1 > Thanks