From: Tatyana Nikolova <tatyana.e.nikolova@xxxxxxxxx>
Date: Wed, 24 Jul 2024 18:39:01 -0500

> From: Joshua Hay <joshua.a.hay@xxxxxxxxx>
>
> The rdma driver needs to map its own mmio regions for the sake of
> performance, meaning the idpf needs to avoid mapping portions of the bar
> space. However, to be vendor agnostic, the idpf cannot assume where
> these are and must avoid mapping hard coded regions. Instead, the idpf
> will map the bare minimum to load and communicate with the control
> plane, i.e. the mailbox registers and the reset state registers. The
> idpf will then call a new virtchnl op to fetch a list of mmio regions
> that it should map. All other registers will calculate which region they
> should store their address from.

Pls Cc netdev and IWL next time.

[...]

> /**
> + * idpf_get_mbx_reg_addr - Get BAR0 mailbox register address
> + * @adapter: private data struct
> + * @reg_offset: register offset value
> + *
> + * Based on the register offset, return the actual BAR0 register address
> + */
> +static inline void __iomem *idpf_get_mbx_reg_addr(struct idpf_adapter *adapter,
> +                                                  resource_size_t reg_offset)
> +{
> +        return (void __iomem *)(adapter->hw.mbx.addr + reg_offset);

Why so many casts to `void __iomem` if you already have mbx.addr of this
type?

> +}
> +
> +/**
> + * idpf_get_rstat_reg_addr - Get BAR0 rstat register address
> + * @adapter: private data struct
> + * @reg_offset: register offset value
> + *
> + * Based on the register offset, return the actual BAR0 register address
> + */
> +static inline
> +void __iomem *idpf_get_rstat_reg_addr(struct idpf_adapter *adapter,
> +                                      resource_size_t reg_offset)
> +{
> +        reg_offset -= adapter->dev_ops.rstat_reg_start;
> +
> +        return (void __iomem *)(adapter->hw.rstat.addr + reg_offset);

Same.

> +}
> +
> +/**
>  * idpf_get_reg_addr - Get BAR0 register address
>  * @adapter: private data struct
>  * @reg_offset: register offset value
> @@ -740,7 +780,27 @@ static inline u8 idpf_get_min_tx_pkt_len(struct idpf_adapter *adapter)
>  static inline void __iomem *idpf_get_reg_addr(struct idpf_adapter *adapter,
>                                                resource_size_t reg_offset)
>  {
> -        return (void __iomem *)(adapter->hw.hw_addr + reg_offset);
> +        struct idpf_hw *hw = &adapter->hw;
> +        int i;
> +
> +        for (i = 0; i < hw->num_lan_regs; i++) {

Pls declare @i right here, not outside the loop.

> +                struct idpf_mmio_reg *region = &hw->lan_regs[i];
> +
> +                if (reg_offset >= region->addr_start &&
> +                    reg_offset < (region->addr_start + region->addr_len)) {
> +                        reg_offset -= region->addr_start;
> +
> +                        return (u8 __iomem *)(region->addr + reg_offset);

(same)

> +                }
> +        }
> +
> +        /* It's impossible to hit this case with offsets from the CP. But if we
> +         * do for any other reason, the kernel will panic on that register
> +         * access. Might as well do it here to make it clear what's happening.
> +         */
> +        BUG();
> +
> +        return NULL;

I hope this function is not called anywhere on the hotpath?

Have you run scripts/bloat-o-meter on idpf.ko before/after the patch to
see the difference? I'd like to see the output in the commitmsg.

[...]
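Btw, to illustrate the idpf_get_reg_addr() nits above (loop-local @i, no
casts) -- a rough sketch only, written against the field names from your
patch and assuming ->addr in struct idpf_mmio_reg is already an __iomem
pointer, so the implicit conversions are enough:

static inline void __iomem *idpf_get_reg_addr(struct idpf_adapter *adapter,
                                              resource_size_t reg_offset)
{
        struct idpf_hw *hw = &adapter->hw;

        /* the iterator lives only inside the loop */
        for (u32 i = 0; i < hw->num_lan_regs; i++) {
                struct idpf_mmio_reg *region = &hw->lan_regs[i];

                if (reg_offset < region->addr_start ||
                    reg_offset >= region->addr_start + region->addr_len)
                        continue;

                /* no cast needed, the pointer is already __iomem */
                return region->addr + (reg_offset - region->addr_start);
        }

        /* impossible with offsets from the CP, see your comment above */
        BUG();

        return NULL;
}

(u32 for the iterator is only my guess, use whatever matches the type of
->num_lan_regs.)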
> @@ -102,13 +107,34 @@ static int idpf_cfg_hw(struct idpf_adapter *adapter)
>  {
>  	struct pci_dev *pdev = adapter->pdev;
>  	struct idpf_hw *hw = &adapter->hw;
> +	resource_size_t res_start;
> +	long len;
> +
> +	/* Map mailbox space for virtchnl communication */
> +	res_start = pci_resource_start(pdev, 0) +
> +		    adapter->dev_ops.mbx_reg_start;
> +	len = adapter->dev_ops.mbx_reg_sz;
> +	hw->mbx.addr = ioremap(res_start, len);
> +	if (!hw->mbx.addr) {
> +		pci_err(pdev, "failed to allocate bar0 mbx region\n");
> +
> +		return -ENOMEM;
> +	}
> +	hw->mbx.addr_start = adapter->dev_ops.mbx_reg_start;
> +	hw->mbx.addr_len = len;
>
> -	hw->hw_addr = pcim_iomap_table(pdev)[0];
> -	if (!hw->hw_addr) {
> -		pci_err(pdev, "failed to allocate PCI iomap table\n");
> +	/* Map rstat space for resets */
> +	res_start = pci_resource_start(pdev, 0) +

Why call this function multiple times?

> +		    adapter->dev_ops.rstat_reg_start;
> +	len = adapter->dev_ops.rstat_reg_sz;
> +	hw->rstat.addr = ioremap(res_start, len);
> +	if (!hw->rstat.addr) {
> +		pci_err(pdev, "failed to allocate bar0 rstat region\n");
>
>  		return -ENOMEM;
>  	}
> +	hw->rstat.addr_start = adapter->dev_ops.rstat_reg_start;
> +	hw->rstat.addr_len = len;
>
>  	hw->back = adapter;
>
> @@ -155,9 +181,9 @@ static int idpf_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>  	if (err)
>  		goto err_free;
>
> -	err = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
> +	err = pci_request_mem_regions(pdev, pci_name(pdev));
>  	if (err) {
> -		pci_err(pdev, "pcim_iomap_regions failed %pe\n", ERR_PTR(err));
> +		pci_err(pdev, "pci_request_mem_regions failed %pe\n", ERR_PTR(err));
>
>  		goto err_free;
>  	}

Why not pcim/devm variants of all these functions?

Have you considered introducing a generic API which would do something
similar to what you're doing here manually?

> diff --git a/drivers/net/ethernet/intel/idpf/idpf_mem.h b/drivers/net/ethernet/intel/idpf/idpf_mem.h
> index b21a04f..d7cc938 100644
> --- a/drivers/net/ethernet/intel/idpf/idpf_mem.h
> +++ b/drivers/net/ethernet/intel/idpf/idpf_mem.h
> @@ -12,9 +12,9 @@ struct idpf_dma_mem {
>  	size_t size;
>  };
>
> -#define wr32(a, reg, value)	writel((value), ((a)->hw_addr + (reg)))
> -#define rd32(a, reg)		readl((a)->hw_addr + (reg))
> -#define wr64(a, reg, value)	writeq((value), ((a)->hw_addr + (reg)))
> -#define rd64(a, reg)		readq((a)->hw_addr + (reg))
> +#define wr32(a, reg, value)	writel((value), ((a)->mbx.addr + (reg)))
> +#define rd32(a, reg)		readl((a)->mbx.addr + (reg))
> +#define wr64(a, reg, value)	writeq((value), ((a)->mbx.addr + (reg)))
> +#define rd64(a, reg)		readq((a)->mbx.addr + (reg))

If you hardcode ->mbx.addr here, the names of these helpers must be changed.
First of all, I'd expect the 'idpf_' prefix. Second, they don't reflect that
they're for the mailbox *only*. IOW, I'd expect something like
idpf_mbx_wr32() etc.

[...]

> +static int idpf_send_get_lan_memory_regions(struct idpf_adapter *adapter)
> +{
> +	struct virtchnl2_get_lan_memory_regions *rcvd_regions;
> +	struct idpf_vc_xn_params xn_params = {};
> +	int num_regions, size, i;
> +	struct idpf_hw *hw;
> +	ssize_t reply_sz;
> +	int err = 0;
> +
> +	rcvd_regions = kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL);

__free(kfree) for automatic freeing.
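I.e. something like the below (untested sketch; __free() comes from
<linux/cleanup.h>, the buffer name is yours):

	struct virtchnl2_get_lan_memory_regions *rcvd_regions __free(kfree) =
			kzalloc(IDPF_CTLQ_MAX_BUF_LEN, GFP_KERNEL);

	if (!rcvd_regions)
		return -ENOMEM;

With that, the send_get_lan_regions_out label and the manual kfree() at the
end of the function go away and the error paths can just return directly.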
> +	if (!rcvd_regions)
> +		return -ENOMEM;
> +
> +	xn_params.vc_op = VIRTCHNL2_OP_GET_LAN_MEMORY_REGIONS;
> +	xn_params.recv_buf.iov_base = rcvd_regions;
> +	xn_params.recv_buf.iov_len = IDPF_CTLQ_MAX_BUF_LEN;
> +	xn_params.timeout_ms = IDPF_VC_XN_DEFAULT_TIMEOUT_MSEC;

Most of these can be set right when you're initializing the structure
itself.

> +	reply_sz = idpf_vc_xn_exec(adapter, &xn_params);
> +	if (reply_sz < 0) {
> +		err = reply_sz;
> +		goto send_get_lan_regions_out;
> +	}
> +
> +	num_regions = le16_to_cpu(rcvd_regions->num_memory_regions);
> +	size = struct_size(rcvd_regions, mem_reg, num_regions);
> +	if (reply_sz < size) {
> +		err = -EIO;
> +		goto send_get_lan_regions_out;
> +	}
> +
> +	if (size > IDPF_CTLQ_MAX_BUF_LEN) {
> +		err = -EINVAL;
> +		goto send_get_lan_regions_out;
> +	}
> +
> +	hw = &adapter->hw;
> +	hw->lan_regs =
> +		kcalloc(num_regions, sizeof(struct idpf_mmio_reg), GFP_ATOMIC);

1. Invalid line split.
2. sizeof(*hw->lan_regs) as the second argument.
3. Why %GFP_ATOMIC here?!

> +	if (!hw->lan_regs) {
> +		err = -ENOMEM;
> +		goto send_get_lan_regions_out;
> +	}
> +
> +	for (i = 0; i < num_regions; i++) {

Declare @i here.

> +		hw->lan_regs[i].addr_len =
> +			le64_to_cpu(rcvd_regions->mem_reg[i].size);
> +		hw->lan_regs[i].addr_start =
> +			le64_to_cpu(rcvd_regions->mem_reg[i].start_offset);
> +	}
> +	hw->num_lan_regs = num_regions;
> +
> +send_get_lan_regions_out:
> +	kfree(rcvd_regions);
> +
> +	return err;
> +}
> +
> +/**
> + * idpf_calc_remaining_mmio_regs - calcuate MMIO regions outside mbx and rstat
> + * @adapter: Driver specific private structure
> + *
> + * Called when idpf_send_get_lan_memory_regions fails or is not supported. This
> + * will calculate the offsets and sizes for the regions before, in between, and
> + * after the mailbox and rstat MMIO mappings.
> + */
> +static int idpf_calc_remaining_mmio_regs(struct idpf_adapter *adapter)
> +{
> +	struct idpf_hw *hw = &adapter->hw;
> +
> +	hw->num_lan_regs = 3;

Hardcode?

> +	hw->lan_regs = kcalloc(hw->num_lan_regs,
> +			       sizeof(struct idpf_mmio_reg),
> +			       GFP_ATOMIC);

1. sizeof(), see above.
2. %GFP_ATOMIC, see above.

> +	if (!hw->lan_regs)
> +		return -ENOMEM;
> +
> +	/* Region preceding mailbox */
> +	hw->lan_regs[0].addr_start = 0;
> +	hw->lan_regs[0].addr_len = adapter->dev_ops.mbx_reg_start;
> +	/* Region between mailbox and rstat */
> +	hw->lan_regs[1].addr_start = adapter->dev_ops.mbx_reg_start +
> +				     adapter->dev_ops.mbx_reg_sz;
> +	hw->lan_regs[1].addr_len = adapter->dev_ops.rstat_reg_start -
> +				   hw->lan_regs[1].addr_start;
> +	/* Region after rstat */
> +	hw->lan_regs[2].addr_start = adapter->dev_ops.rstat_reg_start +
> +				     adapter->dev_ops.rstat_reg_sz;
> +	hw->lan_regs[2].addr_len = pci_resource_len(adapter->pdev, 0) -
> +				   hw->lan_regs[2].addr_start;
> +
> +	return 0;
> +}
> +
> +/**
> + * idpf_map_lan_mmio_regs - map remaining LAN BAR regions
> + * @adapter: Driver specific private structure
> + */
> +static int idpf_map_lan_mmio_regs(struct idpf_adapter *adapter)
> +{
> +	struct pci_dev *pdev = adapter->pdev;
> +	struct idpf_hw *hw = &adapter->hw;
> +	int i;
> +
> +	for (i = 0; i < hw->num_lan_regs; i++) {

@i, see above.

> +		resource_size_t res_start;
> +		long len;
> +
> +		len = hw->lan_regs[i].addr_len;
> +		if (!len)
> +			continue;
> +		res_start = hw->lan_regs[i].addr_start;
> +		res_start += pci_resource_start(pdev, 0);

Again calling this function multiple times.

> +
> +		hw->lan_regs[i].addr = ioremap(res_start, len);

devm/pcim/etc.?
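Putting the last three comments together, roughly something like this for
idpf_map_lan_mmio_regs() (an untested sketch against the field names from
your patch; whether the devm lifetime actually fits your reset/remap flow
is for you to judge, and the error message wording is mine):

static int idpf_map_lan_mmio_regs(struct idpf_adapter *adapter)
{
        struct pci_dev *pdev = adapter->pdev;
        struct idpf_hw *hw = &adapter->hw;
        resource_size_t bar0_start;

        /* read the BAR start once instead of once per iteration */
        bar0_start = pci_resource_start(pdev, 0);

        for (int i = 0; i < hw->num_lan_regs; i++) {
                struct idpf_mmio_reg *region = &hw->lan_regs[i];

                if (!region->addr_len)
                        continue;

                /* device-managed, so no manual iounmap() on teardown */
                region->addr = devm_ioremap(&pdev->dev,
                                            bar0_start + region->addr_start,
                                            region->addr_len);
                if (!region->addr) {
                        pci_err(pdev, "failed to map BAR0 region %d\n", i);
                        return -ENOMEM;
                }
        }

        return 0;
}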
> +		if (!hw->lan_regs[i].addr) {
> +			pci_err(pdev, "failed to allocate bar0 region\n");
> +			return -ENOMEM;
> +		}
> +	}
> +
> +	return 0;
> +}

[...]

Thanks,
Olek