Hi Marc, Thanks for your review. On Tue, 2021-03-09 at 11:23 +0000, Marc Zyngier wrote: > On Wed, 24 Feb 2021 06:11:30 +0000, > Jianjun Wang <jianjun.wang@xxxxxxxxxxxx> wrote: > > > > Add MSI support for MediaTek Gen3 PCIe controller. > > > > This PCIe controller supports up to 256 MSI vectors, the MSI hardware > > block diagram is as follows: > > > > +-----+ > > | GIC | > > +-----+ > > ^ > > | > > port->irq > > | > > +-+-+-+-+-+-+-+-+ > > |0|1|2|3|4|5|6|7| (PCIe intc) > > +-+-+-+-+-+-+-+-+ > > ^ ^ ^ > > | | ... | > > +-------+ +------+ +-----------+ > > | | | > > +-+-+---+--+--+ +-+-+---+--+--+ +-+-+---+--+--+ > > |0|1|...|30|31| |0|1|...|30|31| |0|1|...|30|31| (MSI sets) > > +-+-+---+--+--+ +-+-+---+--+--+ +-+-+---+--+--+ > > ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ > > | | | | | | | | | | | | (MSI vectors) > > | | | | | | | | | | | | > > > > (MSI SET0) (MSI SET1) ... (MSI SET7) > > > > With 256 MSI vectors supported, the MSI vectors are composed of 8 sets, > > each set has its own address for MSI message, and supports 32 MSI vectors > > to generate interrupt. > > > > Signed-off-by: Jianjun Wang <jianjun.wang@xxxxxxxxxxxx> > > Acked-by: Ryder Lee <ryder.lee@xxxxxxxxxxxx> > > --- > > drivers/pci/controller/pcie-mediatek-gen3.c | 277 ++++++++++++++++++++ > > 1 file changed, 277 insertions(+) > > > > diff --git a/drivers/pci/controller/pcie-mediatek-gen3.c b/drivers/pci/controller/pcie-mediatek-gen3.c > > index 8b3b5f838b69..3cbec22ece0c 100644 > > --- a/drivers/pci/controller/pcie-mediatek-gen3.c > > +++ b/drivers/pci/controller/pcie-mediatek-gen3.c > > @@ -14,6 +14,7 @@ > > #include <linux/irqdomain.h> > > #include <linux/kernel.h> > > #include <linux/module.h> > > +#include <linux/msi.h> > > #include <linux/pci.h> > > #include <linux/phy/phy.h> > > #include <linux/platform_device.h> > > @@ -48,12 +49,29 @@ > > #define PCIE_LINK_STATUS_REG 0x154 > > #define PCIE_PORT_LINKUP BIT(8) > > > > +#define PCIE_MSI_SET_NUM 8 > > +#define PCIE_MSI_IRQS_PER_SET 32 > > +#define PCIE_MSI_IRQS_NUM \ > > + (PCIE_MSI_IRQS_PER_SET * PCIE_MSI_SET_NUM) > > + > > #define PCIE_INT_ENABLE_REG 0x180 > > +#define PCIE_MSI_ENABLE GENMASK(PCIE_MSI_SET_NUM + 8 - 1, 8) > > +#define PCIE_MSI_SHIFT 8 > > #define PCIE_INTX_SHIFT 24 > > #define PCIE_INTX_ENABLE \ > > GENMASK(PCIE_INTX_SHIFT + PCI_NUM_INTX - 1, PCIE_INTX_SHIFT) > > > > #define PCIE_INT_STATUS_REG 0x184 > > +#define PCIE_MSI_SET_ENABLE_REG 0x190 > > +#define PCIE_MSI_SET_ENABLE GENMASK(PCIE_MSI_SET_NUM - 1, 0) > > + > > +#define PCIE_MSI_SET_BASE_REG 0xc00 > > +#define PCIE_MSI_SET_OFFSET 0x10 > > +#define PCIE_MSI_SET_STATUS_OFFSET 0x04 > > +#define PCIE_MSI_SET_ENABLE_OFFSET 0x08 > > + > > +#define PCIE_MSI_SET_ADDR_HI_BASE 0xc80 > > +#define PCIE_MSI_SET_ADDR_HI_OFFSET 0x04 > > > > #define PCIE_TRANS_TABLE_BASE_REG 0x800 > > #define PCIE_ATR_SRC_ADDR_MSB_OFFSET 0x4 > > @@ -73,6 +91,16 @@ > > #define PCIE_ATR_TLP_TYPE_MEM PCIE_ATR_TLP_TYPE(0) > > #define PCIE_ATR_TLP_TYPE_IO PCIE_ATR_TLP_TYPE(2) > > > > +/** > > + * struct mtk_pcie_msi - MSI information for each set > > + * @base: IO mapped register base > > + * @msg_addr: MSI message address > > + */ > > +struct mtk_msi_set { > > + void __iomem *base; > > + phys_addr_t msg_addr; > > +}; > > + > > /** > > * struct mtk_pcie_port - PCIe port information > > * @dev: pointer to PCIe device > > @@ -86,6 +114,11 @@ > > * @irq: PCIe controller interrupt number > > * @irq_lock: lock protecting IRQ register access > > * @intx_domain: legacy INTx IRQ domain > > + * @msi_domain: MSI IRQ domain > > + * @msi_bottom_domain: MSI IRQ bottom domain > > + * @msi_sets: MSI sets information > > + * @lock: lock protecting IRQ bit map > > + * @msi_irq_in_use: bit map for assigned MSI IRQ > > */ > > struct mtk_pcie_port { > > struct device *dev; > > @@ -100,6 +133,11 @@ struct mtk_pcie_port { > > int irq; > > raw_spinlock_t irq_lock; > > struct irq_domain *intx_domain; > > + struct irq_domain *msi_domain; > > + struct irq_domain *msi_bottom_domain; > > + struct mtk_msi_set msi_sets[PCIE_MSI_SET_NUM]; > > + struct mutex lock; > > + DECLARE_BITMAP(msi_irq_in_use, PCIE_MSI_IRQS_NUM); > > }; > > > > /** > > @@ -197,6 +235,35 @@ static int mtk_pcie_set_trans_table(struct mtk_pcie_port *port, > > return 0; > > } > > > > +static void mtk_pcie_enable_msi(struct mtk_pcie_port *port) > > +{ > > + int i; > > + u32 val; > > + > > + val = readl_relaxed(port->base + PCIE_MSI_SET_ENABLE_REG); > > + val |= PCIE_MSI_SET_ENABLE; > > + writel_relaxed(val, port->base + PCIE_MSI_SET_ENABLE_REG); > > + > > + val = readl_relaxed(port->base + PCIE_INT_ENABLE_REG); > > + val |= PCIE_MSI_ENABLE; > > + writel_relaxed(val, port->base + PCIE_INT_ENABLE_REG); > > Shouldn't you configure the capture addresses *before* enabling > things? Is there any need for locking here, given that you are > modifying global registers? Yes, I will move these codes to the back of the configure capture address in the next version. I think the lock may not be needed because this function is only executed once when driver probe. > > > + > > + for (i = 0; i < PCIE_MSI_SET_NUM; i++) { > > + struct mtk_msi_set *msi_set = &port->msi_sets[i]; > > + > > + msi_set->base = port->base + PCIE_MSI_SET_BASE_REG + > > + i * PCIE_MSI_SET_OFFSET; > > + msi_set->msg_addr = port->reg_base + PCIE_MSI_SET_BASE_REG + > > + i * PCIE_MSI_SET_OFFSET; > > + > > + /* Configure the MSI capture address */ > > + writel_relaxed(lower_32_bits(msi_set->msg_addr), msi_set->base); > > + writel_relaxed(upper_32_bits(msi_set->msg_addr), > > + port->base + PCIE_MSI_SET_ADDR_HI_BASE + > > + i * PCIE_MSI_SET_ADDR_HI_OFFSET); > > + } > > +} > > + > > static int mtk_pcie_startup_port(struct mtk_pcie_port *port) > > { > > struct resource_entry *entry; > > @@ -247,6 +314,8 @@ static int mtk_pcie_startup_port(struct mtk_pcie_port *port) > > return err; > > } > > > > + mtk_pcie_enable_msi(port); > > + > > /* Set PCIe translation windows */ > > resource_list_for_each_entry(entry, &host->windows) { > > struct resource *res = entry->res; > > @@ -290,6 +359,148 @@ static int mtk_pcie_set_affinity(struct irq_data *data, > > return -EINVAL; > > } > > > > +static void mtk_pcie_irq_mask(struct irq_data *data) > > It'd be good if you used _msi_ in function names that deal with MSIs. OK, I will fix it in the next version. > > > +{ > > + pci_msi_mask_irq(data); > > + irq_chip_mask_parent(data); > > +} > > + > > +static void mtk_pcie_irq_unmask(struct irq_data *data) > > +{ > > + pci_msi_unmask_irq(data); > > + irq_chip_unmask_parent(data); > > +} > > + > > +static struct irq_chip mtk_msi_irq_chip = { > > + .name = "MSI", > > + .irq_enable = mtk_pcie_irq_unmask, > > + .irq_disable = mtk_pcie_irq_mask, > > Same comment as for the previous patch: enable/disable serve no > purpose here. Replied in the previous patch, the enable/disable callback is used when the system suspend/resume. > > > + .irq_ack = irq_chip_ack_parent, > > + .irq_mask = mtk_pcie_irq_mask, > > + .irq_unmask = mtk_pcie_irq_unmask, > > +}; > > + > > +static struct msi_domain_info mtk_msi_domain_info = { > > + .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_PCI_MSIX | > > + MSI_FLAG_USE_DEF_CHIP_OPS | MSI_FLAG_MULTI_PCI_MSI), > > minor nit: keep the *_OPS flag on one line, and the *_PCI_* flags on > another. Sure, I will fix it in the next version. > > > + .chip = &mtk_msi_irq_chip, > > +}; > > + > > +static void mtk_compose_msi_msg(struct irq_data *data, struct msi_msg *msg) > > +{ > > + struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data); > > + struct mtk_pcie_port *port = data->domain->host_data; > > + unsigned long hwirq; > > + > > + hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET; > > + > > + msg->address_hi = upper_32_bits(msi_set->msg_addr); > > + msg->address_lo = lower_32_bits(msi_set->msg_addr); > > + msg->data = hwirq; > > + dev_dbg(port->dev, "msi#%#lx address_hi %#x address_lo %#x data %d\n", > > + hwirq, msg->address_hi, msg->address_lo, msg->data); > > +} > > + > > +static void mtk_msi_bottom_irq_ack(struct irq_data *data) > > +{ > > + struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data); > > + unsigned long hwirq; > > + > > + hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET; > > + > > + writel_relaxed(BIT(hwirq), msi_set->base + PCIE_MSI_SET_STATUS_OFFSET); > > +} > > + > > +static void mtk_msi_bottom_irq_mask(struct irq_data *data) > > +{ > > + struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data); > > + struct mtk_pcie_port *port = data->domain->host_data; > > + unsigned long hwirq, flags; > > + u32 val; > > + > > + hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET; > > + > > + raw_spin_lock_irqsave(&port->irq_lock, flags); > > + val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET); > > + val &= ~BIT(hwirq); > > + writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET); > > + raw_spin_unlock_irqrestore(&port->irq_lock, flags); > > +} > > + > > +static void mtk_msi_bottom_irq_unmask(struct irq_data *data) > > +{ > > + struct mtk_msi_set *msi_set = irq_data_get_irq_chip_data(data); > > + struct mtk_pcie_port *port = data->domain->host_data; > > + unsigned long hwirq, flags; > > + u32 val; > > + > > + hwirq = data->hwirq % PCIE_MSI_IRQS_PER_SET; > > + > > + raw_spin_lock_irqsave(&port->irq_lock, flags); > > + val = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET); > > + val |= BIT(hwirq); > > + writel_relaxed(val, msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET); > > + raw_spin_unlock_irqrestore(&port->irq_lock, flags); > > +} > > + > > +static struct irq_chip mtk_msi_bottom_irq_chip = { > > + .irq_ack = mtk_msi_bottom_irq_ack, > > + .irq_mask = mtk_msi_bottom_irq_mask, > > + .irq_unmask = mtk_msi_bottom_irq_unmask, > > + .irq_compose_msi_msg = mtk_compose_msi_msg, > > + .irq_set_affinity = mtk_pcie_set_affinity, > > + .name = "MSI", > > +}; > > + > > +static int mtk_msi_bottom_domain_alloc(struct irq_domain *domain, > > + unsigned int virq, unsigned int nr_irqs, > > + void *arg) > > +{ > > + struct mtk_pcie_port *port = domain->host_data; > > + struct mtk_msi_set *msi_set; > > + int i, hwirq, set_idx; > > + > > + mutex_lock(&port->lock); > > + > > + hwirq = bitmap_find_free_region(port->msi_irq_in_use, PCIE_MSI_IRQS_NUM, > > + order_base_2(nr_irqs)); > > + > > + mutex_unlock(&port->lock); > > + > > + if (hwirq < 0) > > + return -ENOSPC; > > + > > + set_idx = hwirq / PCIE_MSI_IRQS_PER_SET; > > + msi_set = &port->msi_sets[set_idx]; > > + > > + for (i = 0; i < nr_irqs; i++) > > + irq_domain_set_info(domain, virq + i, hwirq + i, > > + &mtk_msi_bottom_irq_chip, msi_set, > > + handle_edge_irq, NULL, NULL); > > + > > + return 0; > > +} > > + > > +static void mtk_msi_bottom_domain_free(struct irq_domain *domain, > > + unsigned int virq, unsigned int nr_irqs) > > +{ > > + struct mtk_pcie_port *port = domain->host_data; > > + struct irq_data *data = irq_domain_get_irq_data(domain, virq); > > + > > + mutex_lock(&port->lock); > > + > > + bitmap_clear(port->msi_irq_in_use, data->hwirq, nr_irqs); > > + > > + mutex_unlock(&port->lock); > > + > > + irq_domain_free_irqs_common(domain, virq, nr_irqs); > > +} > > + > > +static const struct irq_domain_ops mtk_msi_bottom_domain_ops = { > > + .alloc = mtk_msi_bottom_domain_alloc, > > + .free = mtk_msi_bottom_domain_free, > > +}; > > + > > static void mtk_intx_mask(struct irq_data *data) > > { > > struct mtk_pcie_port *port = irq_data_get_irq_chip_data(data); > > @@ -360,6 +571,7 @@ static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port) > > { > > struct device *dev = port->dev; > > struct device_node *intc_node, *node = dev->of_node; > > + int ret; > > > > raw_spin_lock_init(&port->irq_lock); > > > > @@ -377,7 +589,34 @@ static int mtk_pcie_init_irq_domains(struct mtk_pcie_port *port) > > return -ENODEV; > > } > > > > + /* Setup MSI */ > > + mutex_init(&port->lock); > > + > > + port->msi_bottom_domain = irq_domain_add_linear(node, PCIE_MSI_IRQS_NUM, > > + &mtk_msi_bottom_domain_ops, port); > > + if (!port->msi_bottom_domain) { > > + dev_info(dev, "failed to create MSI bottom domain\n"); > > + ret = -ENODEV; > > + goto err_msi_bottom_domain; > > + } > > + > > + port->msi_domain = pci_msi_create_irq_domain(dev->fwnode, > > + &mtk_msi_domain_info, > > + port->msi_bottom_domain); > > + if (!port->msi_domain) { > > + dev_info(dev, "failed to create MSI domain\n"); > > + ret = -ENODEV; > > + goto err_msi_domain; > > + } > > + > > return 0; > > + > > +err_msi_domain: > > + irq_domain_remove(port->msi_bottom_domain); > > +err_msi_bottom_domain: > > + irq_domain_remove(port->intx_domain); > > + > > + return ret; > > } > > > > static void mtk_pcie_irq_teardown(struct mtk_pcie_port *port) > > @@ -387,9 +626,39 @@ static void mtk_pcie_irq_teardown(struct mtk_pcie_port *port) > > if (port->intx_domain) > > irq_domain_remove(port->intx_domain); > > > > + if (port->msi_domain) > > + irq_domain_remove(port->msi_domain); > > + > > + if (port->msi_bottom_domain) > > + irq_domain_remove(port->msi_bottom_domain); > > + > > irq_dispose_mapping(port->irq); > > } > > > > +static void mtk_pcie_msi_handler(struct mtk_pcie_port *port, int set_idx) > > +{ > > + struct mtk_msi_set *msi_set = &port->msi_sets[set_idx]; > > + unsigned long msi_enable, msi_status; > > + unsigned int virq; > > + irq_hw_number_t bit, hwirq; > > + > > + msi_enable = readl_relaxed(msi_set->base + PCIE_MSI_SET_ENABLE_OFFSET); > > + > > + do { > > + msi_status = readl_relaxed(msi_set->base + > > + PCIE_MSI_SET_STATUS_OFFSET); > > + msi_status &= msi_enable; > > + if (!msi_status) > > + break; > > + > > + for_each_set_bit(bit, &msi_status, PCIE_MSI_IRQS_PER_SET) { > > + hwirq = bit + set_idx * PCIE_MSI_IRQS_PER_SET; > > + virq = irq_find_mapping(port->msi_bottom_domain, hwirq); > > + generic_handle_irq(virq); > > + } > > + } while (true); > > +} > > + > > static void mtk_pcie_irq_handler(struct irq_desc *desc) > > { > > struct mtk_pcie_port *port = irq_desc_get_handler_data(desc); > > @@ -408,6 +677,14 @@ static void mtk_pcie_irq_handler(struct irq_desc *desc) > > generic_handle_irq(virq); > > } > > > > + irq_bit = PCIE_MSI_SHIFT; > > + for_each_set_bit_from(irq_bit, &status, PCIE_MSI_SET_NUM + > > + PCIE_MSI_SHIFT) { > > + mtk_pcie_msi_handler(port, irq_bit - PCIE_MSI_SHIFT); > > + > > + writel_relaxed(BIT(irq_bit), port->base + PCIE_INT_STATUS_REG); > > Isn't this write the same thing you have for EOI in the INTx case? > While I could understand your description in that case (this is a > resampling operation), I don't get what this does here. Either this is > also an EOI, but your initial description doesn't make sense, or it is > an Ack, and it should be moved to the right place. > > Which one is it? I think it should be an EOI which used to clear the interrupt status of a single set in the PCIe intc field, maybe I should move it to the end of the mtk_pcie_msi_handler() function. +-----+ | GIC | +-----+ ^ | port->irq | +-+-+-+-+-+-+-+-+ |0|1|2|3|4|5|6|7| (PCIe intc) +-+-+-+-+-+-+-+-+ ^ ^ ^ | | ... | +-------+ +------+ +-----------+ | | | +-+-+---+--+--+ +-+-+---+--+--+ +-+-+---+--+--+ |0|1|...|30|31| |0|1|...|30|31| |0|1|...|30|31| (MSI sets) +-+-+---+--+--+ +-+-+---+--+--+ +-+-+---+--+--+ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ ^ | | | | | | | | | | | | (MSI vectors) | | | | | | | | | | | | (MSI SET0) (MSI SET1) ... (MSI SET7) I would like to ask another question. In this interrupt architecture, we cannot implement an affinity for PCIe interrupts, so we return a negative value in the mtk_pcie_set_affinity callback as follows: +static int mtk_pcie_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + return -EINVAL; +} But there will always be error logs when hotplug a CPU: ~ # echo 0 > /sys/devices/system/cpu/cpu1/online [ 93.633059] IRQ255: set affinity failed(-22). [ 93.633624] IRQ256: set affinity failed(-22). [ 93.634222] CPU1: shutdown [ 93.634586] psci: CPU1 killed (polled 0 ms) Or when the system suspends: ~ # echo mem > /sys/power/state [ 93.635145] cpuhp: cpu_off cluster=0, cpu=1 [ 169.835653] PM: suspend entry (deep) [ 169.836717] Filesystems sync: 0.000 seconds [ 169.837924] Freezing user space processes ... (elapsed 0.001 seconds) done. [ 169.839922] OOM killer disabled. [ 169.840336] Freezing remaining freezable tasks ... (elapsed 0.001 seconds) done. [ 169.844715] Disabling non-boot CPUs ... [ 169.846443] IRQ255: set affinity failed(-22). [ 169.847002] IRQ256: set affinity failed(-22). [ 169.847586] CPU2: shutdown [ 169.847943] psci: CPU2 killed (polled 0 ms) [ 169.848489] cpuhp: cpu_off cluster=0, cpu=2 [ 169.850285] IRQ255: set affinity failed(-22). [ 169.851369] IRQ256: set affinity failed(-22). ... Sometimes this can cause misunderstandings to users, do we have a chance to prevent this error log? > > Thanks, > > M. > Thanks.