Re: [PATCH v10 2/2] irqchip/loongarch-avec: Add AVEC irqchip support

Thomas Gleixner <tglx@xxxxxxxxxxxxx> · Tue, 20 Aug 2024 18:29:29 +0200

On Thu, Aug 15 2024 at 19:26, Tianyang Zhang wrote:
>  .../arch/loongarch/irq-chip-model.rst         |  32 ++
>  .../zh_CN/arch/loongarch/irq-chip-model.rst   |  32 ++
>  arch/loongarch/Kconfig                        |   1 +
>  arch/loongarch/include/asm/cpu-features.h     |   1 +
>  arch/loongarch/include/asm/cpu.h              |   2 +
>  arch/loongarch/include/asm/hardirq.h          |   3 +-
>  arch/loongarch/include/asm/hw_irq.h           |   2 +
>  arch/loongarch/include/asm/irq.h              |  25 +-
>  arch/loongarch/include/asm/loongarch.h        |  18 +-
>  arch/loongarch/include/asm/smp.h              |   2 +
>  arch/loongarch/kernel/cpu-probe.c             |   3 +-
>  arch/loongarch/kernel/irq.c                   |  15 +-
>  arch/loongarch/kernel/paravirt.c              |   5 +
>  arch/loongarch/kernel/smp.c                   |   6 +
>  drivers/irqchip/Makefile                      |   2 +-
>  drivers/irqchip/irq-loongarch-avec.c          | 426 ++++++++++++++++++
>  drivers/irqchip/irq-loongarch-cpu.c           |   5 +-
>  drivers/irqchip/irq-loongson-eiointc.c        |   7 +-
>  drivers/irqchip/irq-loongson-pch-msi.c        |  24 +-
>  include/linux/cpuhotplug.h                    |   3 +-

This patch is doing too many things at once and is absolutely not
reviewable.

Please split it up into the obvious bits and pieces:

   1) The IRQ_NOPROBE change

   2) See below

   3) Documentation

   4) Add the arch/loongson parts, i.e. all the defines and
      basic required function prototypes with a little twist.
      Add a Kconfig symbol:

	Kconfig IRQ_LOONGARCH_AVEC
        	bool  

      in drivers/irqchip/Kconfig. This allows you to add all
      arch/loongarch/ changes with the simple tweak:

      #ifdef CONFIG_IRQ_LOONGARCH_AVEC
      # define cpu_has_avecint		cpu_opt(LOONGARCH_CPU_AVECINT)
      #else
      # define cpu_has_avecint		false
      #endif

      and

      #ifdef CONFIG_IRQ_LOONGARCH_AVEC
      # define SMP_CLEAR_VECTOR		BIT(ACTION_CLEAR_VECTOR)
      #else
      # define SMP_CLEAR_VECTOR		(0)
      #endif

      That way the compiler will optimize out stuff like the
      SMP_CLEAR_VECTOR handling and you only need the prototype of
      complete_irq_moving(), but no implementation.

   5) Change the CPU hotplug callback for EOINTC and do
      the acpi_cascade_irqdomain_init() change.

   6) Prepare get_pch_msi_handle() in the pch MSI driver

   7) Implement the driver and select IRQ_LOONGARCH_AVEC
      from IRQ_LOONGARCH_CPU

   8) Remove the IRQ_LOONGARCH_AVEC helpers

> diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
> index 70f169210b52..0e3abf7b0bd3 100644
> --- a/arch/loongarch/Kconfig
> +++ b/arch/loongarch/Kconfig
> @@ -85,6 +85,7 @@ config LOONGARCH
>  	select GENERIC_ENTRY
>  	select GENERIC_GETTIMEOFDAY
>  	select GENERIC_IOREMAP if !ARCH_IOREMAP
> +	select GENERIC_IRQ_MATRIX_ALLOCATOR

Please move this to IRQ_LOONGARCH_CPU in patch #7

> @@ -92,15 +103,21 @@ int liointc_acpi_init(struct irq_domain *parent,
>  					struct acpi_madt_lio_pic *acpi_liointc);
>  int eiointc_acpi_init(struct irq_domain *parent,
>  					struct acpi_madt_eio_pic *acpi_eiointc);
> +int avecintc_acpi_init(struct irq_domain *parent);
> +
> +void complete_irq_moving(void);
>  
>  int htvec_acpi_init(struct irq_domain *parent,
>  					struct acpi_madt_ht_pic *acpi_htvec);
>  int pch_lpc_acpi_init(struct irq_domain *parent,
>  					struct acpi_madt_lpc_pic *acpi_pchlpc);
> -int pch_msi_acpi_init(struct irq_domain *parent,
> -					struct acpi_madt_msi_pic *acpi_pchmsi);
>  int pch_pic_acpi_init(struct irq_domain *parent,
>  					struct acpi_madt_bio_pic *acpi_pchpic);
> +int pch_msi_acpi_init(struct irq_domain *parent,
> +					struct acpi_madt_msi_pic *acpi_pchmsi);
> +int pch_msi_acpi_init_v2(struct irq_domain *parent,
> +					struct acpi_madt_msi_pic *acpi_pchmsi);

This is really the wrong place for all these prototypes. They are only
used in drivers/irqchip/... except for complete_irq_moving().

So the proper place for them is drivers/irqchip/irq-loongarch.h

Move them there. This is patch #2 which I referred to above.

>
> +static phys_addr_t msi_base_addr;
>

So you have everything related to the avec chip in loongarch_avec, so
why don't you move that into that data structure?

> +struct avecintc_chip {
> +	struct fwnode_handle	*fwnode;
> +	struct irq_domain	*domain;
> +	struct irq_matrix	*vector_matrix;
> +	raw_spinlock_t		lock;
> +};

The lock should be the first member as spinlocks have alignment
requirements....

> +static int avecintc_domain_alloc(struct irq_domain *domain,
> +				 unsigned int virq, unsigned int nr_irqs, void *arg)
> +{
> +	guard(raw_spinlock_irqsave)(&loongarch_avec.lock);
> +
> +	for (unsigned int i = 0; i < nr_irqs; i++) {
> +		struct irq_data *irqd = irq_domain_get_irq_data(domain, virq + i);
> +		struct avecintc_data *adata = kzalloc(sizeof(*adata), GFP_KERNEL);

That was never tested with any debug. You _cannot_ do a GFP_KERNEL
allocation with the raw spinlock held. And no, don't use
GFP_ATOMIC. There is absolutely zero reason to hold the lock accross all
of that. As you got your ideas from x86_vector_alloc_irqs(), you could
have looked at how that's done correctly.

> +		unsigned int cpu, ret;
> +
> +		if (!adata)
> +			return -ENOMEM;
> +
> +		ret = irq_matrix_alloc(loongarch_avec.vector_matrix, cpu_online_mask, false, &cpu);
> +		if (ret < 0) {
> +			kfree(adata);
> +			return ret;
> +		}
> +
> +		adata->moving = 0;

Redundant. The struct is allocated with kzalloc()...

> +		adata->prev_cpu = adata->cpu = cpu;
> +		adata->prev_vec = adata->vec = ret;
> +		adata->managed = irqd_affinity_is_managed(irqd);

If you want to support managed interrupts, then you cannot allocate
from the CPU online mask. See x86...

> +		irq_domain_set_info(domain, virq + i, virq + i, &avec_irq_controller,
> +				    adata, handle_edge_irq, NULL, NULL);
> +		irqd_set_single_target(irqd);
> +		irqd_set_affinity_on_activate(irqd);
> +
> +		per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(irqd);

static int avecintc_alloc_vector(struct avecintc_adata *adata)
{
        int ret, cpu;

	guard(raw_spinlock_irqsave)(&loongarch_avec.lock);
	ret = irq_matrix_alloc(loongarch_avec.vector_matrix, cpu_online_mask, false, &cpu);
        if (ret < 0)
              return ret;

	adata->prev_cpu = adata->cpu = cpu;
	adata->prev_vec = adata->vec = ret;
        per_cpu_ptr(irq_map, adata->cpu)[adata->vec] = irq_data_to_desc(irqd);
        return 0;
}

static int avecintc_domain_alloc(struct irq_domain *domain, ...)
{
	for (unsigned int i = 0; i < nr_irqs; i++) {
		struct irq_data *irqd = irq_domain_get_irq_data(domain, virq + i);
		struct avecintc_data *adata = kzalloc(sizeof(*adata), GFP_KERNEL);
                int ret;

		if (!adata)
			return -ENOMEM;

		irq_domain_set_info(domain, virq + i, virq + i, &avec_irq_controller,
				    adata, handle_edge_irq, NULL, NULL);
		irqd_set_single_target(irqd);
		irqd_set_affinity_on_activate(irqd);

		ret = avecintc_alloc_vector(adata);
                if (ret < 0) {
			kfree(adata);
                        return ret;
                }
         }
No?

> +static void clear_free_vector(struct irq_data *irqd)
> +{
> +	struct avecintc_data *adata = irq_data_get_irq_chip_data(irqd);
> +	bool managed = irqd_affinity_is_managed(irqd);

Don't even try. Your managed support is broken at the allocation side
and at several other places.

> +	per_cpu(irq_map, adata->cpu)[adata->vec] = NULL;
> +	irq_matrix_free(loongarch_avec.vector_matrix, adata->cpu, adata->vec, managed);
> +	adata->cpu = UINT_MAX;
> +	adata->vec = UINT_MAX;
> +
> +#ifdef CONFIG_SMP
> +	if (!adata->moving)
> +		return;
> +
> +	per_cpu(irq_map, adata->prev_cpu)[adata->prev_vec] = NULL;
> +	irq_matrix_free(loongarch_avec.vector_matrix,
> +			adata->prev_cpu, adata->prev_vec, adata->managed);
> +	adata->moving = 0;
> +	adata->prev_vec = UINT_MAX;
> +	adata->prev_cpu = UINT_MAX;

What's all the clearing for when you kfree() it two lines further down?

> +	list_del_init(&adata->entry);
> +#endif
> +	kfree(adata);

And no, not under the lock .... Move the locking into this function and
kfree() at the call site. There is zero reason to hold the lock over the
full loop.

> +static int __init pch_msi_parse_madt(union acpi_subtable_headers *header,
> +				     const unsigned long end)
> +{
> +	struct acpi_madt_msi_pic *pchmsi_entry = (struct acpi_madt_msi_pic *)header;
> +
> +	msi_base_addr = pchmsi_entry->msg_address - AVEC_MSG_OFFSET;
> +
> +	return pch_msi_acpi_init_v2(loongarch_avec.domain, pchmsi_entry);
> +}

...

> +int __init pch_msi_acpi_init_v2(struct irq_domain *parent, struct acpi_madt_msi_pic *acpi_pchmsi)

The second argument is required because?

Thanks,

        tglx