On 2024/11/12 23:29, Marc Zyngier wrote: > On Tue, 12 Nov 2024 09:04:26 +0000, > Zhou Wang <wangzhou1@xxxxxxxxxxxxx> wrote: >> >> When enabling GICv4.1 in hip09, VMAPP will fail to clear some caches >> during unmapping operation, which will cause some vSGIs lost. >> >> To fix the issue, it needs to send VINVALL command after VMOVP. >> >> Signed-off-by: Nianyao Tang <tangnianyao@xxxxxxxxxx> >> Signed-off-by: Zhou Wang <wangzhou1@xxxxxxxxxxxxx> >> --- >> Documentation/arch/arm64/silicon-errata.rst | 2 ++ >> arch/arm64/Kconfig | 10 +++++++ >> drivers/irqchip/irq-gic-v3-its.c | 29 +++++++++++++++++++++ >> 3 files changed, 41 insertions(+) >> >> diff --git a/Documentation/arch/arm64/silicon-errata.rst b/Documentation/arch/arm64/silicon-errata.rst >> index 65bfab1b1861..77db10e944f0 100644 >> --- a/Documentation/arch/arm64/silicon-errata.rst >> +++ b/Documentation/arch/arm64/silicon-errata.rst >> @@ -258,6 +258,8 @@ stable kernels. >> | Hisilicon | Hip{08,09,10,10C| #162001900 | N/A | >> | | ,11} SMMU PMCG | | | >> +----------------+-----------------+-----------------+-----------------------------+ >> +| Hisilicon | Hip09 | #162100801 | HISILICON_ERRATUM_162100801 | >> ++----------------+-----------------+-----------------+-----------------------------+ >> +----------------+-----------------+-----------------+-----------------------------+ >> | Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 | >> +----------------+-----------------+-----------------+-----------------------------+ >> diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig >> index 70d7f4f20225..d41cf6bf1458 100644 >> --- a/arch/arm64/Kconfig >> +++ b/arch/arm64/Kconfig >> @@ -1232,6 +1232,16 @@ config HISILICON_ERRATUM_161600802 >> >> If unsure, say Y. 
>> >> +config HISILICON_ERRATUM_162100801 >> + bool "Hip09 162100801 erratum support" >> + default y >> + help >> + When enabling GICv4.1 in hip09, VMAPP will fail to clear some caches >> + during unmapping operation, which will cause some vSGIs lost. >> + So fix it by sending VINVALL command after VMOVP. >> + >> + If unsure, say Y. >> + >> config QCOM_FALKOR_ERRATUM_1003 >> bool "Falkor E1003: Incorrect translation due to ASID change" >> default y >> diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c >> index 52f625e07658..e98d13d0adf9 100644 >> --- a/drivers/irqchip/irq-gic-v3-its.c >> +++ b/drivers/irqchip/irq-gic-v3-its.c >> @@ -44,6 +44,7 @@ >> #define ITS_FLAGS_WORKAROUND_CAVIUM_22375 (1ULL << 1) >> #define ITS_FLAGS_WORKAROUND_CAVIUM_23144 (1ULL << 2) >> #define ITS_FLAGS_FORCE_NON_SHAREABLE (1ULL << 3) >> +#define ITS_FLAGS_WORKAROUND_HISILICON_162100801 (1ULL << 4) >> >> #define RD_LOCAL_LPI_ENABLED BIT(0) >> #define RD_LOCAL_PENDTABLE_PREALLOCATED BIT(1) >> @@ -3804,6 +3805,7 @@ static int its_vpe_set_affinity(struct irq_data *d, >> struct its_vpe *vpe = irq_data_get_irq_chip_data(d); >> unsigned int from, cpu = nr_cpu_ids; >> struct cpumask *table_mask; >> + struct its_node *its; >> unsigned long flags; >> >> /* >> @@ -3866,6 +3868,17 @@ static int its_vpe_set_affinity(struct irq_data *d, >> vpe->col_idx = cpu; >> >> its_send_vmovp(vpe); >> + >> + /* >> + * Version of ITS is same in one system. As there is no cache in ITS, >> + * and only cache in related GICR should be clean, so one VINVALL is >> + * enough here. >> + */ >> + its = list_first_entry(&its_nodes, struct its_node, entry); >> + if ((its->flags & ITS_FLAGS_WORKAROUND_HISILICON_162100801) && >> + is_v4_1(its)) >> + its_send_vinvall(its, vpe); > > Can this be done using the GICR_INVALLR register instead? I would > expect it to perform a bit better, as it doesn't require > taking a global lock. Something like the hack below (untested). 
> > Thanks, > > M. Hi Marc, Many thanks for your suggestion! It is OK in theory, and it works fine in testing as well. I will send V3 soon. Best, Zhou > > diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c > index fdec478ba5e7..d30d6ee727c1 100644 > --- a/drivers/irqchip/irq-gic-v3-its.c > +++ b/drivers/irqchip/irq-gic-v3-its.c > @@ -62,6 +62,8 @@ static u32 lpi_id_bits; > > static u8 __ro_after_init lpi_prop_prio; > > +static struct its_node *find_4_1_its(void); > + > /* > * Collection structure - just an ID, and a redistributor address to > * ping. We use one per CPU as a bag of interrupts assigned to this > @@ -3797,6 +3799,22 @@ static void its_vpe_db_proxy_move(struct its_vpe *vpe, int from, int to) > raw_spin_unlock_irqrestore(&vpe_proxy.lock, flags); > } > > +static void its_vpe_4_1_invall_locked(int cpu, struct its_vpe *vpe) > +{ > + void __iomem *rdbase; > + u64 val; > + > + val = GICR_INVALLR_V; > + val |= FIELD_PREP(GICR_INVALLR_VPEID, vpe->vpe_id); > + > + raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock); > + rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base; > + gic_write_lpir(val, rdbase + GICR_INVALLR); > + > + wait_for_syncr(rdbase); > + raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock); > +} > + > static int its_vpe_set_affinity(struct irq_data *d, > const struct cpumask *mask_val, > bool force) > @@ -3849,6 +3867,10 @@ static int its_vpe_set_affinity(struct irq_data *d, > vpe->col_idx = cpu; > > its_send_vmovp(vpe); > + > + if (find_4_1_its()->flags & ITS_FLAGS_WORKAROUND_HISILICON_162100801) > + its_vpe_4_1_invall_locked(cpu, vpe); > + > its_vpe_db_proxy_move(vpe, from, cpu); > > out: > @@ -4156,22 +4178,12 @@ static void its_vpe_4_1_deschedule(struct its_vpe *vpe, > > static void its_vpe_4_1_invall(struct its_vpe *vpe) > { > - void __iomem *rdbase; > unsigned long flags; > - u64 val; > int cpu; > > - val = GICR_INVALLR_V; > - val |= FIELD_PREP(GICR_INVALLR_VPEID, vpe->vpe_id); > - > /* Target the redistributor this 
vPE is currently known on */ > cpu = vpe_to_cpuid_lock(vpe, &flags); > - raw_spin_lock(&gic_data_rdist_cpu(cpu)->rd_lock); > - rdbase = per_cpu_ptr(gic_rdists->rdist, cpu)->rd_base; > - gic_write_lpir(val, rdbase + GICR_INVALLR); > - > - wait_for_syncr(rdbase); > - raw_spin_unlock(&gic_data_rdist_cpu(cpu)->rd_lock); > + its_vpe_4_1_invall_locked(cpu, vpe); > vpe_to_cpuid_unlock(vpe, flags); > } > >