On Mon, Feb 24, 2020 at 05:13:18PM +0100, Peter Zijlstra wrote: > Damn, true. That also means I need to fix the arm64 bits, and that's a > little more tricky. > > Something like so perhaps.. hmm? > > --- > --- a/arch/arm64/include/asm/hardirq.h > +++ b/arch/arm64/include/asm/hardirq.h > @@ -32,30 +32,52 @@ u64 smp_irq_stat_cpu(unsigned int cpu); > > struct nmi_ctx { > u64 hcr; > + unsigned int cnt; > }; > > DECLARE_PER_CPU(struct nmi_ctx, nmi_contexts); > > -#define arch_nmi_enter() \ > - do { \ > - if (is_kernel_in_hyp_mode() && !in_nmi()) { \ > - struct nmi_ctx *nmi_ctx = this_cpu_ptr(&nmi_contexts); \ > - nmi_ctx->hcr = read_sysreg(hcr_el2); \ > - if (!(nmi_ctx->hcr & HCR_TGE)) { \ > - write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ > - isb(); \ > - } \ > - } \ > - } while (0) > +#define arch_nmi_enter() \ > +do { \ > + struct nmi_ctx *___ctx; \ > + unsigned int ___cnt; \ > + \ > + if (!is_kernel_in_hyp_mode() || in_nmi()) \ > + break; \ > + \ > + ___ctx = this_cpu_ptr(&nmi_contexts); \ > + ___cnt = ___ctx->cnt; \ > + if (!(___cnt & 1) && __cnt) { \ > + ___ctx->cnt += 2; \ > + break; \ > + } \ > + \ > + ___ctx->cnt |= 1; \ > + barrier(); \ > + nmi_ctx->hcr = read_sysreg(hcr_el2); \ > + if (!(nmi_ctx->hcr & HCR_TGE)) { \ > + write_sysreg(nmi_ctx->hcr | HCR_TGE, hcr_el2); \ > + isb(); \ > + } \ > + barrier(); \ Suppose the first NMI is interrupted here. nmi_ctx->hcr has HCR_TGE unset. The new NMI is going to overwrite nmi_ctx->hcr with HCR_TGE set. Then the first NMI will not restore the correct value upon arch_nmi_exit(). So perhaps the below, but I bet I overlooked something obvious. 
/*
 * arch_nmi_enter() / arch_nmi_exit() - force HCR_EL2.TGE for the duration of
 * an NMI when the kernel runs in hyp mode, and restore it afterwards.
 *
 * Both macros are no-ops unless is_kernel_in_hyp_mode() is true.
 *
 * The per-CPU struct nmi_ctx carries:
 *   cnt - nesting depth of NMIs that went through arch_nmi_enter();
 *   hcr - the hcr_el2 value observed by the outermost NMI, i.e. the value
 *         that has to be put back once the last NMI leaves.
 *
 * Only the outermost entry (cnt == 0) reads/modifies hcr_el2 and records the
 * saved value; nested entries merely bump cnt.  Only the outermost exit
 * (cnt dropping back to 0) restores hcr_el2, and only if HCR_TGE was not
 * already set in the saved value.
 *
 * The statement order matters because an NMI can interrupt these sequences
 * at any point on the same CPU; the barrier() calls keep the compiler from
 * reordering the accesses to cnt and hcr.
 */
#define arch_nmi_enter() \
do { \
	struct nmi_ctx *___ctx; \
	u64 ___hcr; \
	\
	if (!is_kernel_in_hyp_mode()) \
		break; \
	\
	___ctx = this_cpu_ptr(&nmi_contexts); \
	if (___ctx->cnt) { \
		/* Nested NMI: the outermost one already saved hcr. */ \
		___ctx->cnt++; \
		break; \
	} \
	\
	___hcr = read_sysreg(hcr_el2); \
	if (!(___hcr & HCR_TGE)) { \
		write_sysreg(___hcr | HCR_TGE, hcr_el2); \
		isb(); \
	} \
	/* \
	 * Publish cnt before saving ___hcr: an NMI landing after this \
	 * point sees cnt != 0 and will not overwrite ___ctx->hcr with a \
	 * value that already has HCR_TGE set. \
	 */ \
	___ctx->cnt = 1; \
	barrier(); \
	___ctx->hcr = ___hcr; \
} while (0)

#define arch_nmi_exit() \
do { \
	struct nmi_ctx *___ctx; \
	u64 ___hcr; \
	\
	if (!is_kernel_in_hyp_mode()) \
		break; \
	\
	___ctx = this_cpu_ptr(&nmi_contexts); \
	/* \
	 * Fetch the saved value before dropping cnt; once cnt reaches 0 a \
	 * new NMI is allowed to overwrite ___ctx->hcr. \
	 */ \
	___hcr = ___ctx->hcr; \
	barrier(); \
	--___ctx->cnt; \
	/* Drop cnt before restoring hcr_el2, not after. */ \
	barrier(); \
	if (!___ctx->cnt && !(___hcr & HCR_TGE)) \
		write_sysreg(___hcr, hcr_el2); \
} while (0)