On 13/12/12 21:52, Christoffer Dall wrote:
>
> On Thursday, December 6, 2012, Marc Zyngier wrote:
>
> The VGIC code strongly relies on the maintenance interrupt to have
> a very low latency, assuming it to be nearly synchronous. This is
> wrong, as the architecture doesn't guarantee such thing.
>
> This means the interrupt may be handled too late, possibly after
> the triggering VM has been context switched away from the CPU and
> another brought in. Disaster looming.
>
> The fix is not to rely on the interrupt itself to do the maintenance.
> We can deal with this on the VM exit path, and the VGIC interrupt
> only becomes a scheduling hint.
>
> Also, it means that the "kick reduction optimisation" feature becomes
> pointless, as it effectively relied on the VGIC interrupt to reduce
> the number of times we kick a vcpu. This code is thus dropped
> altogether.
>
> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
> ---
>  arch/arm/include/asm/kvm_vgic.h |   4 --
>  arch/arm/kvm/arm.c              |  10 +--
>  arch/arm/kvm/interrupts_head.S  |   7 ++-
>  arch/arm/kvm/vgic.c             | 131 ++++++++++++++++++++--------------------
>  4 files changed, 74 insertions(+), 78 deletions(-)
>
> diff --git a/arch/arm/include/asm/kvm_vgic.h b/arch/arm/include/asm/kvm_vgic.h
> index 9a55c5f..2e2813a 100644
> --- a/arch/arm/include/asm/kvm_vgic.h
> +++ b/arch/arm/include/asm/kvm_vgic.h
> @@ -132,9 +132,6 @@ struct vgic_cpu {
>  	u32	vgic_elrsr[2];	/* Saved only */
>  	u32	vgic_apr;
>  	u32	vgic_lr[64];	/* A15 has only 4... */
> -
> -	/* Number of level-triggered interrupt in progress */
> -	atomic_t	irq_active_count;
>  #endif
>  };
>
> @@ -174,7 +171,6 @@ bool vgic_handle_mmio(struct kvm_vcpu *vcpu, struct kvm_run *run,
>
>  #define irqchip_in_kernel(k)	(!!((k)->arch.vgic.vctrl_base))
>  #define vgic_initialized(k)	((k)->arch.vgic.ready)
> -#define vgic_active_irq(v)	(atomic_read(&(v)->arch.vgic_cpu.irq_active_count) == 0)
>
>  #else
>  static inline int kvm_vgic_hyp_init(void)
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 4d4da72..fea8fbb 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -94,15 +94,7 @@ int kvm_arch_hardware_enable(void *garbage)
>
>  int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu)
>  {
> -	if (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE) {
> -		if (vgic_active_irq(vcpu) &&
> -		    cmpxchg(&vcpu->mode, EXITING_GUEST_MODE, IN_GUEST_MODE) == EXITING_GUEST_MODE)
> -			return 0;
> -
> -		return 1;
> -	}
> -
> -	return 0;
> +	return (kvm_vcpu_exiting_guest_mode(vcpu) == IN_GUEST_MODE);
>  }
>
>  void kvm_arch_hardware_disable(void *garbage)
> diff --git a/arch/arm/kvm/interrupts_head.S b/arch/arm/kvm/interrupts_head.S
> index 07c85f2..5308eb2 100644
> --- a/arch/arm/kvm/interrupts_head.S
> +++ b/arch/arm/kvm/interrupts_head.S
> @@ -416,12 +416,17 @@ vcpu	.req	r0	@ vcpu pointer always in r0
>  	str	r9, [r11, #(VGIC_CPU_ELRSR + 4)]
>  	str	r10, [r11, #VGIC_CPU_APR]
>
> +	/* Clear GICH_HCR */
> +	mov	r5, #0
> +	str	r5, [r2, #GICH_HCR]
> +
>  	/* Save list registers */
>  	add	r2, r2, #GICH_LR0
>  	add	r3, r11, #VGIC_CPU_LR
>  	ldr	r4, [r11, #VGIC_CPU_NR_LR]
> -1:	ldr	r6, [r2], #4
> +1:	ldr	r6, [r2]
>  	str	r6, [r3], #4
> +	str	r5, [r2], #4	@ Wipe the LR we just saved
>
> that's a shame - a whole bunch of more mmio operations. Didn't we just
> disable the whole vgic above and we replace all the lr's on another vm
> entry, so why is this necessary?

Good point. Clearing GICH_HCR should be enough already.
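To illustrate (untested, just sketching what the respin could look like if we
keep only the GICH_HCR clear and drop the per-LR wipe):

	/* Clear GICH_HCR: the virtual interface stays off until the
	 * next world switch, so no need to touch each LR here */
	mov	r5, #0
	str	r5, [r2, #GICH_HCR]

	/* Save list registers, back to the single post-indexed load */
	add	r2, r2, #GICH_LR0
	add	r3, r11, #VGIC_CPU_LR
	ldr	r4, [r11, #VGIC_CPU_NR_LR]
1:	ldr	r6, [r2], #4
	str	r6, [r3], #4
	subs	r4, r4, #1
	bne	1b

That drops the extra MMIO store per LR and leaves the save loop as it was.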
>
>  	subs	r4, r4, #1
>  	bne	1b
>  2:
> diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c
> index df4f19d..9592067 100644
> --- a/arch/arm/kvm/vgic.c
> +++ b/arch/arm/kvm/vgic.c
> @@ -181,8 +181,6 @@ static void vgic_irq_set_active(struct kvm_vcpu *vcpu, int irq)
>  	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>
>  	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 1);
> -	atomic_inc(&vcpu->arch.vgic_cpu.irq_active_count);
> -	smp_mb__after_atomic_inc();
>  }
>
>  static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
> @@ -190,8 +188,13 @@ static void vgic_irq_clear_active(struct kvm_vcpu *vcpu, int irq)
>  	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>
>  	vgic_bitmap_set_irq_val(&dist->irq_active, vcpu->vcpu_id, irq, 0);
> -	atomic_dec(&vcpu->arch.vgic_cpu.irq_active_count);
> -	smp_mb__after_atomic_dec();
>
> didn't you just add this one?

As the commit log says above, this feature is simply removed, as it doesn't
make sense anymore.

> +}
> +
> +static int vgic_dist_irq_is_pending(struct kvm_vcpu *vcpu, int irq)
> +{
> +	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> +
> +	return vgic_bitmap_get_irq_val(&dist->irq_state, vcpu->vcpu_id, irq);
>  }
>
>  static void vgic_dist_irq_set(struct kvm_vcpu *vcpu, int irq)
> @@ -987,6 +990,56 @@ epilog:
>  	}
>  }
>
> +static bool vgic_process_maintenance(struct kvm_vcpu *vcpu)
> +{
> +	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +	bool level_pending = false;
> +
> +	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
> +
> +	/*
> +	 * We do not need to take the distributor lock here, since the only
> +	 * action we perform is clearing the irq_active_bit for an EOIed
> +	 * level interrupt. There is a potential race with
> +	 * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
> +	 * if the interrupt is already active. Two possibilities:
> +	 *
> +	 * - The queuing is occuring on the same vcpu: cannot happen, as we're
>
> occurring
>
> +	 *   already in the context of this vcpu, and executing the handler
> +	 * - The interrupt has been migrated to another vcpu, and we ignore
> +	 *   this interrupt for this run. Big deal. It is still pending though,
> +	 *   and will get considered when this vcpu exits.
> +	 */
> +	if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
> +		/*
> +		 * Some level interrupts have been EOIed. Clear their
> +		 * active bit.
> +		 */
> +		int lr, irq;
> +
> +		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
> +				 vgic_cpu->nr_lr) {
> +			irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
> +
> +			vgic_irq_clear_active(vcpu, irq);
> +			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
> +
> +			/* Any additional pending interrupt? */
> +			if (vgic_dist_irq_is_pending(vcpu, irq)) {
> +				vgic_cpu_irq_set(vcpu, irq);
> +				level_pending = true;
> +			} else {
> +				vgic_cpu_irq_clear(vcpu, irq);
> +			}
> +		}
> +	}
> +
> +	if (vgic_cpu->vgic_misr & VGIC_MISR_U)
> +		vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
> +
> +	return level_pending;
> +}
> +
>  /*
>   * Sync back the VGIC state after a guest run. We do not really touch
>   * the distributor here (the irq_pending_on_cpu bit is safe to set),
> @@ -997,6 +1050,10 @@ static void __kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu)
>  	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>  	struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>  	int lr, pending;
> +	bool level_pending;
> +
> +	/* Process anything that has to do with maintenance interrupt first */
>
> this comment doesn't really help understanding anything - the function
> name says it all
>
> +	level_pending = vgic_process_maintenance(vcpu);
>
>  	/* Clear mappings for empty LRs */
>  	for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr,
> @@ -1015,10 +1072,8 @@ static void __kvm_vgic_sync_from_cpu(struct kvm_vcpu *vcpu)
>  	/* Check if we still have something up our sleeve... */
>  	pending = find_first_zero_bit((unsigned long *)vgic_cpu->vgic_elrsr,
>  				      vgic_cpu->nr_lr);
> -	if (pending < vgic_cpu->nr_lr) {
> +	if (level_pending || pending < vgic_cpu->nr_lr)
>  		set_bit(vcpu->vcpu_id, &dist->irq_pending_on_cpu);
> -		smp_mb();
> -	}
>  }
>
>  void kvm_vgic_sync_to_cpu(struct kvm_vcpu *vcpu)
> @@ -1080,7 +1135,7 @@ static bool vgic_update_irq_state(struct kvm *kvm, int cpuid,
>  	vcpu = kvm_get_vcpu(kvm, cpuid);
>  	is_edge = vgic_irq_is_edge(vcpu, irq_num);
>  	is_level = !is_edge;
> -	state = vgic_bitmap_get_irq_val(&dist->irq_state, cpuid, irq_num);
> +	state = vgic_dist_irq_is_pending(vcpu, irq_num);
>
>  	/*
>  	 * Only inject an interrupt if:
> @@ -1156,64 +1211,12 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int irq_num,
>
>  static irqreturn_t vgic_maintenance_handler(int irq, void *data)
>  {
> -	struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)data;
> -	struct vgic_dist *dist;
> -	struct vgic_cpu *vgic_cpu;
> -
> -	if (WARN(!vcpu,
> -		 "VGIC interrupt on CPU %d with no vcpu\n", smp_processor_id()))
> -		return IRQ_HANDLED;
> -
> -	vgic_cpu = &vcpu->arch.vgic_cpu;
> -	dist = &vcpu->kvm->arch.vgic;
> -	kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr);
> -
>  	/*
> -	 * We do not need to take the distributor lock here, since the only
> -	 * action we perform is clearing the irq_active_bit for an EOIed
> -	 * level interrupt. There is a potential race with
> -	 * the queuing of an interrupt in __kvm_sync_to_cpu(), where we check
> -	 * if the interrupt is already active. Two possibilities:
> -	 *
> -	 * - The queuing is occuring on the same vcpu: cannot happen, as we're
> -	 *   already in the context of this vcpu, and executing the handler
> -	 * - The interrupt has been migrated to another vcpu, and we ignore
> -	 *   this interrupt for this run. Big deal. It is still pending though,
> -	 *   and will get considered when this vcpu exits.
> +	 * We cannot rely on the vgic maintenance interrupt to be
> +	 * delivered synchronously. Which means we can only use it to
>
> Starting a sentence with Which sounds strange to me - granted I'm not a
> native English speaker, but I would think that this should be either
> "..., which means" or "... This means that ..."
>
> +	 * exit the VM, and not do anything useful. The real
>
> skip the ", and not do anything useful." Also "The real processing" is
> rather vague, this should describe what we do on the exit path instead,
> or it should be described in that code and this comment should say
> something like "see vgic_process_maintenance() for details.".
>
> +	 * processing is done on the exit path.
>  	 */
> -	if (vgic_cpu->vgic_misr & VGIC_MISR_EOI) {
> -		/*
> -		 * Some level interrupts have been EOIed. Clear their
> -		 * active bit.
> -		 */
> -		int lr, irq;
> -
> -		for_each_set_bit(lr, (unsigned long *)vgic_cpu->vgic_eisr,
> -				 vgic_cpu->nr_lr) {
> -			irq = vgic_cpu->vgic_lr[lr] & VGIC_LR_VIRTUALID;
> -
> -			vgic_irq_clear_active(vcpu, irq);
> -			vgic_cpu->vgic_lr[lr] &= ~VGIC_LR_EOI;
> -			writel_relaxed(vgic_cpu->vgic_lr[lr],
> -				       dist->vctrl_base + GICH_LR0 + (lr << 2));
> -
> -			/* Any additionnal pending interrupt? */
> -			if (vgic_bitmap_get_irq_val(&dist->irq_state,
> -						    vcpu->vcpu_id, irq)) {
> -				vgic_cpu_irq_set(vcpu, irq);
> -				set_bit(vcpu->vcpu_id,
> -					&dist->irq_pending_on_cpu);
> -			} else {
> -				vgic_cpu_irq_clear(vcpu, irq);
> -			}
> -		}
> -	}
> -
> -	if (vgic_cpu->vgic_misr & VGIC_MISR_U) {
> -		vgic_cpu->vgic_hcr &= ~VGIC_HCR_UIE;
> -		writel_relaxed(vgic_cpu->vgic_hcr, dist->vctrl_base + GICH_HCR);
> -	}
> -
>  	return IRQ_HANDLED;
>  }
>
> --
> 1.8.0.1
>

-- 
Jazz is not dead. It just smells funny...

_______________________________________________
kvmarm mailing list
kvmarm@xxxxxxxxxxxxxxxxxxxxx
https://lists.cs.columbia.edu/cucslists/listinfo/kvmarm