On 30/08/15 14:54, Christoffer Dall wrote: > The arch timer currently uses edge-triggered semantics in the sense that > the line is never sampled by the vgic and lowering the line from the > timer to the vgic doesn't have any affect on the pending state of > virtual interrupts in the vgic. This means that we do not support a > guest with the otherwise valid behavior of (1) disable interrupts (2) > enable the timer (3) disable the timer (4) enable interrupts. Such a > guest would validly not expect to see any interrupts on real hardware, > but will see interrupts on KVM. > > This patches fixes this shortcoming through the following series of > changes. > > First, we change the flow of the timer/vgic sync/flush operations. Now > the timer is always flushed/synced before the vgic, because the vgic > samples the state of the timer output. This has the implication that we > move the timer operations in to non-preempible sections, but that is > fine after the previous commit getting rid of hrtimer schedules on every > entry/exit. > > Second, we change the internal behavior of the timer, letting the timer > keep track of its previous output state, and only lower/raise the line > to the vgic when the state changes. Note that in theory this could have > been accomplished more simply by signalling the vgic every time the > state *potentially* changed, but we don't want to be hitting the vgic > more often than necessary. > > Third, we get rid of the use of the map->active field in the vgic and > instead simply set the interrupt as active on the physical distributor > whenever we signal a mapped interrupt to the guest, and we reset the > active state when we sync back the HW state from the vgic. > > Fourth, and finally, we now initialize the timer PPIs (and all the other > unused PPIs for now), to be level-triggered, and modify the sync code to > sample the line state on HW sync and re-inject a new interrupt if it is > still pending at that time. > > Signed-off-by: Christoffer Dall <christoffer.dall@xxxxxxxxxx> > --- > arch/arm/kvm/arm.c | 11 +++++-- > include/kvm/arm_arch_timer.h | 2 +- > include/kvm/arm_vgic.h | 3 -- > virt/kvm/arm/arch_timer.c | 68 +++++++++++++++++++++++++++++++------------- > virt/kvm/arm/vgic.c | 67 +++++++++++++++---------------------------- > 5 files changed, 81 insertions(+), 70 deletions(-) > > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c > index bdf8871..102a4aa 100644 > --- a/arch/arm/kvm/arm.c > +++ b/arch/arm/kvm/arm.c > @@ -561,9 +561,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) > > if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) { > local_irq_enable(); > + kvm_timer_sync_hwstate(vcpu); > kvm_vgic_sync_hwstate(vcpu); > preempt_enable(); > - kvm_timer_sync_hwstate(vcpu); > continue; > } > > @@ -608,12 +608,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) > kvm_guest_exit(); > trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu)); > > + /* > + * We must sync the timer state before the vgic state so that > + * the vgic can properly sample the updated state of the > + * interrupt line. > + */ > + kvm_timer_sync_hwstate(vcpu); > + > kvm_vgic_sync_hwstate(vcpu); > > preempt_enable(); > > - kvm_timer_sync_hwstate(vcpu); > - > ret = handle_exit(vcpu, run, ret); > } > > diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h > index ef14cc1..1800227 100644 > --- a/include/kvm/arm_arch_timer.h > +++ b/include/kvm/arm_arch_timer.h > @@ -51,7 +51,7 @@ struct arch_timer_cpu { > bool armed; > > /* Timer IRQ */ > - const struct kvm_irq_level *irq; > + struct kvm_irq_level irq; > > /* VGIC mapping */ > struct irq_phys_map *map; > diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h > index d901f1a..99011a0 100644 > --- a/include/kvm/arm_vgic.h > +++ b/include/kvm/arm_vgic.h > @@ -163,7 +163,6 @@ struct irq_phys_map { > u32 virt_irq; > u32 phys_irq; > u32 irq; > - bool active; > }; > > struct irq_phys_map_entry { > @@ -358,8 +357,6 @@ int kvm_vgic_vcpu_active_irq(struct kvm_vcpu *vcpu); > struct irq_phys_map *kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, > int virt_irq, int irq); > int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, struct irq_phys_map *map); > -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map); > -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active); > > #define irqchip_in_kernel(k) (!!((k)->arch.vgic.in_kernel)) > #define vgic_initialized(k) (!!((k)->arch.vgic.nr_cpus)) > diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c > index 018f3d6..747302f 100644 > --- a/virt/kvm/arm/arch_timer.c > +++ b/virt/kvm/arm/arch_timer.c > @@ -59,18 +59,6 @@ static void timer_disarm(struct arch_timer_cpu *timer) > } > } > > -static void kvm_timer_inject_irq(struct kvm_vcpu *vcpu) > -{ > - int ret; > - struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; > - > - kvm_vgic_set_phys_irq_active(timer->map, true); > - ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, > - timer->map, > - timer->irq->level); > - WARN_ON(ret); > -} > - > static irqreturn_t kvm_arch_timer_handler(int irq, void *dev_id) > { > struct kvm_vcpu *vcpu = *(struct kvm_vcpu **)dev_id; > @@ -116,8 +104,7 @@ static bool kvm_timer_irq_enabled(struct kvm_vcpu *vcpu) > struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; > > return !(timer->cntv_ctl & ARCH_TIMER_CTRL_IT_MASK) && > - (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE) && > - !kvm_vgic_get_phys_irq_active(timer->map); > + (timer->cntv_ctl & ARCH_TIMER_CTRL_ENABLE); > } > > bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) > @@ -134,6 +121,45 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) > return cval <= now; > } > > +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu) > +{ > + int ret; > + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; > + > + BUG_ON(!vgic_initialized(vcpu->kvm)); > + > + ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, > + timer->map, > + timer->irq.level); > + WARN_ON(ret); > +} > + > +/* > + * Check if there was a change in the timer state (should we raise or lower > + * the line level to the GIC). > + */ > +static void kvm_timer_update_state(struct kvm_vcpu *vcpu) > +{ > + struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; > + > + /* > + * If userspace modified the timer registers via SET_ONE_REG before > + * the vgic was initialized, we mustn't set the timer->irq.level value > + * because the guest would never see the interrupt. Instead wait > + * until we call this funciton from kvm_timer_flush_hwstate. > + */ > + if (!vgic_initialized(vcpu->kvm)) > + return; > + > + if (kvm_timer_should_fire(vcpu) && !timer->irq.level) { > + timer->irq.level = 1; > + kvm_timer_update_irq(vcpu); > + } else if (!kvm_timer_should_fire(vcpu) && timer->irq.level) { > + timer->irq.level = 0; > + kvm_timer_update_irq(vcpu); > + } > +} > + It took me ages to parse this, so I rewrote it to match my understanding: diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 8a0fdfc..a722f0f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -121,13 +121,14 @@ bool kvm_timer_should_fire(struct kvm_vcpu *vcpu) return cval <= now; } -static void kvm_timer_update_irq(struct kvm_vcpu *vcpu) +static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_state) { int ret; struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; BUG_ON(!vgic_initialized(vcpu->kvm)); + timer->irq.level = new_state; ret = kvm_vgic_inject_mapped_irq(vcpu->kvm, vcpu->vcpu_id, timer->map, timer->irq.level); @@ -151,13 +152,8 @@ static void kvm_timer_update_state(struct kvm_vcpu *vcpu) if (!vgic_initialized(vcpu->kvm)) return; - if (kvm_timer_should_fire(vcpu) && !timer->irq.level) { - timer->irq.level = 1; - kvm_timer_update_irq(vcpu); - } else if (!kvm_timer_should_fire(vcpu) && timer->irq.level) { - timer->irq.level = 0; - kvm_timer_update_irq(vcpu); - } + if (kvm_timer_should_fire(vcpu) != timer->irq.level) + kvm_timer_update_irq(vcpu, !timer->irq.level); } /* Did I get it right? > /* > * Schedule the background timer before calling kvm_vcpu_block, so that this > * thread is removed from its waitqueue and made runnable when there's a timer > @@ -191,8 +217,7 @@ void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu) > * If the timer expired while we were not scheduled, now is the time > * to inject it. > */ > - if (kvm_timer_should_fire(vcpu)) > - kvm_timer_inject_irq(vcpu); > + kvm_timer_update_state(vcpu); > } > > /** > @@ -208,8 +233,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) > > BUG_ON(timer_is_armed(timer)); > > - if (kvm_timer_should_fire(vcpu)) > - kvm_timer_inject_irq(vcpu); > + /* > + * The guest could have modified the timer registers or the timer > + * could have expired, update the timer state. > + */ > + kvm_timer_update_state(vcpu); > } > > int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, > @@ -224,7 +252,7 @@ int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, > * kvm_vcpu_set_target(). To handle this, we determine > * vcpu timer irq number when the vcpu is reset. > */ > - timer->irq = irq; > + timer->irq.irq = irq->irq; > > /* > * Tell the VGIC that the virtual interrupt is tied to a > @@ -269,6 +297,8 @@ int kvm_arm_timer_set_reg(struct kvm_vcpu *vcpu, u64 regid, u64 value) > default: > return -1; > } > + > + kvm_timer_update_state(vcpu); > return 0; > } > > diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c > index 9ed8d53..f4ea950 100644 > --- a/virt/kvm/arm/vgic.c > +++ b/virt/kvm/arm/vgic.c > @@ -1422,34 +1422,43 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) > /* > * Save the physical active state, and reset it to inactive. > * > - * Return 1 if HW interrupt went from active to inactive, and 0 otherwise. > + * Return true if there's a pending level triggered interrupt line to queue. > */ > -static int vgic_sync_hwirq(struct kvm_vcpu *vcpu, struct vgic_lr vlr) > +static bool vgic_sync_hwirq(struct kvm_vcpu *vcpu, int lr, struct vgic_lr vlr) > { > struct irq_phys_map *map; > + bool phys_active; > int ret; > > if (!(vlr.state & LR_HW)) > return 0; > > map = vgic_irq_map_search(vcpu, vlr.irq); > - BUG_ON(!map || !map->active); > + BUG_ON(!map); > > ret = irq_get_irqchip_state(map->irq, > IRQCHIP_STATE_ACTIVE, > - &map->active); > + &phys_active); > > WARN_ON(ret); > > - if (map->active) { > + if (phys_active) { > + /* > + * Interrupt still marked as active on the physical > + * distributor, so guest did not EOI it yet. Reset to > + * non-active so that other VMs can see interrupts from this > + * device. > + */ > ret = irq_set_irqchip_state(map->irq, > IRQCHIP_STATE_ACTIVE, > false); > WARN_ON(ret); > - return 0; > + return false; > } > > - return 1; > + /* Mapped edge-triggered interrupts not yet supported. */ > + WARN_ON(vgic_irq_is_edge(vcpu, vlr.irq)); Hmmm. What are we missing? > + return process_level_irq(vcpu, lr, vlr); > } > > /* Sync back the VGIC state after a guest run */ > @@ -1474,18 +1483,8 @@ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) > continue; > > vlr = vgic_get_lr(vcpu, lr); > - if (vgic_sync_hwirq(vcpu, vlr)) { > - /* > - * So this is a HW interrupt that the guest > - * EOI-ed. Clean the LR state and allow the > - * interrupt to be sampled again. > - */ > - vlr.state = 0; > - vlr.hwirq = 0; > - vgic_set_lr(vcpu, lr, vlr); > - vgic_irq_clear_queued(vcpu, vlr.irq); > - set_bit(lr, elrsr_ptr); > - } > + if (vgic_sync_hwirq(vcpu, lr, vlr)) > + level_pending = true; > > if (!test_bit(lr, elrsr_ptr)) > continue; > @@ -1861,30 +1860,6 @@ static void vgic_free_phys_irq_map_rcu(struct rcu_head *rcu) > } > > /** > - * kvm_vgic_get_phys_irq_active - Return the active state of a mapped IRQ > - * > - * Return the logical active state of a mapped interrupt. This doesn't > - * necessarily reflects the current HW state. > - */ > -bool kvm_vgic_get_phys_irq_active(struct irq_phys_map *map) > -{ > - BUG_ON(!map); > - return map->active; > -} > - > -/** > - * kvm_vgic_set_phys_irq_active - Set the active state of a mapped IRQ > - * > - * Set the logical active state of a mapped interrupt. This doesn't > - * immediately affects the HW state. > - */ > -void kvm_vgic_set_phys_irq_active(struct irq_phys_map *map, bool active) > -{ > - BUG_ON(!map); > - map->active = active; > -} > - > -/** > * kvm_vgic_unmap_phys_irq - Remove a virtual to physical IRQ mapping > * @vcpu: The VCPU pointer > * @map: The pointer to a mapping obtained through kvm_vgic_map_phys_irq > @@ -2112,10 +2087,14 @@ int vgic_init(struct kvm *kvm) > if (i < VGIC_NR_SGIS) > vgic_bitmap_set_irq_val(&dist->irq_enabled, > vcpu->vcpu_id, i, 1); > - if (i < VGIC_NR_PRIVATE_IRQS) > + if (i < VGIC_NR_SGIS) > vgic_bitmap_set_irq_val(&dist->irq_cfg, > vcpu->vcpu_id, i, > VGIC_CFG_EDGE); > + else if (i < VGIC_NR_PRIVATE_IRQS) /* PPIs */ > + vgic_bitmap_set_irq_val(&dist->irq_cfg, > + vcpu->vcpu_id, i, > + VGIC_CFG_LEVEL); > } > > vgic_enable(vcpu); > My only real objection to this patch is that it puts my brain upside down. Hopefully that won't last. Thanks, M. -- Jazz is not dead. It just smells funny... -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html