On Mon, Feb 08, 2016 at 11:40:22AM +0000, Marc Zyngier wrote:
> Just like on GICv2, we're a bit hammer-happy with GICv3, and access
> them more often than we should.
>
> Adopt a policy similar to what we do for GICv2, only save/restoring
> the minimal set of registers. As we don't access the registers
> linearly anymore (we may skip some), the convoluted accessors become
> slightly simpler, and we can drop the ugly indexing macro that
> tended to confuse the reviewers.
>
> Signed-off-by: Marc Zyngier <marc.zyngier@xxxxxxx>
> ---
>  arch/arm64/kvm/hyp/vgic-v3-sr.c | 288 ++++++++++++++++++++++++----------------
>  include/kvm/arm_vgic.h          |   6 -
>  virt/kvm/arm/vgic-v3.c          |   4 +-
>  3 files changed, 176 insertions(+), 122 deletions(-)
>
> diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/arch/arm64/kvm/hyp/vgic-v3-sr.c
> index 9142e082..d3813f5 100644
> --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c
> +++ b/arch/arm64/kvm/hyp/vgic-v3-sr.c
> @@ -39,12 +39,104 @@
>  		asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\
>  	} while (0)
>
> -/* vcpu is already in the HYP VA space */
> +static u64 __hyp_text __gic_v3_get_lr(unsigned int lr)
> +{
> +	switch (lr & 0xf) {
> +	case 0:
> +		return read_gicreg(ICH_LR0_EL2);
> +	case 1:
> +		return read_gicreg(ICH_LR1_EL2);
> +	case 2:
> +		return read_gicreg(ICH_LR2_EL2);
> +	case 3:
> +		return read_gicreg(ICH_LR3_EL2);
> +	case 4:
> +		return read_gicreg(ICH_LR4_EL2);
> +	case 5:
> +		return read_gicreg(ICH_LR5_EL2);
> +	case 6:
> +		return read_gicreg(ICH_LR6_EL2);
> +	case 7:
> +		return read_gicreg(ICH_LR7_EL2);
> +	case 8:
> +		return read_gicreg(ICH_LR8_EL2);
> +	case 9:
> +		return read_gicreg(ICH_LR9_EL2);
> +	case 10:
> +		return read_gicreg(ICH_LR10_EL2);
> +	case 11:
> +		return read_gicreg(ICH_LR11_EL2);
> +	case 12:
> +		return read_gicreg(ICH_LR12_EL2);
> +	case 13:
> +		return read_gicreg(ICH_LR13_EL2);
> +	case 14:
> +		return read_gicreg(ICH_LR14_EL2);
> +	case 15:
> +		return read_gicreg(ICH_LR15_EL2);
> +	}
> +
> +	unreachable();
> +}
> +
> +static void __hyp_text __gic_v3_set_lr(u64 val, int lr)
> +{
> +	switch (lr & 0xf) {
> +	case 0:
> +		write_gicreg(val, ICH_LR0_EL2);
> +		break;
> +	case 1:
> +		write_gicreg(val, ICH_LR1_EL2);
> +		break;
> +	case 2:
> +		write_gicreg(val, ICH_LR2_EL2);
> +		break;
> +	case 3:
> +		write_gicreg(val, ICH_LR3_EL2);
> +		break;
> +	case 4:
> +		write_gicreg(val, ICH_LR4_EL2);
> +		break;
> +	case 5:
> +		write_gicreg(val, ICH_LR5_EL2);
> +		break;
> +	case 6:
> +		write_gicreg(val, ICH_LR6_EL2);
> +		break;
> +	case 7:
> +		write_gicreg(val, ICH_LR7_EL2);
> +		break;
> +	case 8:
> +		write_gicreg(val, ICH_LR8_EL2);
> +		break;
> +	case 9:
> +		write_gicreg(val, ICH_LR9_EL2);
> +		break;
> +	case 10:
> +		write_gicreg(val, ICH_LR10_EL2);
> +		break;
> +	case 11:
> +		write_gicreg(val, ICH_LR11_EL2);
> +		break;
> +	case 12:
> +		write_gicreg(val, ICH_LR12_EL2);
> +		break;
> +	case 13:
> +		write_gicreg(val, ICH_LR13_EL2);
> +		break;
> +	case 14:
> +		write_gicreg(val, ICH_LR14_EL2);
> +		break;
> +	case 15:
> +		write_gicreg(val, ICH_LR15_EL2);
> +		break;
> +	}
> +}
> +
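Side note for anyone puzzled by the 16-way switches: the system register name
is part of the mrs/msr instruction encoding, so it cannot be computed at run
time; each LR needs its own instruction, and the switch dispatches on the
run-time index. For reference, the accessor macros in this file look roughly
like the sketch below (simplified, not the exact source; the matching
write_gicreg() body is visible in the hunk context above):

	/* roughly what read_gicreg() expands to -- illustrative sketch */
	#define read_gicreg(r)						\
	({								\
		u64 reg;						\
		asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg));	\
		reg;							\
	})
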
>  void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
>  {
>  	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
>  	u64 val;
> -	u32 max_lr_idx, nr_pri_bits;
>
>  	/*
>  	 * Make sure stores to the GIC via the memory mapped interface
> @@ -53,68 +145,50 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
>  	dsb(st);
>
>  	cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
> -	cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
> -	cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
> -	cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
>
> -	write_gicreg(0, ICH_HCR_EL2);
> -	val = read_gicreg(ICH_VTR_EL2);
> -	max_lr_idx = vtr_to_max_lr_idx(val);
> -	nr_pri_bits = vtr_to_nr_pri_bits(val);
> +	if (vcpu->arch.vgic_cpu.live_lrs) {
> +		int i;
> +		u32 max_lr_idx, nr_pri_bits;
>
> -	switch (max_lr_idx) {
> -	case 15:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)] = read_gicreg(ICH_LR15_EL2);
> -	case 14:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)] = read_gicreg(ICH_LR14_EL2);
> -	case 13:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)] = read_gicreg(ICH_LR13_EL2);
> -	case 12:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)] = read_gicreg(ICH_LR12_EL2);
> -	case 11:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)] = read_gicreg(ICH_LR11_EL2);
> -	case 10:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)] = read_gicreg(ICH_LR10_EL2);
> -	case 9:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)] = read_gicreg(ICH_LR9_EL2);
> -	case 8:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)] = read_gicreg(ICH_LR8_EL2);
> -	case 7:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)] = read_gicreg(ICH_LR7_EL2);
> -	case 6:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)] = read_gicreg(ICH_LR6_EL2);
> -	case 5:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)] = read_gicreg(ICH_LR5_EL2);
> -	case 4:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)] = read_gicreg(ICH_LR4_EL2);
> -	case 3:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)] = read_gicreg(ICH_LR3_EL2);
> -	case 2:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)] = read_gicreg(ICH_LR2_EL2);
> -	case 1:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)] = read_gicreg(ICH_LR1_EL2);
> -	case 0:
> -		cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)] = read_gicreg(ICH_LR0_EL2);
> -	}
> +		cpu_if->vgic_misr = read_gicreg(ICH_MISR_EL2);
> +		cpu_if->vgic_eisr = read_gicreg(ICH_EISR_EL2);
> +		cpu_if->vgic_elrsr = read_gicreg(ICH_ELSR_EL2);
>
> -	switch (nr_pri_bits) {
> -	case 7:
> -		cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
> -		cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
> -	case 6:
> -		cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
> -	default:
> -		cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
> -	}
> +		write_gicreg(0, ICH_HCR_EL2);
> +		val = read_gicreg(ICH_VTR_EL2);

Can't we cache the read of ICH_VTR_EL2 then?
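Something like the sketch below, perhaps -- untested; vgic_vtr is a
hypothetical new field in struct vgic_v3_cpu_if that would be filled in once
from ich_vtr_el2 (which virt/kvm/arm/vgic-v3.c already reads at probe time,
see the hunk further down), so the world-switch path never has to touch
ICH_VTR_EL2 again:

	/*
	 * Hypothetical helper: derive the LR/priority geometry from a
	 * value cached at init time instead of re-reading ICH_VTR_EL2.
	 */
	static void __hyp_text __vgic_v3_get_vtr(struct vgic_v3_cpu_if *cpu_if,
						 u32 *max_lr_idx,
						 u32 *nr_pri_bits)
	{
		*max_lr_idx  = vtr_to_max_lr_idx(cpu_if->vgic_vtr);
		*nr_pri_bits = vtr_to_nr_pri_bits(cpu_if->vgic_vtr);
	}
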
> +		max_lr_idx = vtr_to_max_lr_idx(val);
> +		nr_pri_bits = vtr_to_nr_pri_bits(val);
>
> -	switch (nr_pri_bits) {
> -	case 7:
> -		cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
> -		cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
> -	case 6:
> -		cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
> -	default:
> -		cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
> +		for (i = 0; i <= max_lr_idx; i++) {
> +			if (vcpu->arch.vgic_cpu.live_lrs & (1UL << i))
> +				cpu_if->vgic_lr[i] = __gic_v3_get_lr(i);
> +		}
> +
> +		switch (nr_pri_bits) {
> +		case 7:
> +			cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2);
> +			cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2);
> +		case 6:
> +			cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2);
> +		default:
> +			cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2);
> +		}
> +
> +		switch (nr_pri_bits) {
> +		case 7:
> +			cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2);
> +			cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2);
> +		case 6:
> +			cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2);
> +		default:
> +			cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2);
> +		}
> +
> +		vcpu->arch.vgic_cpu.live_lrs = 0;
> +	} else {
> +		cpu_if->vgic_misr = 0;
> +		cpu_if->vgic_eisr = 0;
> +		cpu_if->vgic_elrsr = 0xffff;
>  	}
>
>  	val = read_gicreg(ICC_SRE_EL2);
> @@ -128,6 +202,8 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
>  	struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3;
>  	u64 val;
>  	u32 max_lr_idx, nr_pri_bits;
> +	u16 live_lrs = 0;
> +	int i;
>
>  	/*
>  	 * VFIQEn is RES1 if ICC_SRE_EL1.SRE is 1. This causes a
> @@ -140,68 +216,51 @@
>  	write_gicreg(cpu_if->vgic_sre, ICC_SRE_EL1);
>  	isb();
>
> -	write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
> -	write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
> -
>  	val = read_gicreg(ICH_VTR_EL2);

Same as above.

>  	max_lr_idx = vtr_to_max_lr_idx(val);
>  	nr_pri_bits = vtr_to_nr_pri_bits(val);
>
> -	switch (nr_pri_bits) {
> -	case 7:
> -		write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
> -		write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
> -	case 6:
> -		write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
> -	default:
> -		write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
> -	}
> -
> -	switch (nr_pri_bits) {
> -	case 7:
> -		write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
> -		write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
> -	case 6:
> -		write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
> -	default:
> -		write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
> +	for (i = 0; i <= max_lr_idx; i++) {
> +		if (cpu_if->vgic_lr[i] & ICH_LR_STATE)
> +			live_lrs |= (1 << i);
>  	}
>
> -	switch (max_lr_idx) {
> -	case 15:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(15)], ICH_LR15_EL2);
> -	case 14:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(14)], ICH_LR14_EL2);
> -	case 13:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(13)], ICH_LR13_EL2);
> -	case 12:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(12)], ICH_LR12_EL2);
> -	case 11:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(11)], ICH_LR11_EL2);
> -	case 10:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(10)], ICH_LR10_EL2);
> -	case 9:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(9)], ICH_LR9_EL2);
> -	case 8:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(8)], ICH_LR8_EL2);
> -	case 7:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(7)], ICH_LR7_EL2);
> -	case 6:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(6)], ICH_LR6_EL2);
> -	case 5:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(5)], ICH_LR5_EL2);
> -	case 4:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(4)], ICH_LR4_EL2);
> -	case 3:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(3)], ICH_LR3_EL2);
> -	case 2:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(2)], ICH_LR2_EL2);
> -	case 1:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(1)], ICH_LR1_EL2);
> -	case 0:
> -		write_gicreg(cpu_if->vgic_lr[VGIC_V3_LR_INDEX(0)], ICH_LR0_EL2);
> +	write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);

Also, here you may be able to optimize and cache the last-seen in-hardware
VMCR.
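Something along these lines, perhaps -- untested sketch only, hand-waving
over how the hyp code would address per-cpu variables; vmcr_shadow is a
hypothetical per-physical-CPU copy of whatever was last programmed into
ICH_VMCR_EL2:

	/* hypothetical shadow of the value currently held by ICH_VMCR_EL2 */
	static DEFINE_PER_CPU(u64, vmcr_shadow);

	/* restore path: skip the sysreg write when nothing changed */
	if (cpu_if->vgic_vmcr != __this_cpu_read(vmcr_shadow)) {
		write_gicreg(cpu_if->vgic_vmcr, ICH_VMCR_EL2);
		__this_cpu_write(vmcr_shadow, cpu_if->vgic_vmcr);
	}

	/*
	 * save path: the guest can change VMCR while it runs (e.g. via
	 * ICC_PMR_EL1), so refresh the shadow from the hardware value.
	 */
	cpu_if->vgic_vmcr = read_gicreg(ICH_VMCR_EL2);
	__this_cpu_write(vmcr_shadow, cpu_if->vgic_vmcr);
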
> +
> +	if (live_lrs) {
> +		write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
> +
> +		switch (nr_pri_bits) {
> +		case 7:
> +			write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2);
> +			write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2);
> +		case 6:
> +			write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2);
> +		default:
> +			write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2);
> +		}
> +

nit: trailing whitespace

> +		switch (nr_pri_bits) {
> +		case 7:
> +			write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2);
> +			write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2);
> +		case 6:
> +			write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2);
> +		default:
> +			write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2);
> +		}
> +
> +		for (i = 0; i <= max_lr_idx; i++) {
> +			val = 0;
> +
> +			if (live_lrs & (1 << i))
> +				val = cpu_if->vgic_lr[i];
> +
> +			__gic_v3_set_lr(val, i);
> +		}
>  	}
>
> +
>  	/*
>  	 * Ensures that the above will have reached the
>  	 * (re)distributors. This ensure the guest will read the
> @@ -209,6 +268,7 @@
>  	 */
>  	isb();
>  	dsb(sy);
> +	vcpu->arch.vgic_cpu.live_lrs = live_lrs;
>
>  	/*
>  	 * Prevent the guest from touching the GIC system registers if
> diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
> index f473fd6..281caf8 100644
> --- a/include/kvm/arm_vgic.h
> +++ b/include/kvm/arm_vgic.h
> @@ -279,12 +279,6 @@ struct vgic_v2_cpu_if {
>  	u32 vgic_lr[VGIC_V2_MAX_LRS];
>  };
>
> -/*
> - * LRs are stored in reverse order in memory. make sure we index them
> - * correctly.
> - */
> -#define VGIC_V3_LR_INDEX(lr) (VGIC_V3_MAX_LRS - 1 - lr)
> -
>  struct vgic_v3_cpu_if {
>  #ifdef CONFIG_KVM_ARM_VGIC_V3
>  	u32 vgic_hcr;
> diff --git a/virt/kvm/arm/vgic-v3.c b/virt/kvm/arm/vgic-v3.c
> index 453eafd..11b5ff6 100644
> --- a/virt/kvm/arm/vgic-v3.c
> +++ b/virt/kvm/arm/vgic-v3.c
> @@ -42,7 +42,7 @@ static u32 ich_vtr_el2;
>  static struct vgic_lr vgic_v3_get_lr(const struct kvm_vcpu *vcpu, int lr)
>  {
>  	struct vgic_lr lr_desc;
> -	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)];
> +	u64 val = vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr];
>
>  	if (vcpu->kvm->arch.vgic.vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3)
>  		lr_desc.irq = val & ICH_LR_VIRTUALID_MASK;
> @@ -106,7 +106,7 @@ static void vgic_v3_set_lr(struct kvm_vcpu *vcpu, int lr,
>  		lr_val |= ((u64)lr_desc.hwirq) << ICH_LR_PHYS_ID_SHIFT;
>  	}
>
> -	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[VGIC_V3_LR_INDEX(lr)] = lr_val;
> +	vcpu->arch.vgic_cpu.vgic_v3.vgic_lr[lr] = lr_val;
>
>  	if (!(lr_desc.state & LR_STATE_MASK))
>  		vcpu->arch.vgic_cpu.vgic_v3.vgic_elrsr |= (1U << lr);
> --
> 2.1.4
>

Ignoring potential further optimizations:

Reviewed-by: Christoffer Dall <christoffer.dall@xxxxxxxxxx>
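As a footnote for anyone reading along: the live_lrs bookkeeping above boils
down to a small bitmap -- on restore we remember which LRs actually carry
state, and on the next save we only read those back. A stand-alone
illustration (plain userspace C, not kernel code; the ICH_LR_STATE value
matches the definition in include/linux/irqchip/arm-gic-v3.h):

#include <stdint.h>
#include <stdio.h>

/* pending and/or active bits of an ICH_LRn_EL2 value */
#define ICH_LR_STATE	(3ULL << 62)

int main(void)
{
	/* pretend LR0 and LR2 hold interrupts with state, LR1/LR3 are empty */
	uint64_t vgic_lr[4] = { ICH_LR_STATE | 27, 0, ICH_LR_STATE | 30, 0 };
	uint16_t live_lrs = 0;
	int i;

	/* restore path: record which LRs were actually programmed */
	for (i = 0; i < 4; i++)
		if (vgic_lr[i] & ICH_LR_STATE)
			live_lrs |= 1 << i;

	/* save path: only LRs whose bit is set need a sysreg access */
	printf("live_lrs = 0x%x\n", live_lrs);	/* prints live_lrs = 0x5 */
	return 0;
}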