2016-04-07 03:20-0500, Suravee Suthikulpanit: > From: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx> > > This patch introduces VMEXIT handlers, avic_incomplete_ipi_interception() > and avic_unaccelerated_access_interception() along with two trace points > (trace_kvm_avic_incomplete_ipi and trace_kvm_avic_unaccelerated_access). > > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx> > --- > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > @@ -3515,6 +3515,250 @@ static int mwait_interception(struct vcpu_svm *svm) > +static u32 *avic_get_logical_id_entry(struct kvm_vcpu *vcpu, u8 mda, bool flat) > +{ > + struct kvm_arch *vm_data = &vcpu->kvm->arch; > + int index; > + u32 *logical_apic_id_table; > + > + if (flat) { /* flat */ > + if (mda > 7) Don't you want to check that just one bit it set? > + return NULL; > + index = mda; > + } else { /* cluster */ > + int apic_id = mda & 0xf; > + int cluster_id = (mda & 0xf0) >> 8; ">> 4". > + > + if (apic_id > 4 || cluster_id >= 0xf) > + return NULL; > + index = (cluster_id << 2) + apic_id; ffs(apic_id), because 'apic_id' must be compacted into 2 bits. > + } > + logical_apic_id_table = (u32 *) page_address(vm_data->avic_logical_id_table_page); > + > + return &logical_apic_id_table[index]; > +} > + > +static int avic_handle_ldr_write(struct kvm_vcpu *vcpu, u8 g_physical_id, > + u8 logical_id) > +{ > + u32 mod; > + u32 *entry, new_entry; > + struct vcpu_svm *svm = to_svm(vcpu); > + > + if (!svm) > + return -EINVAL; (No need to check, svm is taken for granted.) > + > + mod = (kvm_apic_get_reg(svm->vcpu.arch.apic, APIC_DFR) >> 28) & 0xf; > + entry = avic_get_logical_id_entry(vcpu, logical_id, (mod == 0xf)); (Use "kvm_apic_get_reg(apic, APIC_DFR) == APIC_DFR_FLAT".) > + if (!entry) > + return -EINVAL; > + > + new_entry = READ_ONCE(*entry); > + new_entry &= ~AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK; > + new_entry |= (g_physical_id & AVIC_LOGICAL_ID_ENTRY_GUEST_PHYSICAL_ID_MASK); > + new_entry |= AVIC_LOGICAL_ID_ENTRY_VALID_MASK; > + WRITE_ONCE(*entry, new_entry); > + > + return 0; > +} > + > +static int avic_unaccel_trap_write(struct vcpu_svm *svm) > +{ > + u32 offset = svm->vmcb->control.exit_info_1 & 0xFF0; The previous function knew the offset, just pass it as an argument. (Or use AVIC_UNACCEL_ACCESS_OFFSET_MASK, because defining and not using it everywhere is too sad.) > + struct kvm_lapic *apic = svm->vcpu.arch.apic; > + u32 reg = kvm_apic_get_reg(apic, offset); > + > + switch (offset) { > + case APIC_ID: { > + u32 aid = (reg >> 24) & 0xff; > + u64 *o_ent = avic_get_physical_id_entry(&svm->vcpu, > + svm->vcpu.vcpu_id); > + u64 *n_ent = avic_get_physical_id_entry(&svm->vcpu, aid); Write old and new. (Skip "_ent" if you want to save characters.) > + if (!n_ent || !o_ent) > + return 0; > + > + /* We need to move physical_id_entry to new offset */ > + *n_ent = *o_ent; > + *o_ent = 0ULL; > + svm->avic_physical_id_cache = n_ent; > + break; > + } > + case APIC_LDR: { > + int ret, lid; > + int dlid = (reg >> 24) & 0xff; > + > + if (!dlid) > + return 0; ldr == 0 should be handled as well. > + > + lid = ffs(dlid) - 1; > + ret = avic_handle_ldr_write(&svm->vcpu, svm->vcpu.vcpu_id, lid); > + if (ret) > + return 0; OS can actually change LDR, so the old one should be invalidated. (No OS does, but that is not an important factor for the hypervisor.) > + > + break; > + } > + case APIC_DFR: { > + struct kvm_arch *vm_data = &svm->vcpu.kvm->arch; > + u32 mod = (reg >> 28) & 0xf; > + > + /* > + * We assume that all local APICs are using the same type. > + * If this changes, we need to rebuild the AVIC logical > + * APID id table with subsequent write to APIC_LDR. > + */ The code doesn't rebuild avic_logical_id_table_page, it just flushes the old one. > + if (vm_data->ldr_mode != mod) { > + clear_page(page_address(vm_data->avic_logical_id_table_page)); > + vm_data->ldr_mode = mod; > + } > + break; > + } All these cases need to be called on KVM_SET_LAPIC -- the userspace can provide completely new set of APIC registers and AVIC should build its maps with them. Test with save/restore or migration. > +static int avic_unaccelerated_access_interception(struct vcpu_svm *svm) > +{ > + int ret = 0; > + u32 offset = svm->vmcb->control.exit_info_1 & > + AVIC_UNACCEL_ACCESS_OFFSET_MASK; > + u32 vector = svm->vmcb->control.exit_info_2 & > + AVIC_UNACCEL_ACCESS_VECTOR_MASK; > + bool write = (svm->vmcb->control.exit_info_1 >> 32) & > + AVIC_UNACCEL_ACCESS_WRITE_MASK; > + bool trap = is_avic_unaccelerated_access_trap(offset); > + > + trace_kvm_avic_unaccelerated_access(svm->vcpu.vcpu_id, offset, > + trap, write, vector); > + > + /** > + * AVIC does not support x2APIC registers, and we only advertise > + * xAPIC when enable AVIC. Therefore, access to these registers > + * will not be supported. > + */ x2APIC only has MSR interface and I don't think it has much do with offset >= 0x400. AMD defines few registers in the extended area, but we don't seem emulate them. > + if (offset >= 0x400) { > + WARN(1, "Unsupported APIC offset %#x\n", offset); "printk_ratelimited(KERN_INFO " is the most severe message you could give. I think that not printing anything is best, > + return ret; because we should not return, but continue to emulate the access. > + } > + > + if (trap) { > + /* Handling Trap */ > + if (!write) /* Trap read should never happens */ > + BUG(); (BUG_ON(!write) is shorter, though I would avoid BUG -- only guests are going to fail, so we don't need to kill the host.) > + ret = avic_unaccel_trap_write(svm); > + } else { > + /* Handling Fault */ > + ret = (emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE); > + } > + > + return ret; > +} -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html