2016-07-25 04:32-0500, Suravee Suthikulpanit: > diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c > @@ -1485,9 +1521,16 @@ static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) > WARN_ON(is_run == !!(entry & AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK)); > > entry &= ~AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; > - if (is_run) > + if (is_run) { > entry |= AVIC_PHYSICAL_ID_ENTRY_IS_RUNNING_MASK; > - WRITE_ONCE(*(svm->avic_physical_id_cache), entry); > + WRITE_ONCE(*(svm->avic_physical_id_cache), entry); > + avic_update_iommu(vcpu, h_physical_id, > + page_to_phys(svm->avic_backing_page), 1); > + } else { > + avic_update_iommu(vcpu, h_physical_id, > + page_to_phys(svm->avic_backing_page), 0); > + WRITE_ONCE(*(svm->avic_physical_id_cache), entry); > + } You need to do the same change twice ... I guess it is time to factor the code. :) Wouldn't the following be an improvement in the !is_run path too? static void avic_set_running(struct kvm_vcpu *vcpu, bool is_run) { svm->avic_is_running = is_run; if (is_run) avic_vcpu_load(vcpu, vcpu->cpu); else avic_vcpu_put(vcpu); } > +static void svm_pi_list_add(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) > +{ > + bool found = false; > + unsigned long flags; > + struct amd_iommu_pi_data *cur; > + > + spin_lock_irqsave(&svm->pi_list_lock, flags); > + list_for_each_entry(cur, &svm->pi_list, node) { > + if (cur->ir_data != pi->ir_data) > + continue; > + found = true; This optimization turned out to be ugly ... sorry. Manipulation with pi_list is hard to understand, IMO, so a comment explaining why we couldn't do that without traversing a list and comparing pi->ir_data would be nice. Maybe I was a bit confused by reusing amd_iommu_pi_data when all we care about is a list of cur->ir_data -- can't we have a list of just ir_data? Allocating the list node in svm_pi_list_add() would make it nicely symmetrical with svm_pi_list_del() too. > + break; > + } > + if (!found) > + list_add(&pi->node, &svm->pi_list); > + spin_unlock_irqrestore(&svm->pi_list_lock, flags); > +} > + > +static void svm_pi_list_del(struct vcpu_svm *svm, struct amd_iommu_pi_data *pi) > +{ > + unsigned long flags; > + struct amd_iommu_pi_data *cur, *next; > + > + spin_lock_irqsave(&svm->pi_list_lock, flags); > + list_for_each_entry_safe(cur, next, &svm->pi_list, node) { No need to use _safe loop if you break on the first deletion. > + if (cur->ir_data != pi->ir_data) > + continue; > + list_del(&cur->node); > + kfree(cur); > + break; > + } > + spin_unlock_irqrestore(&svm->pi_list_lock, flags); > +} > + > +/* > + * svm_update_pi_irte - set IRTE for Posted-Interrupts > + * > + * @kvm: kvm > + * @host_irq: host irq of the interrupt > + * @guest_irq: gsi of the interrupt > + * @set: set or unset PI > + * returns 0 on success, < 0 on failure > + */ > +static int svm_update_pi_irte(struct kvm *kvm, unsigned int host_irq, > + uint32_t guest_irq, bool set) > +{ > + struct kvm_kernel_irq_routing_entry *e; > + struct kvm_irq_routing_table *irq_rt; > + int idx, ret = -EINVAL; > + > + if (!kvm_arch_has_assigned_device(kvm) || > + !irq_remapping_cap(IRQ_POSTING_CAP)) > + return 0; > + > + pr_debug("SVM: %s: host_irq=%#x, guest_irq=%#x, set=%#x\n", > + __func__, host_irq, guest_irq, set); > + > + idx = srcu_read_lock(&kvm->irq_srcu); > + irq_rt = srcu_dereference(kvm->irq_routing, &kvm->irq_srcu); > + WARN_ON(guest_irq >= irq_rt->nr_rt_entries); > + > + hlist_for_each_entry(e, &irq_rt->map[guest_irq], link) { > + struct kvm_lapic_irq irq; > + struct vcpu_data vcpu_info; > + struct kvm_vcpu *vcpu = NULL; > + struct vcpu_svm *svm = NULL; > + > + if (e->type != KVM_IRQ_ROUTING_MSI) > + continue; > + > + /** > + * Note: > + * The HW cannot support posting multicast/broadcast > + * interrupts to a vCPU. So, we still use interrupt > + * remapping for these kind of interrupts. > + * > + * For lowest-priority interrupts, we only support > + * those with single CPU as the destination, e.g. user > + * configures the interrupts via /proc/irq or uses > + * irqbalance to make the interrupts single-CPU. > + */ > + kvm_set_msi_irq(e, &irq); > + if (kvm_intr_is_single_vcpu(kvm, &irq, &vcpu)) { > + svm = to_svm(vcpu); > + vcpu_info.pi_desc_addr = page_to_phys(svm->avic_backing_page); > + vcpu_info.vector = irq.vector; > + > + trace_kvm_pi_irte_update(vcpu->vcpu_id, host_irq, e->gsi, > + vcpu_info.vector, > + vcpu_info.pi_desc_addr, set); Trace below the if/else, we might update something in both of them. > + > + pr_debug("SVM: %s: use GA mode for irq %u\n", __func__, > + irq.vector); > + } else { > + set = false; > + > + pr_debug("SVM: %s: use legacy intr remap mode for irq %u\n", > + __func__, irq.vector); > + } > + > + /** > + * When AVIC is disabled, we fall-back to setup > + * IRTE w/ legacy mode > + */ > + if (set && kvm_vcpu_apicv_active(&svm->vcpu)) { > + struct amd_iommu_pi_data *pi_data; > + > + /** > + * Allocating new amd_iommu_pi_data, which will get > + * add to the per-vcpu pi_list. > + */ > + pi_data = kzalloc(sizeof(struct amd_iommu_pi_data), > + GFP_KERNEL); > + if (!pi_data) { > + ret = -ENOMEM; > + goto out; > + } > + > + /* Try to enable guest_mode in IRTE */ > + pi_data->ga_tag = AVIC_GATAG(kvm->arch.avic_vm_id, > + vcpu->vcpu_id); > + pi_data->vcpu_data = &vcpu_info; > + pi_data->is_guest_mode = true; > + ret = irq_set_vcpu_affinity(host_irq, pi_data); > + > + /** > + * We save the pointer to pi_data in the struct > + * vcpu_svm so that we can reference to them directly > + * when we update vcpu scheduling information in IOMMU > + * irte. > + */ > + if (!ret && pi_data->is_guest_mode) > + svm_pi_list_add(svm, pi_data); pi_data leaks in the else case. (Allocating the element in svm_pi_list_add() would solve this.) > + } else { > + /* Use legacy mode in IRTE */ > + struct amd_iommu_pi_data pi; > + > + /** > + * Here, pi is used to: > + * - Tell IOMMU to use legacy mode for this interrupt. > + * - Retrieve ga_tag of prior interrupt remapping data. > + */ > + pi.is_guest_mode = false; > + ret = irq_set_vcpu_affinity(host_irq, &pi); > + > + /** > + * We need to check if the interrupt was previously > + * setup with the guest_mode by checking if the ga_tag > + * was cached. If so, we need to clean up the per-vcpu > + * pi_list. > + */ > + if (!ret && pi.ga_tag) { > + struct kvm_vcpu *vcpu = kvm_get_vcpu_by_id(kvm, > + AVIC_GATAG_TO_VCPUID(pi.ga_tag)); > + > + if (vcpu) > + svm_pi_list_del(to_svm(vcpu), &pi); > + } > + } > + > + if (ret < 0) { > + pr_err("%s: failed to update PI IRTE\n", __func__); > + goto out; > + } > + } > + > + ret = 0; > +out: > + srcu_read_unlock(&kvm->irq_srcu, idx); > + return ret; > +} > + > static int svm_nmi_allowed(struct kvm_vcpu *vcpu) > { > struct vcpu_svm *svm = to_svm(vcpu); -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html