On 20/03/19 09:14, Suthikulpanit, Suravee wrote: > When AVIC is enabled and the VM has discrete device assignment, > the interrupt remapping table (IRT) is used to keep track of which > destination APIC ID the IOMMU will inject the device interrupt to. > > This means every time a vcpu is blocked or context-switched (i.e. > vcpu_blocking/unblocking() and vcpu_load/put()), the information > in IRT must be updated and the IOMMU IRT flush command must be > issued. > > The current implementation flushes IOMMU IRT every time an entry > is modified. If the assigned device has a large number of interrupts > (hence a large number of entries), this would add a large amount of > overhead to vcpu context-switch. Instead, this can be optimized by > only flushing the IRT once per vcpu context-switch per device after all > IRT entries are modified. > > The function amd_iommu_update_ga() is refactored to only update > the IRT entry, while the amd_iommu_sync_ga() is introduced to allow > IRT flushing to be done separately. > > Cc: Joerg Roedel <joro@xxxxxxxxxx> > Cc: Radim Krčmář <rkrcmar@xxxxxxxxxx> > Cc: Paolo Bonzini <pbonzini@xxxxxxxxxx> > Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx> > --- > arch/x86/kvm/svm.c | 35 ++++++++++++++++++++++++++++++++++- > drivers/iommu/amd_iommu.c | 20 +++++++++++++++++--- > include/linux/amd-iommu.h | 13 ++++++++++--- > 3 files changed, 61 insertions(+), 7 deletions(-) I found this patch in my inbox... I'd rather avoid allocating 8k of RAM per vCPU. Can you make it per-VM? 
Paolo > + /* > + * Bitmap used to store PCI devid to sync > + * AMD IOMMU interrupt remapping table > + */ > + unsigned long *avic_devid_sync_bitmap; > }; > > /* > @@ -1984,6 +1992,7 @@ static inline int > avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) > { > int ret = 0; > + int devid = 0; > unsigned long flags; > struct amd_svm_iommu_ir *ir; > struct vcpu_svm *svm = to_svm(vcpu); > @@ -2001,9 +2010,21 @@ avic_update_iommu_vcpu_affinity(struct kvm_vcpu *vcpu, int cpu, bool r) > goto out; > > list_for_each_entry(ir, &svm->ir_list, node) { > - ret = amd_iommu_update_ga(cpu, r, ir->data); > + ret = amd_iommu_update_ga(cpu, r, ir->data, &devid); > if (ret) > break; > + set_bit(devid, svm->avic_devid_sync_bitmap); > + } > + > + /* Sync AMD IOMMU interrupt remapping table changes for each device. */ > + devid = find_next_bit(svm->avic_devid_sync_bitmap, > + AVIC_DEVID_BITMAP_SIZE, 0); > + > + while (devid < AVIC_DEVID_BITMAP_SIZE) { > + clear_bit(devid, svm->avic_devid_sync_bitmap); > + ret = amd_iommu_sync_ga(devid); > + devid = find_next_bit(svm->avic_devid_sync_bitmap, > + AVIC_DEVID_BITMAP_SIZE, devid+1); > } > out: > spin_unlock_irqrestore(&svm->ir_list_lock, flags); > @@ -2107,6 +2128,13 @@ static int avic_init_vcpu(struct vcpu_svm *svm) > INIT_LIST_HEAD(&svm->ir_list); > spin_lock_init(&svm->ir_list_lock); > > + svm->avic_devid_sync_bitmap = (void *)__get_free_pages( > + GFP_KERNEL | __GFP_ZERO, > + get_order(AVIC_DEVID_BITMAP_SIZE/8)); > + if (svm->avic_devid_sync_bitmap == NULL) > + ret = -ENOMEM; > + memset(svm->avic_devid_sync_bitmap, 0, AVIC_DEVID_BITMAP_SIZE/8); > + > return ret; > } > > @@ -2221,6 +2249,11 @@ static void svm_free_vcpu(struct kvm_vcpu *vcpu) > __free_pages(virt_to_page(svm->msrpm), MSRPM_ALLOC_ORDER); > __free_page(virt_to_page(svm->nested.hsave)); > __free_pages(virt_to_page(svm->nested.msrpm), MSRPM_ALLOC_ORDER); > + > + free_pages((unsigned long)svm->avic_devid_sync_bitmap, > + 
get_order(AVIC_DEVID_BITMAP_SIZE/8)); > + svm->avic_devid_sync_bitmap = NULL; > + > kvm_vcpu_uninit(vcpu); > kmem_cache_free(x86_fpu_cache, svm->vcpu.arch.guest_fpu); > kmem_cache_free(kvm_vcpu_cache, svm); > diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c > index 2a7b78bb98b4..637bcc9192e5 100644 > --- a/drivers/iommu/amd_iommu.c > +++ b/drivers/iommu/amd_iommu.c > @@ -4499,7 +4499,20 @@ int amd_iommu_create_irq_domain(struct amd_iommu *iommu) > return 0; > } > > -int amd_iommu_update_ga(int cpu, bool is_run, void *data) > +int amd_iommu_sync_ga(int devid) > +{ > + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; > + > + if (!iommu) > + return -ENODEV; > + > + iommu_flush_irt(iommu, devid); > + iommu_completion_wait(iommu); > + return 0; > +} > +EXPORT_SYMBOL(amd_iommu_sync_ga); > + > +int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *id) > { > unsigned long flags; > struct amd_iommu *iommu; > @@ -4521,6 +4534,9 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data) > if (!table) > return -ENODEV; > > + if (id) > + *id = devid; > + > raw_spin_lock_irqsave(&table->lock, flags); > > if (ref->lo.fields_vapic.guest_mode) { > @@ -4536,8 +4552,6 @@ int amd_iommu_update_ga(int cpu, bool is_run, void *data) > > raw_spin_unlock_irqrestore(&table->lock, flags); > > - iommu_flush_irt(iommu, devid); > - iommu_completion_wait(iommu); > return 0; > } > EXPORT_SYMBOL(amd_iommu_update_ga); > diff --git a/include/linux/amd-iommu.h b/include/linux/amd-iommu.h > index 09751d349963..b94d4b33dfd7 100644 > --- a/include/linux/amd-iommu.h > +++ b/include/linux/amd-iommu.h > @@ -193,8 +193,9 @@ static inline int amd_iommu_detect(void) { return -ENODEV; } > /* IOMMU AVIC Function */ > extern int amd_iommu_register_ga_log_notifier(int (*notifier)(u32)); > > -extern int > -amd_iommu_update_ga(int cpu, bool is_run, void *data); > +extern int amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid); > + > +extern int 
amd_iommu_sync_ga(int devid); > > #else /* defined(CONFIG_AMD_IOMMU) && defined(CONFIG_IRQ_REMAP) */ > > @@ -205,7 +206,13 @@ amd_iommu_register_ga_log_notifier(int (*notifier)(u32)) > } > > static inline int > -amd_iommu_update_ga(int cpu, bool is_run, void *data) > +amd_iommu_update_ga(int cpu, bool is_run, void *data, int *devid) > +{ > + return 0; > +} > + > +static inline int > +amd_iommu_sync_ga(int devid) > { > return 0; > } >