On Tue, 2021-12-07 at 15:57 -0500, Matthew Rosato wrote: > Add a routine that will perform a shadow operation between a guest > and host IOAT. A subsequent patch will invoke this in response to > an 04 RPCIT instruction intercept. > > Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx> > --- > arch/s390/include/asm/kvm_pci.h | 1 + > arch/s390/include/asm/pci_dma.h | 1 + > arch/s390/kvm/pci.c | 191 ++++++++++++++++++++++++++++++++ > arch/s390/kvm/pci.h | 4 +- > 4 files changed, 196 insertions(+), 1 deletion(-) > > diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h > index 254275399f21..97e3a369135d 100644 > --- a/arch/s390/include/asm/kvm_pci.h > +++ b/arch/s390/include/asm/kvm_pci.h > @@ -30,6 +30,7 @@ struct kvm_zdev_ioat { > struct kvm_zdev { > struct zpci_dev *zdev; > struct kvm *kvm; > + u64 rpcit_count; > struct kvm_zdev_ioat ioat; > struct zpci_fib fib; > }; > diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h > index e1d3c1d3fc8a..0ca15e5db3d9 100644 > --- a/arch/s390/include/asm/pci_dma.h > +++ b/arch/s390/include/asm/pci_dma.h > @@ -52,6 +52,7 @@ enum zpci_ioat_dtype { > #define ZPCI_TABLE_ENTRIES (ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE) > #define ZPCI_TABLE_PAGES (ZPCI_TABLE_SIZE >> PAGE_SHIFT) > #define ZPCI_TABLE_ENTRIES_PAGES (ZPCI_TABLE_ENTRIES * ZPCI_TABLE_PAGES) > +#define ZPCI_TABLE_ENTRIES_PER_PAGE (ZPCI_TABLE_ENTRIES / ZPCI_TABLE_PAGES) > > #define ZPCI_TABLE_BITS 11 > #define ZPCI_PT_BITS 8 > diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c > index a1c0c0881332..858c5ecdc8b9 100644 > --- a/arch/s390/kvm/pci.c > +++ b/arch/s390/kvm/pci.c > @@ -123,6 +123,195 @@ int kvm_s390_pci_aen_init(u8 nisc) > return rc; > } > > +static int dma_shadow_cpu_trans(struct kvm_vcpu *vcpu, unsigned long *entry, > + unsigned long *gentry) > +{ > + unsigned long idx; > + struct page *page; > + void *gaddr = NULL; > + kvm_pfn_t pfn; > + gpa_t addr; > + int rc = 0; > + > + if (pt_entry_isvalid(*gentry)) 
{ > + /* pin and validate */ > + addr = *gentry & ZPCI_PTE_ADDR_MASK; > + idx = srcu_read_lock(&vcpu->kvm->srcu); > + page = gfn_to_page(vcpu->kvm, gpa_to_gfn(addr)); > + srcu_read_unlock(&vcpu->kvm->srcu, idx); > + if (is_error_page(page)) > + return -EIO; > + gaddr = page_to_virt(page) + (addr & ~PAGE_MASK); Hmm, this looks like a virtual vs physical address mixup to me that is currently not a problem because kernel virtual addresses are equal to their physical address. Here page_to_virt(page) gives us a virtual address but the entries in the I/O translation table have to be physical (aka absolute) addresses. With my commit "s390/pci: use physical addresses in DMA tables" currently in the s390 feature branch this is also reflected in the argument types taken by set_pt_pfaa() below so gaddr should have type phys_addr_t not void *. That should also remove the need for the cast to unsigned long for the duplicate check. > + } > + > + if (pt_entry_isvalid(*entry)) { > + /* Either we are invalidating, replacing or no-op */ > + if (gaddr) { > + if ((*entry & ZPCI_PTE_ADDR_MASK) == > + (unsigned long)gaddr) { > + /* Duplicate */ > + kvm_release_pfn_dirty(*entry >> PAGE_SHIFT); > + } else { > + /* Replace */ > + pfn = (*entry >> PAGE_SHIFT); > + invalidate_pt_entry(entry); > + set_pt_pfaa(entry, gaddr); > + validate_pt_entry(entry); > + kvm_release_pfn_dirty(pfn); > + rc = 1; > + } > + } else { > + /* Invalidate */ > + pfn = (*entry >> PAGE_SHIFT); > + invalidate_pt_entry(entry); > + kvm_release_pfn_dirty(pfn); > + rc = 1; > + } > + } else if (gaddr) { > + /* New Entry */ > + set_pt_pfaa(entry, gaddr); > + validate_pt_entry(entry); > + } > + > + return rc; > +} > + > +unsigned long *dma_walk_guest_cpu_trans(struct kvm_vcpu *vcpu, > + struct kvm_zdev_ioat *ioat, > + dma_addr_t dma_addr) > +{ > + unsigned long *rto, *sto, *pto; > + unsigned int rtx, rts, sx, px, idx; > + struct page *page; > + gpa_t addr; > + int i; > + > + /* Pin guest segment table if needed */ > + rtx = 
calc_rtx(dma_addr); > + rto = ioat->head[(rtx / ZPCI_TABLE_ENTRIES_PER_PAGE)]; > + rts = rtx * ZPCI_TABLE_PAGES; > + if (!ioat->seg[rts]) { > + if (!reg_entry_isvalid(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE])) > + return NULL; > + sto = get_rt_sto(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE]); > + addr = ((u64)sto & ZPCI_RTE_ADDR_MASK); > + idx = srcu_read_lock(&vcpu->kvm->srcu); > + for (i = 0; i < ZPCI_TABLE_PAGES; i++) { > + page = gfn_to_page(vcpu->kvm, gpa_to_gfn(addr)); > + if (is_error_page(page)) { > + srcu_read_unlock(&vcpu->kvm->srcu, idx); > + return NULL; > + } > + ioat->seg[rts + i] = page_to_virt(page) + > + (addr & ~PAGE_MASK); Here on the other hand I think the page_to_virt() is correct since you want the virtual addresses to be able to dereference it, correct? > + addr += PAGE_SIZE; > + } > + srcu_read_unlock(&vcpu->kvm->srcu, idx); > + } > + > + /* Allocate pin pointers for another segment table if needed */ > + if (!ioat->pt[rtx]) { > + ioat->pt[rtx] = kcalloc(ZPCI_TABLE_ENTRIES, > + (sizeof(unsigned long *)), GFP_KERNEL); > + if (!ioat->pt[rtx]) > + return NULL; > + } > + /* Pin guest page table if needed */ > + sx = calc_sx(dma_addr); > + sto = ioat->seg[(rts + (sx / ZPCI_TABLE_ENTRIES_PER_PAGE))]; > + if (!ioat->pt[rtx][sx]) { > + if (!reg_entry_isvalid(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE])) > + return NULL; > + pto = get_st_pto(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE]); > + if (!pto) > + return NULL; > + addr = ((u64)pto & ZPCI_STE_ADDR_MASK); > + idx = srcu_read_lock(&vcpu->kvm->srcu); > + page = gfn_to_page(vcpu->kvm, gpa_to_gfn(addr)); > + srcu_read_unlock(&vcpu->kvm->srcu, idx); > + if (is_error_page(page)) > + return NULL; > + ioat->pt[rtx][sx] = page_to_virt(page) + (addr & ~PAGE_MASK); Same as above. > + } > + pto = ioat->pt[rtx][sx]; > + > + /* Return guest PTE */ > + px = calc_px(dma_addr); > + return &pto[px]; > +} > + > ---8<---