Re: [PATCH 23/32] KVM: s390: pci: handle refresh of PCI translations

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2021-12-07 at 15:57 -0500, Matthew Rosato wrote:
> Add a routine that will perform a shadow operation between a guest
> and host IOAT.  A subsequent patch will invoke this in response to
> an 04 RPCIT instruction intercept.
> 
> Signed-off-by: Matthew Rosato <mjrosato@xxxxxxxxxxxxx>
> ---
>  arch/s390/include/asm/kvm_pci.h |   1 +
>  arch/s390/include/asm/pci_dma.h |   1 +
>  arch/s390/kvm/pci.c             | 191 ++++++++++++++++++++++++++++++++
>  arch/s390/kvm/pci.h             |   4 +-
>  4 files changed, 196 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/s390/include/asm/kvm_pci.h b/arch/s390/include/asm/kvm_pci.h
> index 254275399f21..97e3a369135d 100644
> --- a/arch/s390/include/asm/kvm_pci.h
> +++ b/arch/s390/include/asm/kvm_pci.h
> @@ -30,6 +30,7 @@ struct kvm_zdev_ioat {
>  struct kvm_zdev {
>  	struct zpci_dev *zdev;
>  	struct kvm *kvm;
> +	u64 rpcit_count;
>  	struct kvm_zdev_ioat ioat;
>  	struct zpci_fib fib;
>  };
> diff --git a/arch/s390/include/asm/pci_dma.h b/arch/s390/include/asm/pci_dma.h
> index e1d3c1d3fc8a..0ca15e5db3d9 100644
> --- a/arch/s390/include/asm/pci_dma.h
> +++ b/arch/s390/include/asm/pci_dma.h
> @@ -52,6 +52,7 @@ enum zpci_ioat_dtype {
>  #define ZPCI_TABLE_ENTRIES		(ZPCI_TABLE_SIZE / ZPCI_TABLE_ENTRY_SIZE)
>  #define ZPCI_TABLE_PAGES		(ZPCI_TABLE_SIZE >> PAGE_SHIFT)
>  #define ZPCI_TABLE_ENTRIES_PAGES	(ZPCI_TABLE_ENTRIES * ZPCI_TABLE_PAGES)
> +#define ZPCI_TABLE_ENTRIES_PER_PAGE	(ZPCI_TABLE_ENTRIES / ZPCI_TABLE_PAGES)
>  
>  #define ZPCI_TABLE_BITS			11
>  #define ZPCI_PT_BITS			8
> diff --git a/arch/s390/kvm/pci.c b/arch/s390/kvm/pci.c
> index a1c0c0881332..858c5ecdc8b9 100644
> --- a/arch/s390/kvm/pci.c
> +++ b/arch/s390/kvm/pci.c
> @@ -123,6 +123,195 @@ int kvm_s390_pci_aen_init(u8 nisc)
>  	return rc;
>  }
>  
> +static int dma_shadow_cpu_trans(struct kvm_vcpu *vcpu, unsigned long *entry,
> +				unsigned long *gentry)
> +{
> +	unsigned long idx;
> +	struct page *page;
> +	void *gaddr = NULL;
> +	kvm_pfn_t pfn;
> +	gpa_t addr;
> +	int rc = 0;
> +
> +	if (pt_entry_isvalid(*gentry)) {
> +		/* pin and validate */
> +		addr = *gentry & ZPCI_PTE_ADDR_MASK;
> +		idx = srcu_read_lock(&vcpu->kvm->srcu);
> +		page = gfn_to_page(vcpu->kvm, gpa_to_gfn(addr));
> +		srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +		if (is_error_page(page))
> +			return -EIO;
> +		gaddr = page_to_virt(page) + (addr & ~PAGE_MASK);

Hmm, this looks like a virtual vs. physical address mixup to me. It is
currently not a problem because kernel virtual addresses are equal to
their physical addresses. Here page_to_virt(page) gives us a virtual
address, but the entries in the I/O translation table have to be
physical (aka absolute) addresses.

With my commit "s390/pci: use physical addresses in DMA tables",
currently in the s390 feature branch, this is also reflected in the
argument types taken by set_pt_pfaa() below, so gaddr should have type
phys_addr_t, not void *. That should also remove the need for the cast
to unsigned long for the duplicate check.

> +	}
> +
> +	if (pt_entry_isvalid(*entry)) {
> +		/* Either we are invalidating, replacing or no-op */
> +		if (gaddr) {
> +			if ((*entry & ZPCI_PTE_ADDR_MASK) ==
> +			    (unsigned long)gaddr) {
> +				/* Duplicate */
> +				kvm_release_pfn_dirty(*entry >> PAGE_SHIFT);
> +			} else {
> +				/* Replace */
> +				pfn = (*entry >> PAGE_SHIFT);
> +				invalidate_pt_entry(entry);
> +				set_pt_pfaa(entry, gaddr);
> +				validate_pt_entry(entry);
> +				kvm_release_pfn_dirty(pfn);
> +				rc = 1;
> +			}
> +		} else {
> +			/* Invalidate */
> +			pfn = (*entry >> PAGE_SHIFT);
> +			invalidate_pt_entry(entry);
> +			kvm_release_pfn_dirty(pfn);
> +			rc = 1;
> +		}
> +	} else if (gaddr) {
> +		/* New Entry */
> +		set_pt_pfaa(entry, gaddr);
> +		validate_pt_entry(entry);
> +	}
> +
> +	return rc;
> +}
> +
> +unsigned long *dma_walk_guest_cpu_trans(struct kvm_vcpu *vcpu,
> +					struct kvm_zdev_ioat *ioat,
> +					dma_addr_t dma_addr)
> +{
> +	unsigned long *rto, *sto, *pto;
> +	unsigned int rtx, rts, sx, px, idx;
> +	struct page *page;
> +	gpa_t addr;
> +	int i;
> +
> +	/* Pin guest segment table if needed */
> +	rtx = calc_rtx(dma_addr);
> +	rto = ioat->head[(rtx / ZPCI_TABLE_ENTRIES_PER_PAGE)];
> +	rts = rtx * ZPCI_TABLE_PAGES;
> +	if (!ioat->seg[rts]) {
> +		if (!reg_entry_isvalid(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE]))
> +			return NULL;
> +		sto = get_rt_sto(rto[rtx % ZPCI_TABLE_ENTRIES_PER_PAGE]);
> +		addr = ((u64)sto & ZPCI_RTE_ADDR_MASK);
> +		idx = srcu_read_lock(&vcpu->kvm->srcu);
> +		for (i = 0; i < ZPCI_TABLE_PAGES; i++) {
> +			page = gfn_to_page(vcpu->kvm, gpa_to_gfn(addr));
> +			if (is_error_page(page)) {
> +				srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +				return NULL;
> +			}
> +			ioat->seg[rts + i] = page_to_virt(page) +
> +					     (addr & ~PAGE_MASK);

Here, on the other hand, I think the page_to_virt() is correct since you
want the virtual addresses to be able to dereference them, correct?

> +			addr += PAGE_SIZE;
> +		}
> +		srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +	}
> +
> +	/* Allocate pin pointers for another segment table if needed */
> +	if (!ioat->pt[rtx]) {
> +		ioat->pt[rtx] = kcalloc(ZPCI_TABLE_ENTRIES,
> +					(sizeof(unsigned long *)), GFP_KERNEL);
> +		if (!ioat->pt[rtx])
> +			return NULL;
> +	}
> +	/* Pin guest page table if needed */
> +	sx = calc_sx(dma_addr);
> +	sto = ioat->seg[(rts + (sx / ZPCI_TABLE_ENTRIES_PER_PAGE))];
> +	if (!ioat->pt[rtx][sx]) {
> +		if (!reg_entry_isvalid(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE]))
> +			return NULL;
> +		pto = get_st_pto(sto[sx % ZPCI_TABLE_ENTRIES_PER_PAGE]);
> +		if (!pto)
> +			return NULL;
> +		addr = ((u64)pto & ZPCI_STE_ADDR_MASK);
> +		idx = srcu_read_lock(&vcpu->kvm->srcu);
> +		page = gfn_to_page(vcpu->kvm, gpa_to_gfn(addr));
> +		srcu_read_unlock(&vcpu->kvm->srcu, idx);
> +		if (is_error_page(page))
> +			return NULL;
> +		ioat->pt[rtx][sx] = page_to_virt(page) + (addr & ~PAGE_MASK);

Same as above, i.e. page_to_virt() looks correct here as well.

> +	}
> +	pto = ioat->pt[rtx][sx];
> +
> +	/* Return guest PTE */
> +	px = calc_px(dma_addr);
> +	return &pto[px];
> +}
> +
> 
---8<---




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [Kernel Development]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite Info]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Linux Media]     [Device Mapper]

  Powered by Linux