On Fri, Sep 19, 2014 at 05:12:15PM +0200, Thomas Huth wrote: > > Hi Frank, > > On Fri, 19 Sep 2014 13:54:34 +0200 > frank.blaschka@xxxxxxxxxx wrote: > > > From: Frank Blaschka <frank.blaschka@xxxxxxxxxx> > > > > This patch implements the s390 pci instructions in qemu. This allows > > to attach qemu pci devices including vfio. This does not mean the > > devices are functional but at least detection and config/memory space > > access is working. > > > > Signed-off-by: Frank Blaschka <frank.blaschka@xxxxxxxxxx> > > --- > > target-s390x/Makefile.objs | 2 > > target-s390x/kvm.c | 52 +++ > > target-s390x/pci_ic.c | 621 +++++++++++++++++++++++++++++++++++++++++++++ > > target-s390x/pci_ic.h | 425 ++++++++++++++++++++++++++++++ > > 4 files changed, 1099 insertions(+), 1 deletion(-) > > > > --- a/target-s390x/Makefile.objs > > +++ b/target-s390x/Makefile.objs > > @@ -2,4 +2,4 @@ obj-y += translate.o helper.o cpu.o inte > > obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o > > obj-y += gdbstub.o > > obj-$(CONFIG_SOFTMMU) += ioinst.o arch_dump.o > > -obj-$(CONFIG_KVM) += kvm.o > > +obj-$(CONFIG_KVM) += kvm.o pci_ic.o > > --- a/target-s390x/kvm.c > > +++ b/target-s390x/kvm.c > > @@ -40,6 +40,7 @@ > > #include "exec/gdbstub.h" > > #include "trace.h" > > #include "qapi-event.h" > > +#include "pci_ic.h" > > > > /* #define DEBUG_KVM */ > > > > @@ -56,6 +57,7 @@ > > #define IPA0_B2 0xb200 > > #define IPA0_B9 0xb900 > > #define IPA0_EB 0xeb00 > > +#define IPA0_E3 0xe300 > > > > #define PRIV_B2_SCLP_CALL 0x20 > > #define PRIV_B2_CSCH 0x30 > > @@ -76,8 +78,17 @@ > > #define PRIV_B2_XSCH 0x76 > > > > #define PRIV_EB_SQBS 0x8a > > +#define PRIV_EB_PCISTB 0xd0 > > +#define PRIV_EB_SIC 0xd1 > > > > #define PRIV_B9_EQBS 0x9c > > +#define PRIV_B9_CLP 0xa0 > > +#define PRIV_B9_PCISTG 0xd0 > > +#define PRIV_B9_PCILG 0xd2 > > +#define PRIV_B9_RPCIT 0xd3 > > + > > +#define PRIV_E3_MPCIFC 0xd0 > > +#define PRIV_E3_STPCIFC 0xd4 > > > > #define DIAG_IPL 0x308 > > #define DIAG_KVM_HYPERCALL 0x500 > > @@ -813,6 +824,18 @@ static int handle_b9(S390CPU *cpu, struc > > int r = 0; > > > > switch (ipa1) { > > + case PRIV_B9_CLP: > > + r = kvm_clp_service_call(cpu, run); > > + break; > > + case PRIV_B9_PCISTG: > > + r = kvm_pcistg_service_call(cpu, run); > > + break; > > + case PRIV_B9_PCILG: > > + r = kvm_pcilg_service_call(cpu, run); > > + break; > > + case PRIV_B9_RPCIT: > > + r = kvm_rpcit_service_call(cpu, run); > > + break; > > case PRIV_B9_EQBS: > > /* just inject exception */ > > r = -1; > > @@ -831,6 +854,12 @@ static int handle_eb(S390CPU *cpu, struc > > int r = 0; > > > > switch (ipa1) { > > + case PRIV_EB_PCISTB: > > + r = kvm_pcistb_service_call(cpu, run); > > + break; > > + case PRIV_EB_SIC: > > + r = kvm_sic_service_call(cpu, run); > > + break; > > case PRIV_EB_SQBS: > > /* just inject exception */ > > r = -1; > > I'm not sure, but I think the handler for the eb instructions is wrong: > The second byte of the opcode is encoded in the lowest byte of the ipb > field, not the lowest byte of the ipa field (just like with the e3 > handler). Did you verify that your handlers get called correctly? > Hi Thomas, you are absolutely right. I already have a patch available for this issue but did not append it to this RFC post (since it is basically a bug fix). To the next posting I will add this patch as well. Will also fix the remaining issues thx for your review. > > @@ -844,6 +873,26 @@ static int handle_eb(S390CPU *cpu, struc > > return r; > > } > > > > +static int handle_e3(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1) > > +{ > > + int r = 0; > > + > > + switch (ipa1) { > > + case PRIV_E3_MPCIFC: > > + r = kvm_mpcifc_service_call(cpu, run); > > + break; > > + case PRIV_E3_STPCIFC: > > + r = kvm_stpcifc_service_call(cpu, run); > > + break; > > + default: > > + r = -1; > > + DPRINTF("KVM: unhandled PRIV: 0xe3%x\n", ipa1); > > + break; > > + } > > + > > + return r; > > +} > > Could you please replace "ipa1" with "ipb1" to avoid confusion here? > > > static int handle_hypercall(S390CPU *cpu, struct kvm_run *run) > > { > > CPUS390XState *env = &cpu->env; > > @@ -1038,6 +1087,9 @@ static int handle_instruction(S390CPU *c > > case IPA0_EB: > > r = handle_eb(cpu, run, ipa1); > > break; > > + case IPA0_E3: > > + r = handle_e3(cpu, run, run->s390_sieic.ipb & 0xff); > > + break; > > case IPA0_DIAG: > > r = handle_diag(cpu, run, run->s390_sieic.ipb); > > break; > > --- /dev/null > > +++ b/target-s390x/pci_ic.c > > @@ -0,0 +1,621 @@ > [...] > > + > > +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + S390PCIBusDevice *pbdev; > > + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > > + PciLgStg *rp; > > + uint64_t offset; > > + uint64_t data; > > + > > + cpu_synchronize_state(CPU(cpu)); > > + rp = (PciLgStg *)&env->regs[r2]; > > I think you have to check for r2 to be even here (and inject a > specification exception otherwise) > > You should also check rp->len for a valid value and inject an operand > exception otherwise. > > > + offset = env->regs[r2 + 1]; > > You should also check for a valid offset value here (and inject an > operand exception otherwise). > > ... and while you're at it - it also seems like the pci instructions > are priviledged, so you should check for not being in the problem state > here, too, just to be sure ;-) > > > + pbdev = s390_pci_find_dev_by_fh(rp->fh); > > + if (!pbdev) { > > + DPRINTF("pcilg no pci dev\n"); > > + return -EIO; > > + } > > + > > + if (rp->pcias < 6) { > > + MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory; > > + io_mem_read(mr, offset, &data, rp->len); > > Does this also work if len != 8 ? I think the data should be put in the > rightmost byte positions, with the unused leftmost bytes set to zero... > so some more logic might be needed here. > > > + } else if (rp->pcias == 15) { > > + data = pci_host_config_read_common( > > + pbdev->pdev, offset, pci_config_size(pbdev->pdev), rp->len); > > + > > + switch (rp->len) { > > + case 1: > > + break; > > + case 2: > > + data = cpu_to_le16(data); > > + break; > > + case 4: > > + data = cpu_to_le32(data); > > + break; > > + case 8: > > + data = cpu_to_le64(data); > > + break; > > + default: > > + abort(); > > + } > > + } else { > > + DPRINTF("invalid space\n"); > > Set condition code 1 in this case? > > > + } > > + > > + env->regs[r1] = data; > > + setcc(cpu, 0); > > + return 0; > > +} > > + > > +int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > > + PciLgStg *rp; > > + uint64_t offset, data; > > + S390PCIBusDevice *pbdev; > > + > > + cpu_synchronize_state(CPU(cpu)); > > + rp = (PciLgStg *)&env->regs[r2]; > > + offset = env->regs[r2 + 1]; > > You likely need the same sanity checks here as with the pcilg function. > > > + pbdev = s390_pci_find_dev_by_fh(rp->fh); > > + if (!pbdev) { > > + DPRINTF("pcistg no pci dev\n"); > > + return -EIO; > > + } > > + > > + data = env->regs[r1]; > > + > > + if (rp->pcias < 6) { > > + MemoryRegion *mr; > > + if (pbdev->msix_table_bar == rp->pcias && > > + offset >= pbdev->msix_table_offset && > > + offset <= pbdev->msix_table_offset + > > + (pbdev->msix_entries - 1) * PCI_MSIX_ENTRY_SIZE) { > > + offset = offset - pbdev->msix_table_offset; > > + mr = &pbdev->pdev->msix_table_mmio; > > + } else { > > + mr = pbdev->pdev->io_regions[rp->pcias].memory; > > + } > > + > > + io_mem_write(mr, offset, data, rp->len); > > Same concerns as with the pcilg - this likely only works for len == 8 ? > > > + } else if (rp->pcias == 15) { > > + switch (rp->len) { > > + case 1: > > + break; > > + case 2: > > + data = le16_to_cpu(data); > > + break; > > + case 4: > > + data = le32_to_cpu(data); > > + break; > > + case 8: > > + data = le64_to_cpu(data); > > + break; > > + default: > > + abort(); > > + } > > + > > + pci_host_config_write_common(pbdev->pdev, offset, > > + pci_config_size(pbdev->pdev), > > + data, rp->len); > > + } else { > > + DPRINTF("pcistg invalid space\n"); > > Set condition code 1 in this case? > > > + } > > + > > + setcc(cpu, 0); > > + return 0; > > +} > > + > > +static uint64_t guest_io_table_walk(uint64_t guest_iota, > > + uint64_t guest_dma_address) > > +{ > > + uint64_t sto_a, pto_a, px_a; > > + uint64_t sto, pto, pte; > > + uint32_t rtx, sx, px; > > + > > + rtx = calc_rtx(guest_dma_address); > > + sx = calc_sx(guest_dma_address); > > + px = calc_px(guest_dma_address); > > + > > + sto_a = guest_iota + rtx * sizeof(uint64_t); > > + cpu_physical_memory_rw(sto_a, (uint8_t *)&sto, sizeof(uint64_t), 0); > > + sto = (uint64_t)get_rt_sto(sto); > > + > > + pto_a = sto + sx * sizeof(uint64_t); > > + cpu_physical_memory_rw(pto_a, (uint8_t *)&pto, sizeof(uint64_t), 0); > > + pto = (uint64_t)get_st_pto(pto); > > + > > + px_a = pto + px * sizeof(uint64_t); > > + cpu_physical_memory_rw(px_a, (uint8_t *)&pte, sizeof(uint64_t), 0); > > + > > + return pte; > > +} > > + > > +int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20; > > + uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16; > > + uint32_t fh; > > + uint64_t pte; > > + S390PCIBusDevice *pbdev; > > + ram_addr_t size; > > + int flags; > > + IOMMUTLBEntry entry; > > + > > + cpu_synchronize_state(CPU(cpu)); > > + > > + fh = env->regs[r1] >> 32; > > + size = env->regs[r2 + 1]; > > Missing two checks again: > - r2 should be even > - CPU should not be in problem state > > > + pbdev = s390_pci_find_dev_by_fh(fh); > > + > > + if (!pbdev) { > > + DPRINTF("rpcit no pci dev\n"); > > + return -EIO; > > I think it would be better to set condition code 3 instead. > > > + } > > + > > + pte = guest_io_table_walk(pbdev->g_iota, env->regs[r2]); > > + flags = pte & ZPCI_PTE_FLAG_MASK; > > + entry.target_as = &address_space_memory; > > + entry.iova = env->regs[r2]; > > + entry.translated_addr = pte & ZPCI_PTE_ADDR_MASK; > > + entry.addr_mask = size - 1; > > + > > + if (flags & ZPCI_PTE_INVALID) { > > + entry.perm = IOMMU_NONE; > > + } else { > > + entry.perm = IOMMU_RW; > > + } > > + > > + memory_region_notify_iommu(pci_device_iommu_address_space( > > + pbdev->pdev)->root, entry); > > + > > + setcc(cpu, 0); > > + return 0; > > +} > > + > > +int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + DPRINTF("sic\n"); > > + return 0; > > +} > > + > > +int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run) > > +{ > > + CPUS390XState *env = &cpu->env; > > + uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4; > > + uint8_t r3 = run->s390_sieic.ipa & 0x000f; > > + PciStb *rp; > > + uint64_t gaddr; > > + uint64_t *uaddr, *pu; > > + hwaddr len; > > + S390PCIBusDevice *pbdev; > > + MemoryRegion *mr; > > + int i; > > + > > + cpu_synchronize_state(CPU(cpu)); > > + > > + rp = (PciStb *)&env->regs[r1]; > > + gaddr = get_base_disp_rsy(cpu, run); > > + len = rp->len; > > Not sure, but don't you also have to check the length and offset here > for valid ranges? > At least you should check for problem state again. > > > + pbdev = s390_pci_find_dev_by_fh(rp->fh); > > + if (!pbdev) { > > + DPRINTF("pcistb no pci dev fh 0x%x\n", rp->fh); > > + return -EIO; > > User cc3 instead? > > > + } > > + > > + uaddr = cpu_physical_memory_map(gaddr, &len, 0); > > + mr = pbdev->pdev->io_regions[rp->pcias].memory; > > + > > + pu = uaddr; > > + for (i = 0; i < rp->len / 8; i++) { > > + io_mem_write(mr, env->regs[r3] + i * 8, *pu, 8); > > + pu++; > > + } > > + > > + cpu_physical_memory_unmap(uaddr, len, 0, len); > > + setcc(cpu, 0); > > + return 0; > > +} > [...] > > Thomas > > -- > To unsubscribe from this list: send the line "unsubscribe kvm" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html