Re: [Qemu-devel] [RFC patch 5/6] s390: implement pci instruction

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Sep 19, 2014 at 05:12:15PM +0200, Thomas Huth wrote:
> 
>  Hi Frank,
> 
> On Fri, 19 Sep 2014 13:54:34 +0200
> frank.blaschka@xxxxxxxxxx wrote:
> 
> > From: Frank Blaschka <frank.blaschka@xxxxxxxxxx>
> > 
> > This patch implements the s390 pci instructions in qemu. This allows
> > to attach qemu pci devices including vfio. This does not mean the
> > devices are functional but at least detection and config/memory space
> > access is working.
> >     
> > Signed-off-by: Frank Blaschka <frank.blaschka@xxxxxxxxxx>
> > ---
> >  target-s390x/Makefile.objs |    2 
> >  target-s390x/kvm.c         |   52 +++
> >  target-s390x/pci_ic.c      |  621 +++++++++++++++++++++++++++++++++++++++++++++
> >  target-s390x/pci_ic.h      |  425 ++++++++++++++++++++++++++++++
> >  4 files changed, 1099 insertions(+), 1 deletion(-)
> > 
> > --- a/target-s390x/Makefile.objs
> > +++ b/target-s390x/Makefile.objs
> > @@ -2,4 +2,4 @@ obj-y += translate.o helper.o cpu.o inte
> >  obj-y += int_helper.o fpu_helper.o cc_helper.o mem_helper.o misc_helper.o
> >  obj-y += gdbstub.o
> >  obj-$(CONFIG_SOFTMMU) += ioinst.o arch_dump.o
> > -obj-$(CONFIG_KVM) += kvm.o
> > +obj-$(CONFIG_KVM) += kvm.o pci_ic.o
> > --- a/target-s390x/kvm.c
> > +++ b/target-s390x/kvm.c
> > @@ -40,6 +40,7 @@
> >  #include "exec/gdbstub.h"
> >  #include "trace.h"
> >  #include "qapi-event.h"
> > +#include "pci_ic.h"
> > 
> >  /* #define DEBUG_KVM */
> > 
> > @@ -56,6 +57,7 @@
> >  #define IPA0_B2                         0xb200
> >  #define IPA0_B9                         0xb900
> >  #define IPA0_EB                         0xeb00
> > +#define IPA0_E3                         0xe300
> > 
> >  #define PRIV_B2_SCLP_CALL               0x20
> >  #define PRIV_B2_CSCH                    0x30
> > @@ -76,8 +78,17 @@
> >  #define PRIV_B2_XSCH                    0x76
> > 
> >  #define PRIV_EB_SQBS                    0x8a
> > +#define PRIV_EB_PCISTB                  0xd0
> > +#define PRIV_EB_SIC                     0xd1
> > 
> >  #define PRIV_B9_EQBS                    0x9c
> > +#define PRIV_B9_CLP                     0xa0
> > +#define PRIV_B9_PCISTG                  0xd0
> > +#define PRIV_B9_PCILG                   0xd2
> > +#define PRIV_B9_RPCIT                   0xd3
> > +
> > +#define PRIV_E3_MPCIFC                  0xd0
> > +#define PRIV_E3_STPCIFC                 0xd4
> > 
> >  #define DIAG_IPL                        0x308
> >  #define DIAG_KVM_HYPERCALL              0x500
> > @@ -813,6 +824,18 @@ static int handle_b9(S390CPU *cpu, struc
> >      int r = 0;
> > 
> >      switch (ipa1) {
> > +    case PRIV_B9_CLP:
> > +        r = kvm_clp_service_call(cpu, run);
> > +        break;
> > +    case PRIV_B9_PCISTG:
> > +        r = kvm_pcistg_service_call(cpu, run);
> > +        break;
> > +    case PRIV_B9_PCILG:
> > +        r = kvm_pcilg_service_call(cpu, run);
> > +        break;
> > +    case PRIV_B9_RPCIT:
> > +        r = kvm_rpcit_service_call(cpu, run);
> > +        break;
> >      case PRIV_B9_EQBS:
> >          /* just inject exception */
> >          r = -1;
> > @@ -831,6 +854,12 @@ static int handle_eb(S390CPU *cpu, struc
> >      int r = 0;
> > 
> >      switch (ipa1) {
> > +    case PRIV_EB_PCISTB:
> > +        r = kvm_pcistb_service_call(cpu, run);
> > +        break;
> > +    case PRIV_EB_SIC:
> > +        r = kvm_sic_service_call(cpu, run);
> > +        break;
> >      case PRIV_EB_SQBS:
> >          /* just inject exception */
> >          r = -1;
> 
> I'm not sure, but I think the handler for the eb instructions is wrong:
> The second byte of the opcode is encoded in the lowest byte of the ipb
> field, not the lowest byte of the ipa field (just like with the e3
> handler). Did you verify that your handlers get called correctly?
>

Hi Thomas, you are absolutely right. I already have a patch available for
this issue but did not append it to this RFC post (since it is basically a bug
fix). To the next posting I will add this patch as well.

Will also fix the remaining issues thx for your review.
 
> > @@ -844,6 +873,26 @@ static int handle_eb(S390CPU *cpu, struc
> >      return r;
> >  }
> > 
> > +static int handle_e3(S390CPU *cpu, struct kvm_run *run, uint8_t ipa1)
> > +{
> > +    int r = 0;
> > +
> > +    switch (ipa1) {
> > +    case PRIV_E3_MPCIFC:
> > +        r = kvm_mpcifc_service_call(cpu, run);
> > +        break;
> > +    case PRIV_E3_STPCIFC:
> > +        r = kvm_stpcifc_service_call(cpu, run);
> > +        break;
> > +    default:
> > +        r = -1;
> > +        DPRINTF("KVM: unhandled PRIV: 0xe3%x\n", ipa1);
> > +        break;
> > +    }
> > +
> > +    return r;
> > +}
> 
> Could you please replace "ipa1" with "ipb1" to avoid confusion here?
> 
> >  static int handle_hypercall(S390CPU *cpu, struct kvm_run *run)
> >  {
> >      CPUS390XState *env = &cpu->env;
> > @@ -1038,6 +1087,9 @@ static int handle_instruction(S390CPU *c
> >      case IPA0_EB:
> >          r = handle_eb(cpu, run, ipa1);
> >          break;
> > +    case IPA0_E3:
> > +        r = handle_e3(cpu, run, run->s390_sieic.ipb & 0xff);
> > +        break;
> >      case IPA0_DIAG:
> >          r = handle_diag(cpu, run, run->s390_sieic.ipb);
> >          break;
> > --- /dev/null
> > +++ b/target-s390x/pci_ic.c
> > @@ -0,0 +1,621 @@
> [...]
> > +
> > +int kvm_pcilg_service_call(S390CPU *cpu, struct kvm_run *run)
> > +{
> > +    CPUS390XState *env = &cpu->env;
> > +    S390PCIBusDevice *pbdev;
> > +    uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20;
> > +    uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16;
> > +    PciLgStg *rp;
> > +    uint64_t offset;
> > +    uint64_t data;
> > +
> > +    cpu_synchronize_state(CPU(cpu));
> > +    rp = (PciLgStg *)&env->regs[r2];
> 
> I think you have to check for r2 to be even here (and inject a
> specification exception otherwise)
> 
> You should also check rp->len for a valid value and inject an operand
> exception otherwise.
> 
> > +    offset = env->regs[r2 + 1];
> 
> You should also check for a valid offset value here (and inject an
> operand exception otherwise).
> 
> ... and while you're at it - it also seems like the pci instructions
> are priviledged, so you should check for not being in the problem state
> here, too, just to be sure ;-)
> 
> > +    pbdev = s390_pci_find_dev_by_fh(rp->fh);
> > +    if (!pbdev) {
> > +        DPRINTF("pcilg no pci dev\n");
> > +        return -EIO;
> > +    }
> > +
> > +    if (rp->pcias < 6) {
> > +        MemoryRegion *mr = pbdev->pdev->io_regions[rp->pcias].memory;
> > +        io_mem_read(mr, offset, &data, rp->len);
> 
> Does this also work if len != 8 ? I think the data should be put in the
> rightmost  byte positions, with the unused leftmost bytes set to zero...
> so some more logic might be needed here.
> 
> > +    } else if (rp->pcias == 15) {
> > +        data =  pci_host_config_read_common(
> > +                   pbdev->pdev, offset, pci_config_size(pbdev->pdev), rp->len);
> > +
> > +        switch (rp->len) {
> > +        case 1:
> > +            break;
> > +        case 2:
> > +            data = cpu_to_le16(data);
> > +            break;
> > +        case 4:
> > +            data = cpu_to_le32(data);
> > +            break;
> > +        case 8:
> > +            data = cpu_to_le64(data);
> > +            break;
> > +        default:
> > +            abort();
> > +        }
> > +    } else {
> > +        DPRINTF("invalid space\n");
> 
> Set condition code 1 in this case?
> 
> > +    }
> > +
> > +    env->regs[r1] = data;
> > +    setcc(cpu, 0);
> > +    return 0;
> > +}
> > +
> > +int kvm_pcistg_service_call(S390CPU *cpu, struct kvm_run *run)
> > +{
> > +    CPUS390XState *env = &cpu->env;
> > +    uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20;
> > +    uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16;
> > +    PciLgStg *rp;
> > +    uint64_t offset, data;
> > +    S390PCIBusDevice *pbdev;
> > +
> > +    cpu_synchronize_state(CPU(cpu));
> > +    rp = (PciLgStg *)&env->regs[r2];
> > +    offset = env->regs[r2 + 1];
> 
> You likely need the same sanity checks here as with the pcilg function.
> 
> > +    pbdev = s390_pci_find_dev_by_fh(rp->fh);
> > +    if (!pbdev) {
> > +        DPRINTF("pcistg no pci dev\n");
> > +        return -EIO;
> > +    }
> > +
> > +    data = env->regs[r1];
> > +
> > +    if (rp->pcias < 6) {
> > +        MemoryRegion *mr;
> > +        if (pbdev->msix_table_bar == rp->pcias &&
> > +            offset >= pbdev->msix_table_offset &&
> > +            offset <= pbdev->msix_table_offset +
> > +                      (pbdev->msix_entries - 1) * PCI_MSIX_ENTRY_SIZE) {
> > +            offset = offset - pbdev->msix_table_offset;
> > +            mr = &pbdev->pdev->msix_table_mmio;
> > +        } else {
> > +            mr = pbdev->pdev->io_regions[rp->pcias].memory;
> > +        }
> > +
> > +        io_mem_write(mr, offset, data, rp->len);
> 
> Same concerns as with the pcilg - this likely only works for len == 8 ?
> 
> > +    } else if (rp->pcias == 15) {
> > +        switch (rp->len) {
> > +        case 1:
> > +            break;
> > +        case 2:
> > +            data = le16_to_cpu(data);
> > +            break;
> > +        case 4:
> > +            data = le32_to_cpu(data);
> > +            break;
> > +        case 8:
> > +            data = le64_to_cpu(data);
> > +            break;
> > +        default:
> > +            abort();
> > +        }
> > +
> > +        pci_host_config_write_common(pbdev->pdev, offset,
> > +                                     pci_config_size(pbdev->pdev),
> > +                                     data, rp->len);
> > +    } else {
> > +        DPRINTF("pcistg invalid space\n");
> 
> Set condition code 1 in this case?
> 
> > +    }
> > +
> > +    setcc(cpu, 0);
> > +    return 0;
> > +}
> > +
> > +static uint64_t guest_io_table_walk(uint64_t guest_iota,
> > +                                    uint64_t guest_dma_address)
> > +{
> > +    uint64_t sto_a, pto_a, px_a;
> > +    uint64_t sto, pto, pte;
> > +    uint32_t rtx, sx, px;
> > +
> > +    rtx = calc_rtx(guest_dma_address);
> > +    sx = calc_sx(guest_dma_address);
> > +    px = calc_px(guest_dma_address);
> > +
> > +    sto_a = guest_iota + rtx * sizeof(uint64_t);
> > +    cpu_physical_memory_rw(sto_a, (uint8_t *)&sto, sizeof(uint64_t), 0);
> > +    sto = (uint64_t)get_rt_sto(sto);
> > +
> > +    pto_a = sto + sx * sizeof(uint64_t);
> > +    cpu_physical_memory_rw(pto_a, (uint8_t *)&pto, sizeof(uint64_t), 0);
> > +    pto = (uint64_t)get_st_pto(pto);
> > +
> > +    px_a = pto + px * sizeof(uint64_t);
> > +    cpu_physical_memory_rw(px_a, (uint8_t *)&pte, sizeof(uint64_t), 0);
> > +
> > +    return pte;
> > +}
> > +
> > +int kvm_rpcit_service_call(S390CPU *cpu, struct kvm_run *run)
> > +{
> > +    CPUS390XState *env = &cpu->env;
> > +    uint8_t r1 = (run->s390_sieic.ipb & 0x00f00000) >> 20;
> > +    uint8_t r2 = (run->s390_sieic.ipb & 0x000f0000) >> 16;
> > +    uint32_t fh;
> > +    uint64_t pte;
> > +    S390PCIBusDevice *pbdev;
> > +    ram_addr_t size;
> > +    int flags;
> > +    IOMMUTLBEntry entry;
> > +
> > +    cpu_synchronize_state(CPU(cpu));
> > +
> > +    fh = env->regs[r1] >> 32;
> > +    size = env->regs[r2 + 1];
> 
> Missing two checks again:
> - r2 should be even
> - CPU should not be in problem state
> 
> > +    pbdev = s390_pci_find_dev_by_fh(fh);
> > +
> > +    if (!pbdev) {
> > +        DPRINTF("rpcit no pci dev\n");
> > +        return -EIO;
> 
> I think it would be better to set condition code 3 instead.
> 
> > +    }
> > +
> > +    pte = guest_io_table_walk(pbdev->g_iota, env->regs[r2]);
> > +    flags = pte & ZPCI_PTE_FLAG_MASK;
> > +    entry.target_as = &address_space_memory;
> > +    entry.iova = env->regs[r2];
> > +    entry.translated_addr = pte & ZPCI_PTE_ADDR_MASK;
> > +    entry.addr_mask = size - 1;
> > +
> > +    if (flags & ZPCI_PTE_INVALID) {
> > +        entry.perm = IOMMU_NONE;
> > +    } else {
> > +        entry.perm = IOMMU_RW;
> > +    }
> > +
> > +    memory_region_notify_iommu(pci_device_iommu_address_space(
> > +                               pbdev->pdev)->root, entry);
> > +
> > +    setcc(cpu, 0);
> > +    return 0;
> > +}
> > +
> > +int kvm_sic_service_call(S390CPU *cpu, struct kvm_run *run)
> > +{
> > +    DPRINTF("sic\n");
> > +    return 0;
> > +}
> > +
> > +int kvm_pcistb_service_call(S390CPU *cpu, struct kvm_run *run)
> > +{
> > +    CPUS390XState *env = &cpu->env;
> > +    uint8_t r1 = (run->s390_sieic.ipa & 0x00f0) >> 4;
> > +    uint8_t r3 = run->s390_sieic.ipa & 0x000f;
> > +    PciStb *rp;
> > +    uint64_t gaddr;
> > +    uint64_t *uaddr, *pu;
> > +    hwaddr len;
> > +    S390PCIBusDevice *pbdev;
> > +    MemoryRegion *mr;
> > +    int i;
> > +
> > +    cpu_synchronize_state(CPU(cpu));
> > +
> > +    rp = (PciStb *)&env->regs[r1];
> > +    gaddr = get_base_disp_rsy(cpu, run);
> > +    len = rp->len;
> 
> Not sure, but don't you also have to check the length and offset here
> for valid ranges?
> At least you should check for problem state again.
> 
> > +    pbdev = s390_pci_find_dev_by_fh(rp->fh);
> > +    if (!pbdev) {
> > +        DPRINTF("pcistb no pci dev fh 0x%x\n", rp->fh);
> > +        return -EIO;
> 
> User cc3 instead?
> 
> > +    }
> > +
> > +    uaddr = cpu_physical_memory_map(gaddr, &len, 0);
> > +    mr = pbdev->pdev->io_regions[rp->pcias].memory;
> > +
> > +    pu = uaddr;
> > +    for (i = 0; i < rp->len / 8; i++) {
> > +        io_mem_write(mr, env->regs[r3] + i * 8, *pu, 8);
> > +        pu++;
> > +    }
> > +
> > +    cpu_physical_memory_unmap(uaddr, len, 0, len);
> > +    setcc(cpu, 0);
> > +    return 0;
> > +}
> [...]
> 
>  Thomas
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html




[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux