On Tue, Nov 29, 2016 at 12:25:48PM +0800, Peter Xu wrote: > DMAR test is based on QEMU edu device. A 4B DMA memory copy is carried > out as the simplest DMAR test. > > Signed-off-by: Peter Xu <peterx@xxxxxxxxxx> > --- > lib/pci.h | 5 ++ > lib/x86/intel-iommu.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++ > lib/x86/intel-iommu.h | 23 +++++++++ > x86/Makefile.common | 1 + > x86/intel-iommu.c | 51 +++++++++++++++++++ > 5 files changed, 212 insertions(+) > > diff --git a/lib/pci.h b/lib/pci.h > index d3052ef..26968b1 100644 > --- a/lib/pci.h > +++ b/lib/pci.h > @@ -18,6 +18,9 @@ enum { > #define PCI_BAR_NUM 6 > #define PCI_DEVFN_MAX 256 > > +#define PCI_BDF_GET_DEVFN(x) ((x) & 0xff) > +#define PCI_BDF_GET_BUS(x) (((x) >> 8) & 0xff) > + > struct pci_dev { > uint16_t bdf; > phys_addr_t resource[PCI_BAR_NUM]; > @@ -28,6 +31,8 @@ extern void pci_scan_bars(struct pci_dev *dev); > extern void pci_cmd_set_clr(struct pci_dev *dev, uint16_t set, uint16_t clr); > extern void pci_enable_defaults(struct pci_dev *dev); > > +typedef phys_addr_t iova_t; > + > extern bool pci_probe(void); > extern void pci_print(void); > extern bool pci_dev_exists(pcidevaddr_t dev); > diff --git a/lib/x86/intel-iommu.c b/lib/x86/intel-iommu.c > index 9890f34..48fb2a7 100644 > --- a/lib/x86/intel-iommu.c > +++ b/lib/x86/intel-iommu.c > @@ -11,6 +11,42 @@ > */ > > #include "intel-iommu.h" > +#include "libcflat.h" > + > +/* > + * VT-d in QEMU currently only support 39 bits address width, which is > + * 3-level translation. > + */ > +#define VTD_PAGE_LEVEL 3 > +#define VTD_CE_AW_39BIT 0x1 > + > +typedef uint64_t vtd_pte_t; > + > +struct vtd_root_entry { > + /* Quad 1 */ > + uint64_t present:1; > + uint64_t __reserved:11; > + uint64_t context_table_p:52; > + /* Quad 2 */ > + uint64_t __reserved_2; > +} __attribute__ ((packed)); > +typedef struct vtd_root_entry vtd_re_t; > + > +struct vtd_context_entry { > + /* Quad 1 */ > + uint64_t present:1; > + uint64_t disable_fault_report:1; > + uint64_t trans_type:2; > + uint64_t __reserved:8; > + uint64_t slptptr:52; > + /* Quad 2 */ > + uint64_t addr_width:3; > + uint64_t __ignore:4; > + uint64_t __reserved_2:1; > + uint64_t domain_id:16; > + uint64_t __reserved_3:40; > +} __attribute__ ((packed)); > +typedef struct vtd_context_entry vtd_ce_t; > > #define VTD_RTA_MASK (PAGE_MASK) > #define VTD_IRTA_MASK (PAGE_MASK) > @@ -74,6 +110,102 @@ static void vtd_setup_ir_table(void) > printf("IR table address: 0x%016lx\n", vtd_ir_table()); > } > > +static void vtd_install_pte(vtd_pte_t *root, iova_t iova, > + phys_addr_t pa, int level_target) > +{ > + int level; > + unsigned int offset; > + void *page; > + > + for (level = VTD_PAGE_LEVEL; level > level_target; level--) { > + offset = PGDIR_OFFSET(iova, level); > + if (!(root[offset] & VTD_PTE_RW)) { > + page = alloc_page(); > + memset(page, 0, PAGE_SIZE); > + root[offset] = virt_to_phys(page) | VTD_PTE_RW; > + } > + root = (uint64_t *)(root[offset] & VTD_PTE_ADDR); Physical to virtual translation is missed. Also, PAGE_SHIFT implied instead of VTD_PAGE_SHIFT (see below). > + } > + > + offset = PGDIR_OFFSET(iova, level); > + root[offset] = pa | VTD_PTE_RW; > + if (level != 1) { > + /* This is huge page */ > + root[offset] |= VTD_PTE_HUGE; > + } > +} > + > +#define VTD_FETCH_VIRT_ADDR(x) \ > + ((void *)(((uint64_t)phys_to_virt(x)) >> PAGE_SHIFT)) Just a nit. A fetch somehow implies pulling data, while here we have a translation. What about VTD_PHYS_TO_VIRT? > +/** > + * vtd_map_range: setup IO address mapping for specific memory range > + * > + * @sid: source ID of the device to setup > + * @iova: start IO virtual address > + * @pa: start physical address > + * @size: size of the mapping area > + */ > +void vtd_map_range(uint16_t sid, iova_t iova, phys_addr_t pa, size_t size) > +{ > + uint8_t bus_n, devfn; > + void *slptptr; > + vtd_ce_t *ce; > + vtd_re_t *re = phys_to_virt(vtd_root_table()); > + > + assert(IS_ALIGNED(iova, SZ_4K)); > + assert(IS_ALIGNED(pa, SZ_4K)); > + assert(IS_ALIGNED(size, SZ_4K)); > + > + bus_n = PCI_BDF_GET_BUS(sid); > + devfn = PCI_BDF_GET_DEVFN(sid); > + > + /* Point to the correct root entry */ > + re += bus_n; > + > + if (!re->present) { > + ce = alloc_page(); > + memset(ce, 0, PAGE_SIZE); > + memset(re, 0, sizeof(*re)); > + re->context_table_p = virt_to_phys(ce) >> PAGE_SHIFT; > + re->present = 1; > + printf("allocated vt-d root entry for PCI bus %d\n", > + bus_n); > + } else > + ce = VTD_FETCH_VIRT_ADDR(re->context_table_p); > + > + /* Point to the correct context entry */ > + ce += devfn; > + > + if (!ce->present) { > + slptptr = alloc_page(); > + memset(slptptr, 0, PAGE_SIZE); > + memset(ce, 0, sizeof(*ce)); > + /* To make it simple, domain ID is the same as SID */ > + ce->domain_id = sid; > + /* We only test 39 bits width case (3-level paging) */ > + ce->addr_width = VTD_CE_AW_39BIT; > + ce->slptptr = virt_to_phys(slptptr) >> PAGE_SHIFT; It seems left shift is needed here, not the right one. Also, using PAGE_SHIFT (and possible other memory constants throughout the source) looks wrong to me. Instead it should be VTD_PAGE_SHIFT (and alike) - no matter they are equal to the memory constants at the moment. > + ce->trans_type = VTD_CONTEXT_TT_MULTI_LEVEL; > + ce->present = 1; > + /* No error reporting yet */ > + ce->disable_fault_report = 1; > + printf("allocated vt-d context entry for devfn 0x%x\n", > + devfn); > + } else > + slptptr = VTD_FETCH_VIRT_ADDR(ce->slptptr); > + > + while (size) { > + /* TODO: currently we only map 4K pages (level = 1) */ > + printf("map 4K page IOVA 0x%lx to 0x%lx (sid=0x%04x)\n", > + iova, pa, sid); > + vtd_install_pte(slptptr, iova, pa, 1); > + size -= PAGE_SIZE; > + iova += PAGE_SIZE; > + pa += PAGE_SIZE; > + } > +} > + > void vtd_init(void) > { > setup_vm(); > diff --git a/lib/x86/intel-iommu.h b/lib/x86/intel-iommu.h > index fae9ae5..1352639 100644 > --- a/lib/x86/intel-iommu.h > +++ b/lib/x86/intel-iommu.h > @@ -20,6 +20,7 @@ > #include "isr.h" > #include "smp.h" > #include "desc.h" > +#include "pci.h" > #include "asm/io.h" > > #define Q35_HOST_BRIDGE_IOMMU_ADDR 0xfed90000ULL > @@ -91,6 +92,27 @@ > #define VTD_GCMD_ONE_SHOT_BITS (VTD_GCMD_IR_TABLE | VTD_GCMD_WBF | \ > VTD_GCMD_SFL | VTD_GCMD_ROOT) > > +/* Supported Adjusted Guest Address Widths */ > +#define VTD_CAP_SAGAW_SHIFT 8 > +/* 39-bit AGAW, 3-level page-table */ > +#define VTD_CAP_SAGAW_39bit (0x2ULL << VTD_CAP_SAGAW_SHIFT) > +/* 48-bit AGAW, 4-level page-table */ > +#define VTD_CAP_SAGAW_48bit (0x4ULL << VTD_CAP_SAGAW_SHIFT) > +#define VTD_CAP_SAGAW VTD_CAP_SAGAW_39bit > + > +/* Both 1G/2M huge pages */ > +#define VTD_CAP_SLLPS ((1ULL << 34) | (1ULL << 35)) > + > +#define VTD_CONTEXT_TT_MULTI_LEVEL 0 > +#define VTD_CONTEXT_TT_DEV_IOTLB 1 > +#define VTD_CONTEXT_TT_PASS_THROUGH 2 > + > +#define VTD_PTE_R (1 << 0) > +#define VTD_PTE_W (1 << 1) > +#define VTD_PTE_RW (VTD_PTE_R | VTD_PTE_W) > +#define VTD_PTE_ADDR GENMASK_ULL(51, 12) Are we safe to include 51:HAW lines? They are marked as Reserved and we might write 1s to these bits occasionally. I do not really believe it could cause trouble ever - just need some clarity here. > +#define VTD_PTE_HUGE (1 << 7) > + > #define vtd_reg(reg) ((volatile void *)(Q35_HOST_BRIDGE_IOMMU_ADDR + reg)) > > static inline void vtd_writel(unsigned int reg, uint32_t value) > @@ -114,5 +136,6 @@ static inline uint64_t vtd_readq(unsigned int reg) > } > > void vtd_init(void); > +void vtd_map_range(uint16_t sid, phys_addr_t iova, phys_addr_t pa, size_t size); > > #endif > diff --git a/x86/Makefile.common b/x86/Makefile.common > index 356d879..1dad18b 100644 > --- a/x86/Makefile.common > +++ b/x86/Makefile.common > @@ -3,6 +3,7 @@ > all: test_cases > > cflatobjs += lib/pci.o > +cflatobjs += lib/pci-edu.o > cflatobjs += lib/x86/io.o > cflatobjs += lib/x86/smp.o > cflatobjs += lib/x86/vm.o > diff --git a/x86/intel-iommu.c b/x86/intel-iommu.c > index f247913..21fd57f 100644 > --- a/x86/intel-iommu.c > +++ b/x86/intel-iommu.c > @@ -11,9 +11,50 @@ > */ > > #include "intel-iommu.h" > +#include "pci-edu.h" > + > +#define VTD_TEST_DMAR_4B ("DMAR 4B memcpy test") > + > +void vtd_test_dmar(struct pci_edu_dev *dev) > +{ > + void *page = alloc_page(); > + > +#define DMA_TEST_WORD (0x12345678) > + /* Modify the first 4 bytes of the page */ > + *(uint32_t *)page = DMA_TEST_WORD; > + > + /* > + * Map the newly allocated page into IOVA address 0 (size 4K) > + * of the device address space. Root entry and context entry > + * will be automatically created when needed. > + */ > + vtd_map_range(dev->pci_dev.bdf, 0, virt_to_phys(page), PAGE_SIZE); > + > + /* > + * DMA the first 4 bytes of the page to EDU device buffer > + * offset 0. > + */ > + edu_dma(dev, 0, 4, 0, false); > + > + /* > + * DMA the first 4 bytes of EDU device buffer into the page > + * with offset 4 (so it'll be using 4-7 bytes). > + */ > + edu_dma(dev, 4, 4, 0, true); > + > + /* > + * Check data match between 0-3 bytes and 4-7 bytes of the > + * page. > + */ > + report(VTD_TEST_DMAR_4B, *((uint32_t *)page + 1) == DMA_TEST_WORD); > + > + free_page(page); > +} > > int main(int argc, char *argv[]) > { > + struct pci_edu_dev dev; > + > vtd_init(); > > report("fault status check", vtd_readl(DMAR_FSTS_REG) == 0); > @@ -22,6 +63,16 @@ int main(int argc, char *argv[]) > report("IR table setup", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_IR_TABLE); > report("DMAR enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_DMAR); > report("IR enablement", vtd_readl(DMAR_GSTS_REG) & VTD_GCMD_IR); > + report("DMAR support 39 bits address width", > + vtd_readq(DMAR_CAP_REG) & VTD_CAP_SAGAW); > + report("DMAR support huge pages", vtd_readq(DMAR_CAP_REG) & VTD_CAP_SLLPS); > + > + if (!edu_init(&dev)) { > + printf("Please specify \"-device edu\" to do " > + "further IOMMU tests.\n"); > + report_skip(VTD_TEST_DMAR_4B); > + } else > + vtd_test_dmar(&dev); > > return report_summary(); > } > -- > 2.7.4 > -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html