On Fri, Sep 04, 2015 at 09:59:00AM -0700, Jesse Barnes wrote:
> New file with VT-d SVM and PASID handling functions and page table
> management.  This belongs in the IOMMU code (along with some extra bits
> for waiting for invalidations and page faults to complete, flushing the
> device IOTLB, etc.)
>
> FIXME:
>   need work queue for re-submitting contexts
>   TE bit handling on SKL
> ---
>  drivers/gpu/drm/i915/Makefile           |    5 +-
>  drivers/gpu/drm/i915/i915_drv.h         |   43 ++
>  drivers/gpu/drm/i915/i915_gem.c         |    3 +
>  drivers/gpu/drm/i915/i915_gem_context.c |    3 +
>  drivers/gpu/drm/i915/i915_irq.c         |    7 +
>  drivers/gpu/drm/i915/i915_reg.h         |   47 ++
>  drivers/gpu/drm/i915/i915_svm.c         | 1102 +++++++++++++++++++++++++++++++
>  drivers/gpu/drm/i915/intel_lrc.c        |  120 +++-
>  drivers/gpu/drm/i915/intel_lrc.h        |    1 +
>  9 files changed, 1299 insertions(+), 32 deletions(-)
>  create mode 100644 drivers/gpu/drm/i915/i915_svm.c
>
> diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
> index 44d290a..e4883a7 100644
> --- a/drivers/gpu/drm/i915/Makefile
> +++ b/drivers/gpu/drm/i915/Makefile
> @@ -38,7 +38,8 @@ i915-y += i915_cmd_parser.o \
>  	  intel_lrc.o \
>  	  intel_mocs.o \
>  	  intel_ringbuffer.o \
> -	  intel_uncore.o
> +	  intel_uncore.o \
> +	  i915_svm.o

Correct me if I am wrong, but it looks like i915_svm implements the
low-level interface with the hardware, so by convention it should be
intel_svm.c.

>  # general-purpose microcontroller (GuC) support
>  i915-y += intel_guc_loader.o \
> @@ -93,6 +94,8 @@ i915-y += dvo_ch7017.o \
>  # virtual gpu code
>  i915-y += i915_vgpu.o
>
> +i915-$(CONFIG_MMU_NOTIFIER) += i915_svm.o

Added twice?

> +
>  # legacy horrors
>  i915-y += i915_dma.o
>
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 20beb51..ca38a7a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -47,6 +47,7 @@
>  #include <drm/drm_gem.h>
>  #include <linux/backlight.h>
>  #include <linux/hashtable.h>
> +#include <linux/mmu_notifier.h>
>  #include <linux/intel-iommu.h>
>  #include <linux/kref.h>
>  #include <linux/pm_qos.h>
> @@ -848,6 +849,13 @@ struct i915_ctx_hang_stats {
>  	bool banned;
>  };
>
> +struct intel_mm_struct {
> +	struct kref kref;
> +	struct mmu_notifier notifier;
> +	struct drm_i915_private *dev_priv;
> +	struct list_head context_list;
> +};

Doesn't this look kind of familiar? struct i915_mm_struct perhaps?

> +
>  /* This must match up with the value previously used for execbuf2.rsvd1. */
>  #define DEFAULT_CONTEXT_HANDLE 0
>
> @@ -874,6 +882,9 @@ struct i915_ctx_hang_stats {
>  struct intel_context {
>  	struct kref ref;
>  	int user_handle;
> +	bool is_svm; /* shares x86 page tables */
> +	u32 pasid; /* 20 bits */
> +	struct intel_mm_struct *ims;
>  	uint8_t remap_slice;
>  	struct drm_i915_private *i915;
>  	int flags;
> @@ -895,6 +906,9 @@ struct intel_context {
>  		int pin_count;
>  	} engine[I915_NUM_RINGS];
>
> +	struct list_head mm_list;

This is a link, name it so.

> +	struct task_struct *tsk;

One task? A context can be passed by the device fd to another process.
Do we inherit the VM along with the context? I don't see anything to
prevent that.
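
To make that concrete, this is roughly the shape I would expect, mirroring
the existing i915_mm_struct/userptr convention (all of the names and the
extra fields here are mine, not from the patch):

struct i915_mm_struct {
	struct kref kref;
	struct mmu_notifier notifier;
	struct drm_i915_private *i915;
	struct mm_struct *mm;		/* the mm we are bound to, pinned */
	spinlock_t lock;		/* protects context_list */
	struct list_head context_list;	/* all contexts sharing this mm */
};

and in the context:

	struct i915_mm_struct *svm_mm;	/* NULL unless is_svm */
	struct list_head svm_link;	/* link into svm_mm->context_list */

i.e. track the mm (with a proper reference) rather than a task, which also
sidesteps the question of what happens when the context fd travels to
another process.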
> +static void gpu_mm_segv(struct task_struct *tsk, unsigned long address,
> +			int si_code)
> +{
> +	siginfo_t info;
> +
> +	/* Need specific signal info here */
> +	info.si_signo = SIGSEGV;
> +	info.si_errno = EIO;
> +	info.si_code = si_code;
> +	info.si_addr = (void __user *)address;
> +
> +	force_sig_info(SIGSEGV, &info, tsk);

force_sig_info() is not exported; ah, you build i915_svm.c in.

> +}
> +
> +/*
> + * Read the fault descriptor and handle the fault:
> + *   get PML4 from PASID
> + *   get mm struct
> + *   get the vma
> + *   verify the address is valid
> + *   call handle_mm_fault after taking the mm->mmap_sem
> + */
> +void intel_gpu_fault_work(struct work_struct *work)
> +{
> +	struct i915_svm_state *svm = container_of(work, struct i915_svm_state,
> +						  work);
> +	struct drm_i915_private *dev_priv =
> +		container_of(svm, struct drm_i915_private, svm);
> +	struct drm_device *dev = dev_priv->dev;
> +	struct intel_ringbuffer *ringbuf;
> +	struct page_request_dsc desc;
> +	struct page_group_response_dsc resp;
> +	struct intel_context *ctx;
> +	struct task_struct *tsk;
> +	struct mm_struct *mm;
> +	struct vm_area_struct *vma;
> +	u64 address;
> +	int ret;
> +
> +	DRM_ERROR("PRQ updated, head 0x%08x, tail 0x%08x\n",
> +		  I915_READ(SVM_PRQ_HEAD), I915_READ(SVM_PRQ_TAIL));
> +	prq_read_descriptor(dev, &desc);
> +	DRM_ERROR("page fault on addr 0x%016llx, PASID %d, srr %d\n",
> +		  (u64)(desc.addr << PAGE_SHIFT), desc.pasid, desc.srr);
> +
> +	spin_lock(&dev_priv->svm.lock);
> +	ctx = dev_priv->svm.pasid_ctx[desc.pasid];
> +	tsk = ctx->tsk;
> +	mm = tsk->mm;
> +	address = desc.addr << PAGE_SHIFT;
> +	ringbuf = ctx->engine[RCS].ringbuf;
> +	spin_unlock(&dev_priv->svm.lock);

All of the above can disappear at any time after the unlock?

> +
> +	down_read_trylock(&mm->mmap_sem);
> +	vma = find_extend_vma(mm, address);
> +	if (!vma || address < vma->vm_start) {
> +		DRM_ERROR("bad VMA or address out of range\n");
> +		gpu_mm_segv(tsk, address, SEGV_MAPERR);
> +		goto out_unlock; /* need to kill process */
> +	}
> +
> +	ret = handle_mm_fault(mm, vma, address,
> +			      desc.wr_req ? FAULT_FLAG_WRITE : 0);
> +	if (ret & VM_FAULT_ERROR) {
> +		gpu_mm_segv(tsk, address, SEGV_ACCERR); /* ? */
> +		goto out_unlock;
> +	}
> +
> +	if (ret & VM_FAULT_MAJOR)
> +		tsk->maj_flt++;
> +	else
> +		tsk->min_flt++;
> +
> +	if (desc.srr)
> +		resp.dsc_type = PAGE_STREAM_RESP_DSC;
> +	else
> +		resp.dsc_type = PAGE_GRP_RESP_DSC;
> +	resp.pasid = desc.pasid;
> +	resp.pasid_present = 1;
> +	resp.requestor_id = PCI_DEVID(0, PCI_DEVFN(2,0));
> +	resp.resp_code = RESP_CODE_SUCCESS;
> +	resp.prg_index = desc.prg_index;
> +	resp.private = desc.private;
> +	ivq_write_resp_descriptor(dev, &resp);
> +out_unlock:
> +	up_read(&mm->mmap_sem);
> +
> +	/* FIXME: wait for page response to be serviced */
> +
> +	/* FIXME: queue context for re-submit */
> +	/* execlists_context_queue(req); */
> +}
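
In other words I'd expect the lookup to pin everything it needs before
dropping the lock, along these lines (rough sketch only; the out_put_task
label is mine and the references obviously need dropping on every exit
path):

	spin_lock(&dev_priv->svm.lock);
	ctx = dev_priv->svm.pasid_ctx[desc.pasid];
	if (!ctx) {
		spin_unlock(&dev_priv->svm.lock);
		return; /* PASID already released, drop the fault */
	}
	i915_gem_context_reference(ctx);
	tsk = ctx->tsk;
	get_task_struct(tsk);
	spin_unlock(&dev_priv->svm.lock);

	mm = get_task_mm(tsk); /* pins mm_users, NULL if the task is exiting */
	if (!mm)
		goto out_put_task;

with the matching mmput()/put_task_struct()/context unreference afterwards.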
> +/* Make sure GPU writes can't hit the mm that's about to go away */
> +static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
> +{
> +	struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> +						   notifier);
> +	struct drm_i915_private *dev_priv = ims->dev_priv;
> +	struct drm_device *dev = dev_priv->dev;
> +	struct intel_context *ctx;
> +
> +	/*
> +	 * Wait for any outstanding activity and unbind the mm.  Since
> +	 * each context has its own ring, we can simply wait for the ring
> +	 * to idle before invalidating the PASID and flushing the TLB.
> +	 */
> +	mutex_lock(&dev->struct_mutex);
> +	list_for_each_entry(ctx, &ims->context_list, mm_list) {
> +		intel_ring_idle(ctx->engine[RCS].ringbuf->ring);
> +	}
> +
> +	intel_iommu_tlb_flush(dev_priv->dev);
> +	mutex_unlock(&dev->struct_mutex);

Erm, what! So you halt the GPU every time? But you've already invalidated
the shadow PTE -- ah, invalidate-range looks to be a work in progress.

> +static void intel_flush_page_locked(struct drm_device *dev, int pasid,
> +				    unsigned long address)
> +{
> +	struct ext_iotlb_inv_dsc dsc = { 0 };
> +
> +	dsc.dsc_type = EXT_IOTLB_INV_DSC;
> +	dsc.g = EXT_IOTLB_INV_G_PASID_PAGE_SELECT;
> +	dsc.pasid = pasid;
> +	dsc.ih = 0;
> +	dsc.addr = address;
> +	dsc.am = 1;
> +	ivq_write_ext_iotlb_inv_descriptor(dev, &dsc);
> +}
> +
> +static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm,
> +			     unsigned long address, pte_t pte)
> +{
> +	struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> +						   notifier);
> +	struct drm_i915_private *dev_priv = ims->dev_priv;
> +	struct drm_device *dev = dev_priv->dev;
> +
> +	struct intel_context *ctx;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	list_for_each_entry(ctx, &ims->context_list, mm_list)
> +		intel_flush_page_locked(dev, ctx->pasid, address);
> +	mutex_unlock(&dev->struct_mutex);

Suggests you really want an ims->spinlock for context_list instead.

> +}
> +
> +static void intel_invalidate_page(struct mmu_notifier *mn,
> +				  struct mm_struct *mm,
> +				  unsigned long address)
> +{
> +	struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> +						   notifier);
> +	struct drm_i915_private *dev_priv = ims->dev_priv;
> +	struct drm_device *dev = dev_priv->dev;
> +	struct intel_context *ctx;
> +
> +	mutex_lock(&dev->struct_mutex);
> +	list_for_each_entry(ctx, &ims->context_list, mm_list)
> +		intel_flush_page_locked(dev, ctx->pasid, address);
> +	mutex_unlock(&dev->struct_mutex);
> +}
> +
> +/* Need to unmap this range and make sure it doesn't get re-faulted */
> +static void intel_invalidate_range_start(struct mmu_notifier *mn,
> +					 struct mm_struct *mm,
> +					 unsigned long start, unsigned long end)
> +{
> +	struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> +						   notifier);
> +	struct drm_i915_private *dev_priv = ims->dev_priv;
> +	struct drm_device *dev = dev_priv->dev;
> +
> +	/* FIXME: invalidate page only */
> +	intel_iommu_tlb_flush(dev);
> +}
> +
> +/* Pages have been freed at this point */
> +static void intel_invalidate_range_end(struct mmu_notifier *mn,
> +				       struct mm_struct *mm,
> +				       unsigned long start, unsigned long end)
> +{
> +	struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
> +						   notifier);
> +	struct drm_i915_private *dev_priv = ims->dev_priv;
> +	struct drm_device *dev = dev_priv->dev;
> +
> +	/* FIXME: invalidate page only */
> +	intel_iommu_tlb_flush(dev);
> +}
> +
> +static const struct mmu_notifier_ops intel_mmuops = {
> +	.release = intel_mm_release,
> +	/* no clear_flush_young, we just share the x86 bits */
> +	/* no test_young, we just share the x86 bits */
> +	.change_pte = intel_change_pte,
> +	.invalidate_page = intel_invalidate_page,
> +	.invalidate_range_start = intel_invalidate_range_start,
> +	.invalidate_range_end = intel_invalidate_range_end,
> +};
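
That is, with a lock owned by the ims (the spinlock from the earlier
sketch; the field name is mine), the notifier callbacks stay away from
struct_mutex entirely:

static void intel_invalidate_page(struct mmu_notifier *mn,
				  struct mm_struct *mm,
				  unsigned long address)
{
	struct intel_mm_struct *ims = container_of(mn, struct intel_mm_struct,
						   notifier);
	struct intel_context *ctx;

	spin_lock(&ims->lock);
	list_for_each_entry(ctx, &ims->context_list, mm_list)
		intel_flush_page_locked(ims->dev_priv->dev, ctx->pasid,
					address);
	spin_unlock(&ims->lock);
}

which also avoids taking struct_mutex while the notifier is called under
mmap_sem -- one half of the inversion below.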
> +
> +struct intel_mm_struct *intel_bind_mm(struct drm_device *dev,
> +				      struct intel_context *ctx)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct intel_mm_struct *ims;
> +	struct mmu_notifier *mn;
> +	int ret;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +
> +	mn = mmu_find_ops(current->mm, &intel_mmuops);

Magic function, I am missing its definition.

> +	if (mn) {
> +		ims = container_of(mn, struct intel_mm_struct, notifier);
> +		kref_get(&ims->kref);
> +		goto out;
> +	}
> +
> +	ims = kzalloc(sizeof(*ims), GFP_KERNEL);
> +	if (!ims) {
> +		ret = -ENOMEM;
> +		goto error;
> +	}
> +	INIT_LIST_HEAD(&ims->context_list);
> +
> +	ims->notifier.ops = &intel_mmuops;
> +
> +	ret = mmu_notifier_register(&ims->notifier, current->mm);

This has lock inversion between struct_mutex and mm->mmap_sem.

> +	if (ret)
> +		goto error;
> +
> +	ims->dev_priv = dev->dev_private;
> +
> +out:
> +	list_add(&ctx->mm_list, &ims->context_list);
> +	return ims;
> +error:
> +	kfree(ims);
> +	return ERR_PTR(ret);
> +}
> +
> +static void intel_mm_free(struct kref *ims_ref)
> +{
> +	struct intel_mm_struct *ims =
> +		container_of(ims_ref, struct intel_mm_struct, kref);
> +
> +	mmu_notifier_unregister(&ims->notifier, current->mm);

More lock inversion.

> +	kfree(ims);
> +}
> +
> +void intel_unbind_mm(struct intel_context *ctx)
> +{
> +	struct drm_i915_private *dev_priv = ctx->ims->dev_priv;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +
> +	list_del(&ctx->mm_list);
> +	kref_put(&ctx->ims->kref, intel_mm_free);
> +
> +	return;
> +}
> +
> +int intel_exec_mm_ioctl(struct drm_device *dev, void *data,
> +			struct drm_file *file)
> +{
> +//	struct drm_i915_exec_mm *exec_mm = data;
> +//	struct drm_i915_private *dev_priv = dev->dev_private;
> +
> +	/* Load new context into context reg */

Ah, there is a modicum of user API here.

> +	return 0;
> +}
> +
> +/*
> + * The PASID table has 32 entries in the current config, rotate through
> + * them as needed.
> + */
> +int intel_alloc_pasid(struct drm_device *dev, struct intel_context *ctx)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct pasid_table_entry *table;
> +	int i;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +
> +	spin_lock(&dev_priv->svm.lock);
> +	table = dev_priv->svm.pasid_table;
> +
> +	for (i = 0; i < PASID_COUNT; i++) {
> +		if (!table[i].present)
> +			goto found;
> +	}
> +
> +	spin_unlock(&dev_priv->svm.lock);
> +	return -1;
> +
> +found:
> +	table[i].pml4 = __pa(current->mm->pgd) >> PAGE_SHIFT;
> +	table[i].present = 1;
> +
> +	ctx->pasid = i;
> +	dev_priv->svm.pasid_ctx[ctx->pasid] = NULL;
> +	spin_unlock(&dev_priv->svm.lock);
> +
> +	intel_iommu_tlb_flush(dev);
> +
> +	return 0;
> +}
> +
> +void intel_free_pasid(struct drm_device *dev, struct intel_context *ctx)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct pasid_table_entry *table;
> +
> +	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
> +
> +	if (ctx->pasid >= PASID_COUNT)
> +		return;
> +
> +	spin_lock(&dev_priv->svm.lock);
> +	table = dev_priv->svm.pasid_table;
> +	memset(&table[ctx->pasid], 0, sizeof(struct pasid_table_entry));
> +	dev_priv->svm.pasid_ctx[ctx->pasid] = NULL;
> +	ctx->pasid = -1;
> +	spin_unlock(&dev_priv->svm.lock);
> +
> +	intel_iommu_tlb_flush(dev);
> +}
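
On the allocator: for 32 entries the linear scan is fine, but an ida would
do the rotation and bookkeeping for you and return a real errno instead of
-1 -- rough sketch, the pasid_ida field is my invention:

	int pasid;

	pasid = ida_simple_get(&dev_priv->svm.pasid_ida, 0, PASID_COUNT,
			       GFP_KERNEL);
	if (pasid < 0)
		return pasid;

	spin_lock(&dev_priv->svm.lock);
	dev_priv->svm.pasid_table[pasid].pml4 =
		__pa(current->mm->pgd) >> PAGE_SHIFT;
	dev_priv->svm.pasid_table[pasid].present = 1;
	ctx->pasid = pasid;
	spin_unlock(&dev_priv->svm.lock);

with ida_simple_remove(&dev_priv->svm.pasid_ida, ctx->pasid) in
intel_free_pasid() and an ida_init() next to the spin_lock_init().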
> +
> +/*
> + * Each root table entry is 16 bytes wide.  In legacy mode, only
> + * the lower 64 bits are used:
> + *   Bits 38:12: context table pointer
> + *   Bit 0: present
> + *   all other bits reserved
> + * In extended mode (what we use for SVM):
> + *   Bits 102:76: upper context table pointer
> + *   Bit 64: upper present
> + *   Bits 38:12: lower context table pointer
> + *   Bit 0: lower present
> + *   all other bits reserved
> + *
> + * The context entries are 128 bit in legacy mode:
> + *   Bits 87:72: Domain ID
> + *   Bits 70:67: Available
> + *   Bits 66:64: Address width
> + *   Bits 38:12: Page table pointer
> + *   Bits 3:2: Translation type
> + *     00 - only untranslated DMA requests go through this table
> + *          translated and translation requests are blocked
> + *     01 - untranslated, translated, and translation requests supported
> + *     10 - untranslated requests are treated as pass through (HPA == GPA),
> + *          translated DMA requests and translation requests are blocked
> + *     11 - reserved
> + *   Bit 1: fault disable
> + *   Bit 0: Present
> + * and 256 bit in extended:
> + *   Bits 230:204: PASID state table pointer
> + *   Bits 166:140: PASID table pointer
> + *   Bits 131:128: PASID table size
> + *   Bits 127:96: Page table attribute (PAT)
> + *   Bit 92: SL64KPE
> + *   Bit 91: SLEE
> + *   Bit 90: ERE
> + *   Bit 89: SRE
> + *   Bit 88: SMEP
> + *   Bits 87:72: Domain ID
> + *   Bit 71: Extended memory type enable
> + *   Bit 70: cache disable (CD)
> + *   Bit 69: write protect (WP)
> + *   Bit 68: no execute enable (NXE)
> + *   Bit 67: page global enable (PGE)
> + *   Bits 66:64: address width
> + *   Bits 38:12: 2nd level (VT-d) page table pointer
> + *   Bit 11: PASID enable
> + *   Bit 10: Nesting enable
> + *   Bit 9: Page Request enable
> + *   Bit 8: Lazy-Invalidate enable
> + *   Bits 7:5: Extended Memory Type (VT-d)
> + *   Bits 4:2: Translation type
> + *     000 - Only Untranslated DMA requests are translated through this page
> + *           table. Translated DMA requests and Translation Requests are
> + *           blocked.  Untranslated requests-without-PASID are remapped using
> + *           the second-level page-table referenced through SLPTPTR field.
> + *           If PASIDE field is Set, Untranslated requests-with-PASID are
> + *           remapped using the PASID Table referenced through PASIDPTPTR
> + *           field. If PASIDE field is Clear, Untranslated requests-with-PASID
> + *           are blocked.  Translation requests (with or without PASID), and
> + *           Translated Requests are blocked.
> + *     001 - Un-translated and Translation requests without PASID supported
> + *           (and with PASID supported, if PASID Enable Set); Translate
> + *           requests bypass address translation.  Untranslated
> + *           requests-without-PASID and Translation requests-without-PASID are
> + *           remapped using the second level page-table referenced through
> + *           SLPTPTR field. If PASIDE field is Set, Untranslated
> + *           requests-with-PASID and Translation requests-with-PASID are
> + *           remapped using the PASID Table referenced through PASIDPTPTR
> + *           field. If PASIDE field is Clear, Untranslated requests-with-PASID,
> + *           and Translation requests-with-PASID, are blocked.  Translated
> + *           requests bypass address translation.
> + *     010 - If Pass-through Supported (GT supports pass-through),
> + *           Un-translated requests without PASID bypass address translation;
> + *           All other requests (with or without PASID) blocked.  Untranslated
> + *           requests-without-PASID bypass address translation and are
> + *           processed as passthrough.  SLPTPTR field is ignored by hardware.
> + *           Untranslated requests-with-PASID, Translation requests (with or
> + *           without PASID), and Translated requests are blocked.
> + *     011 - Reserved.
> + *     100 - Un-translated requests without PASID bypass address translation;
> + *           Un-translated requests with PASID supported, if PASID Enable Set;
> + *           All other requests blocked.  Untranslated requests-without-PASID
> + *           bypass address translation and are processed as passthrough.
> + *           SLPTPTR field is ignored by hardware.  Untranslated
> + *           requests-with-PASID are remapped using the PASID Table referenced
> + *           through PASIDPTPTR field.  Translation requests (with or without
> + *           PASID) and Translated requests are blocked.
> + *     101 - Un-translated and Translation requests without PASID bypass
> + *           address translation; Un-translated and Translation requests with
> + *           PASID supported, if PASID Enable Set; Translated requests bypass
> + *           address translation.  Untranslated requests-without-PASID bypass
> + *           address translation and are processed as passthrough.  SLPTPTR
> + *           field is ignored by hardware.  Translation requests-without-PASID
> + *           are responded with Untranslated access only bit Set (U=1) along
> + *           with read and write permissions (R=W=1).  SLPTPTR field is ignored
> + *           by hardware.  Untranslated requests-with-PASID, and Translation
> + *           requests-with-PASID are remapped using the PASID Table referenced
> + *           through PASIDPTPTR field.  Translated requests bypass address
> + *           translation.
> + *     110 - Un-translated requests without PASID are blocked; Un-translated
> + *           requests with PASID supported, if PASID Enable Set; All other
> + *           requests blocked – Not applicable to GFX, GT should treat this
> + *           as reserved.
> + *     111 - Un-translated and Translation requests without PASID blocked;
> + *           Un-translated and Translation requests with PASID supported, if
> + *           PASID Enable Set; Translated requests bypass address translation.
> + *           Note: Not applicable to GFX, GT should treat this as reserved.
> + *   Bit 1: Fault disable
> + *   Bit 0: Present
> + *
> + * Page walks for graphics addresses can go through one or two levels of
> + * translation, depending on whether VT-d is enabled.
> + *
> + * If we're in driver mode (currently the only supported mode), we always
> + * use a single level of translation, meaning the second level page table
> + * pointer (if present) is ignored.
> + *
> + * The full walk starts at the root table, which indexes into the upper
> + * and lower context tables.  Those tables point to PASID mapping and state
> + * tables and potentially a second level page table for VT-d (which, as noted
> + * above, is unused currently).  The PASID mapping table points to a PML4
> + * (x86 compatible) page table, while the state table indicates other
> + * information about the PASID involved in the request, which ultimately comes
> + * from the execlist port submission of the context descriptor.
> + *
> + * To enable a shared CPU/GPU address space, we can use a couple of different
> + * translation types, either 101 or 01 w/o nesting.  The main requirement
> + * is that requests with PASID are translated through the page tables provided,
> + * potentially with nesting if we're running in a VT-d context (which we
> + * don't currently support).
> + */
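
Given that walk, a CPU-side helper to cross-check what the hardware will
resolve would be handy when debugging page faults -- something like this
(untested, the function is my invention and not part of the patch):

/* Resolve a PASID back to the PML4 the GPU walk will use. */
static u64 i915_svm_pasid_to_pml4(struct drm_i915_private *dev_priv,
				  u32 pasid)
{
	struct pasid_table_entry *table = dev_priv->svm.pasid_table;

	if (WARN_ON(pasid >= PASID_COUNT || !table[pasid].present))
		return 0;

	/* the entry stores the 4KiB-aligned PML4 address (bits 38:12) */
	return (u64)table[pasid].pml4 << PAGE_SHIFT;
}

which should always match __pa(mm->pgd) for the bound mm.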
> +#define CONTEXT_OFFSET (PAGE_SIZE * 1)
> +#define PASID_OFFSET (PAGE_SIZE * 2)
> +#define PASID_STATE_OFFSET (PAGE_SIZE * 3)
> +#define PRQ_OFFSET (PAGE_SIZE * 4)
> +#define IVQ_OFFSET (PAGE_SIZE * 5)
> +static void intel_init_svm_root_table(struct drm_device *dev,
> +				      drm_dma_handle_t *tables)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	struct extended_root_table_entry *root_table;
> +	struct extended_context_table_entry *context;
> +	struct pasid_table_entry *pasid_table;
> +	struct pasid_state_table_entry *pasid_state_table;
> +	u64 *tmp;
> +
> +	root_table = tables->vaddr;
> +	context = tables->vaddr + CONTEXT_OFFSET;
> +	pasid_table = tables->vaddr + PASID_OFFSET;
> +	pasid_state_table = tables->vaddr + PASID_STATE_OFFSET;
> +
> +	DRM_ERROR("programmed PASID table, vaddr %p, busaddr 0x%16llx\n",
> +		  pasid_table, tables->busaddr + PASID_OFFSET);
> +
> +	/* Context entry for gfx device */
> +	context[16].pat = 0x66666666;
> +	context[16].ere = 1;
> +	context[16].sre = 1;
> +	context[16].smep = 1;
> +	context[16].domain_id = 1;
> +	context[16].addr_width = AGAW_48; /* full x86 walk */
> +	context[16].pasid_en = 1;
> +	context[16].nesting_en = 0; /* not yet */
> +	context[16].pg_req_en = 1;
> +	context[16].lazy_invalidate_en = 1;
> +	context[16].ext_mem_type = EXTENDED_MTYPE_WB;
> +	context[16].translation_type = EXTENDED_TTYPE_UT_TR_PASID_PT;
> +	context[16].fault_disable = 0;
> +	context[16].present = 1;
> +	context[16].pasid_state_table_addr = (tables->busaddr + PASID_STATE_OFFSET) >> PAGE_SHIFT;
> +	context[16].pasid_table_addr = (tables->busaddr + PASID_OFFSET) >>
> +		PAGE_SHIFT;
> +	context[16].pasid_table_size = 0; /* 2^(5+x) */
> +
> +	tmp = (u64 *)&context[16];
> +	DRM_ERROR("root entry: 0x%016llx%016llx\n", tmp[1], tmp[0]);
> +
> +	DRM_ERROR("programmed context table, vaddr %p, busaddr 0x%16llx\n",
> +		  context, tables->busaddr + CONTEXT_OFFSET);
> +
> +	/* Root table */
> +	root_table[0].lo_ctx_addr = (tables->busaddr + CONTEXT_OFFSET) >>
> +		PAGE_SHIFT;
> +	root_table[0].lo_present = 1;
> +	root_table[0].hi_present = 0;
> +
> +	tmp = (u64 *)&root_table[0];
> +	DRM_ERROR("root entry: 0x%016llx%016llx\n", tmp[1], tmp[0]);
> +
> +	dev_priv->svm.root_table = root_table;
> +	dev_priv->svm.context = context;
> +	dev_priv->svm.pasid_table = pasid_table;
> +	dev_priv->svm.pasid_state_table = pasid_state_table;
> +	dev_priv->svm.prq_ring = tables->vaddr + PRQ_OFFSET;
> +	dev_priv->svm.ivq_ring = tables->vaddr + IVQ_OFFSET;
> +
> +	/* Enable the page request queue */
> +	I915_WRITE64(SVM_PRQA, tables->busaddr + PRQ_OFFSET);
> +	I915_WRITE(SVM_PRQ_HEAD, 0);
> +	I915_WRITE(SVM_PRQ_TAIL, 0);
> +	I915_WRITE(SVM_PRECTL, 0);
> +
> +	/* Set up the invalidation request queue */
> +	I915_WRITE64(SVM_IQA, tables->busaddr + IVQ_OFFSET);
> +	I915_WRITE(SVM_IVQ_HEAD, 0);
> +	I915_WRITE(SVM_IVQ_TAIL, 0);
> +	I915_WRITE(SVM_IECTL, 0);
> +
> +	I915_WRITE(SVM_GCMD, GCMD_QIE);
> +	if (wait_for(I915_READ(SVM_GSTS) & GSTS_QIES, 500))
> +		DRM_ERROR("timed out waiting for queued invalidation enable\n");
> +
> +	/* All set, program the root */
> +	I915_WRITE(SVM_RTADDR, tables->busaddr | SVM_RTT_TYPE_EXT);
> +
> +	I915_WRITE(SVM_GCMD, GCMD_SRTP);
> +	if (wait_for(I915_READ(SVM_GSTS) & GSTS_RTPS, 500))
> +		DRM_ERROR("timed out waiting for root table to load\n");
> +
> +	DRM_ERROR("programmed SVM root, vaddr %p, busaddr 0x%16llx\n",
> +		  tables->vaddr, tables->busaddr);
> +
> +	intel_iommu_tlb_flush(dev);
> +}
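
The GCMD-write-then-poll-GSTS pattern appears twice here (and a third time
for TE below), so it may be worth a tiny helper -- sketch only, the name is
mine:

static int intel_svm_gcmd(struct drm_i915_private *dev_priv, u32 cmd,
			  u32 sts_bit)
{
	I915_WRITE(SVM_GCMD, cmd);
	if (wait_for(I915_READ(SVM_GSTS) & sts_bit, 500)) {
		DRM_ERROR("SVM_GCMD 0x%08x: status 0x%08x never set\n",
			  cmd, sts_bit);
		return -ETIMEDOUT;
	}
	return 0;
}

and then the callers could actually fail the init instead of just shouting.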
> +
> +/*
> + * Probe for SVM capability.  If found:
> + *  - try to switch to driver mode
> + *  - set up root PASID table
> + *  - enable page fault and error handling interrupts
> + *  - allow SVM ioctls
> + */
> +void intel_init_svm(struct drm_device *dev)
> +{
> +	struct drm_i915_private *dev_priv = dev->dev_private;
> +	drm_dma_handle_t *tables;
> +	u32 dev_mode;
> +	int num_tables = 6;
> +
> +	dev_mode = I915_READ(BDW_SVM_DEV_MODE_CNFG);
> +	I915_WRITE(BDW_SVM_DEV_MODE_CNFG, dev_mode | BDW_SVM_MODE_DRIVER);
> +	dev_mode = I915_READ(BDW_SVM_DEV_MODE_CNFG);
> +#if defined(CONFIG_INTEL_IOMMU) || defined(IOMMU_SUPPORT)
> +#error must disable IOMMU support
> +#endif
> +	if (!dev_mode & BDW_SVM_MODE_DRIVER) {
> +		DRM_ERROR("driver mode not available, disabling SVM\n");
> +		goto err;
> +	}
> +
> +	tables = drm_pci_alloc(dev, PAGE_SIZE*num_tables, PAGE_SIZE);
> +	if (!tables) {
> +		DRM_ERROR("table alloc failed, disabling SVM\n");
> +		goto err;
> +	}
> +
> +	memset(tables->vaddr, 0, PAGE_SIZE*num_tables);
> +
> +	intel_init_svm_root_table(dev, tables);
> +
> +	spin_lock_init(&dev_priv->svm.lock);
> +
> +#if 0
> +	I915_WRITE(SVM_GCMD, GCMD_TE);
> +	if (wait_for(I915_READ(SVM_GSTS) & GSTS_TES, 500))
> +		DRM_ERROR("timed out waiting for translation enable\n");
> +#endif
> +	INIT_WORK(&dev_priv->svm.work, intel_gpu_fault_work);
> +
> +	DRM_ERROR("SVM driver mode enabled\n");
> +	dev_priv->svm.svm_available = true;
> +	return;
> +
> +err:
> +	dev_priv->svm.svm_available = false;
> +	return;
> +}
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 40cbba4..1450491 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -217,6 +217,7 @@ enum {
>  	FAULT_AND_STREAM,
>  	FAULT_AND_CONTINUE /* Unsupported */
>  };
> +#define GEN8_CTX_FAULT_SHIFT 6
>  #define GEN8_CTX_ID_SHIFT 32
>  #define CTX_RCS_INDIRECT_CTX_OFFSET_DEFAULT 0x17
>
> @@ -289,12 +290,21 @@ uint64_t intel_lr_context_descriptor(struct intel_context *ctx,
>  	WARN_ON(lrca & 0xFFFFFFFF00000FFFULL);
>
>  	desc = GEN8_CTX_VALID;
> -	desc |= GEN8_CTX_ADDRESSING_MODE(dev) << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> -	if (IS_GEN8(ctx_obj->base.dev))
> -		desc |= GEN8_CTX_L3LLC_COHERENT;
> -	desc |= GEN8_CTX_PRIVILEGE;
> -	desc |= lrca;
> -	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
> +	if (ctx->is_svm) {
> +		desc |= ADVANCED_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
> +		desc |= FAULT_AND_STREAM << GEN8_CTX_FAULT_SHIFT;
> +		desc |= lrca;
> +		desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
> +	} else {
> +		desc |= GEN8_CTX_ADDRESSING_MODE(dev) <<
> +			GEN8_CTX_ADDRESSING_MODE_SHIFT;
> +		if (IS_GEN8(ctx_obj->base.dev))
> +			desc |= GEN8_CTX_L3LLC_COHERENT;
> +		desc |= GEN8_CTX_PRIVILEGE;
> +		desc |= lrca;
> +		desc |= (u64)intel_execlists_ctx_id(ctx_obj) <<
> +			GEN8_CTX_ID_SHIFT;
> +	}
>
>  	/* TODO: WaDisableLiteRestore when we start using semaphore
>  	 * signalling between Command Streamers */
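
On the descriptor hunk above: the two branches share the VALID bit, the
LRCA and the context id, so this could shrink to something like (untested
rearrangement of the same code):

	desc = GEN8_CTX_VALID;
	desc |= lrca;
	desc |= (u64)intel_execlists_ctx_id(ctx_obj) << GEN8_CTX_ID_SHIFT;
	if (ctx->is_svm) {
		desc |= ADVANCED_CONTEXT << GEN8_CTX_ADDRESSING_MODE_SHIFT;
		desc |= FAULT_AND_STREAM << GEN8_CTX_FAULT_SHIFT;
	} else {
		desc |= GEN8_CTX_ADDRESSING_MODE(dev) <<
			GEN8_CTX_ADDRESSING_MODE_SHIFT;
		if (IS_GEN8(ctx_obj->base.dev))
			desc |= GEN8_CTX_L3LLC_COHERENT;
		desc |= GEN8_CTX_PRIVILEGE;
	}

which keeps the SVM-specific part down to the addressing mode and the
fault handling mode.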
> @@ -545,7 +555,7 @@ void intel_lrc_irq_handler(struct intel_engine_cs *ring)
>  		   _MASKED_FIELD(0x07 << 8, ((u32)ring->next_context_status_buffer & 0x07) << 8));
>  }
>
> -static int execlists_context_queue(struct drm_i915_gem_request *request)
> +int execlists_context_queue(struct drm_i915_gem_request *request)
>  {
>  	struct intel_engine_cs *ring = request->ring;
>  	struct drm_i915_gem_request *cursor;
> @@ -2273,31 +2283,40 @@ populate_lr_context(struct intel_context *ctx, struct drm_i915_gem_object *ctx_o
>  	reg_state[CTX_LRI_HEADER_1] |= MI_LRI_FORCE_POSTED;
>  	reg_state[CTX_CTX_TIMESTAMP] = ring->mmio_base + 0x3a8;
>  	reg_state[CTX_CTX_TIMESTAMP+1] = 0;
> -	reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
> -	reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
> -	reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
> -	reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
> -	reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
> -	reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
> -	reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> -	reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> -
> -	if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> -		/* 64b PPGTT (48bit canonical)
> -		 * PDP0_DESCRIPTOR contains the base address to PML4 and
> -		 * other PDP Descriptors are ignored.
> -		 */
> -		ASSIGN_CTX_PML4(ppgtt, reg_state);
> +
> +	if (ctx->is_svm) {
> +		reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> +		reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> +		reg_state[CTX_PDP0_UDW+1] = 0;
> +		reg_state[CTX_PDP0_LDW+1] = ctx->pasid;
>  	} else {
> -		/* 32b PPGTT
> -		 * PDP*_DESCRIPTOR contains the base address of space supported.
> -		 * With dynamic page allocation, PDPs may not be allocated at
> -		 * this point. Point the unallocated PDPs to the scratch page
> -		 */
> -		ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> -		ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> -		ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> -		ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> +		reg_state[CTX_PDP3_UDW] = GEN8_RING_PDP_UDW(ring, 3);
> +		reg_state[CTX_PDP3_LDW] = GEN8_RING_PDP_LDW(ring, 3);
> +		reg_state[CTX_PDP2_UDW] = GEN8_RING_PDP_UDW(ring, 2);
> +		reg_state[CTX_PDP2_LDW] = GEN8_RING_PDP_LDW(ring, 2);
> +		reg_state[CTX_PDP1_UDW] = GEN8_RING_PDP_UDW(ring, 1);
> +		reg_state[CTX_PDP1_LDW] = GEN8_RING_PDP_LDW(ring, 1);
> +		reg_state[CTX_PDP0_UDW] = GEN8_RING_PDP_UDW(ring, 0);
> +		reg_state[CTX_PDP0_LDW] = GEN8_RING_PDP_LDW(ring, 0);
> +
> +		if (USES_FULL_48BIT_PPGTT(ppgtt->base.dev)) {
> +			/* 64b PPGTT (48bit canonical)
> +			 * PDP0_DESCRIPTOR contains the base address to PML4 and
> +			 * other PDP Descriptors are ignored.
> +			 */
> +			ASSIGN_CTX_PML4(ppgtt, reg_state);
> +		} else {
> +			/* 32b PPGTT
> +			 * PDP*_DESCRIPTOR contains the base address of space
> +			 * supported. With dynamic page allocation, PDPs may
> +			 * not be allocated at this point. Point the
> +			 * unallocated PDPs to the scratch page
> +			 */
> +			ASSIGN_CTX_PDP(ppgtt, reg_state, 3);
> +			ASSIGN_CTX_PDP(ppgtt, reg_state, 2);
> +			ASSIGN_CTX_PDP(ppgtt, reg_state, 1);
> +			ASSIGN_CTX_PDP(ppgtt, reg_state, 0);
> +		}
>  	}
>
>  	if (ring->id == RCS) {
> @@ -2327,6 +2346,12 @@ void intel_lr_context_free(struct intel_context *ctx)
>  {
>  	int i;
>
> +	if (ctx->is_svm) {
> +		intel_free_pasid(ctx->ims->dev_priv->dev, ctx);
> +		intel_unbind_mm(ctx);
> +		put_task_struct(ctx->tsk);
> +	}
> +
>  	for (i = 0; i < I915_NUM_RINGS; i++) {
>  		struct drm_i915_gem_object *ctx_obj = ctx->engine[i].state;
>
> @@ -2480,6 +2505,37 @@ int intel_lr_context_deferred_create(struct intel_context *ctx,
>
>  	}
>
> +	if (ctx->is_svm) {
> +		/* FIXME: just skip here, don't bail and trash the ctx */
> +		if (ring->id != RCS) {
> +			DRM_DEBUG_DRIVER("svm context only allowed on RCS\n");

That's fairly useless then :)
-Chris

-- 
Chris Wilson, Intel Open Source Technology Centre