On Wed, 7 Apr 2021 at 18:43, Alistair Popple <apopple@xxxxxxxxxx> wrote:
>
> Some NVIDIA GPUs do not support direct atomic access to system memory
> via PCIe. Instead this must be emulated by granting the GPU exclusive
> access to the memory. This is achieved by replacing CPU page table
> entries with special swap entries that fault on userspace access.
>
> The driver then grants the GPU permission to update the page undergoing
> atomic access via the GPU page tables. When CPU access to the page is
> required a CPU fault is raised which calls into the device driver via
> MMU notifiers to revoke the atomic access. The original page table
> entries are then restored allowing CPU access to proceed.
>
> Signed-off-by: Alistair Popple <apopple@xxxxxxxxxx>
The Nouveau bits at least look good to me. For patches 7/8:

Reviewed-by: Ben Skeggs <bskeggs@xxxxxxxxxx>

>
> ---
>
> v7:
> * Removed magic values for fault access levels
> * Improved readability of fault comparison code
>
> v4:
> * Check that page table entries haven't changed before mapping on the
>   device
> ---
>  drivers/gpu/drm/nouveau/include/nvif/if000c.h |   1 +
>  drivers/gpu/drm/nouveau/nouveau_svm.c         | 126 ++++++++++++++++--
>  drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h |   1 +
>  .../drm/nouveau/nvkm/subdev/mmu/vmmgp100.c    |   6 +
>  4 files changed, 123 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/gpu/drm/nouveau/include/nvif/if000c.h b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
> index d6dd40f21eed..9c7ff56831c5 100644
> --- a/drivers/gpu/drm/nouveau/include/nvif/if000c.h
> +++ b/drivers/gpu/drm/nouveau/include/nvif/if000c.h
> @@ -77,6 +77,7 @@ struct nvif_vmm_pfnmap_v0 {
>  #define NVIF_VMM_PFNMAP_V0_APER                   0x00000000000000f0ULL
>  #define NVIF_VMM_PFNMAP_V0_HOST                   0x0000000000000000ULL
>  #define NVIF_VMM_PFNMAP_V0_VRAM                   0x0000000000000010ULL
> +#define NVIF_VMM_PFNMAP_V0_A                      0x0000000000000004ULL
>  #define NVIF_VMM_PFNMAP_V0_W                      0x0000000000000002ULL
>  #define NVIF_VMM_PFNMAP_V0_V                      0x0000000000000001ULL
>  #define NVIF_VMM_PFNMAP_V0_NONE                   0x0000000000000000ULL
> diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
> index a195e48c9aee..81526d65b4e2 100644
> --- a/drivers/gpu/drm/nouveau/nouveau_svm.c
> +++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
> @@ -35,6 +35,7 @@
>  #include <linux/sched/mm.h>
>  #include <linux/sort.h>
>  #include <linux/hmm.h>
> +#include <linux/rmap.h>
>
>  struct nouveau_svm {
>  	struct nouveau_drm *drm;
> @@ -67,6 +68,11 @@ struct nouveau_svm {
>  	} buffer[1];
>  };
>
> +#define FAULT_ACCESS_READ 0
> +#define FAULT_ACCESS_WRITE 1
> +#define FAULT_ACCESS_ATOMIC 2
> +#define FAULT_ACCESS_PREFETCH 3
> +
>  #define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a)
>  #define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a)
>
> @@ -411,6 +417,24 @@ nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm,
>  			      fault->client);
>  }
>
> +static int
> +nouveau_svm_fault_priority(u8 fault)
> +{
> +	switch (fault) {
> +	case FAULT_ACCESS_PREFETCH:
> +		return 0;
> +	case FAULT_ACCESS_READ:
> +		return 1;
> +	case FAULT_ACCESS_WRITE:
> +		return 2;
> +	case FAULT_ACCESS_ATOMIC:
> +		return 3;
> +	default:
> +		WARN_ON_ONCE(1);
> +		return -1;
> +	}
> +}
> +
>  static int
>  nouveau_svm_fault_cmp(const void *a, const void *b)
>  {
> @@ -421,9 +445,8 @@ nouveau_svm_fault_cmp(const void *a, const void *b)
>  		return ret;
>  	if ((ret = (s64)fa->addr - fb->addr))
>  		return ret;
> -	/*XXX: atomic? */
> -	return (fa->access == 0 || fa->access == 3) -
> -	       (fb->access == 0 || fb->access == 3);
> +	return nouveau_svm_fault_priority(fa->access) -
> +	       nouveau_svm_fault_priority(fb->access);
>  }
>
>  static void
> @@ -487,6 +510,10 @@ static bool nouveau_svm_range_invalidate(struct mmu_interval_notifier *mni,
>  	struct svm_notifier *sn =
>  		container_of(mni, struct svm_notifier, notifier);
>
> +	if (range->event == MMU_NOTIFY_EXCLUSIVE &&
> +	    range->owner == sn->svmm->vmm->cli->drm->dev)
> +		return true;
> +
>  	/*
>  	 * serializes the update to mni->invalidate_seq done by caller and
>  	 * prevents invalidation of the PTE from progressing while HW is being
> @@ -555,6 +582,71 @@ static void nouveau_hmm_convert_pfn(struct nouveau_drm *drm,
>  		args->p.phys[0] |= NVIF_VMM_PFNMAP_V0_W;
>  }
>
> +static int nouveau_atomic_range_fault(struct nouveau_svmm *svmm,
> +				      struct nouveau_drm *drm,
> +				      struct nouveau_pfnmap_args *args, u32 size,
> +				      struct svm_notifier *notifier)
> +{
> +	unsigned long timeout =
> +		jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
> +	struct mm_struct *mm = svmm->notifier.mm;
> +	struct page *page;
> +	unsigned long start = args->p.addr;
> +	unsigned long notifier_seq;
> +	int ret = 0;
> +
> +	ret = mmu_interval_notifier_insert(&notifier->notifier, mm,
> +					   args->p.addr, args->p.size,
> +					   &nouveau_svm_mni_ops);
> +	if (ret)
> +		return ret;
> +
> +	while (true) {
> +		if (time_after(jiffies, timeout)) {
> +			ret = -EBUSY;
> +			goto out;
> +		}
> +
> +		notifier_seq = mmu_interval_read_begin(&notifier->notifier);
> +		mmap_read_lock(mm);
> +		make_device_exclusive_range(mm, start, start + PAGE_SIZE,
> +					    &page, drm->dev);
> +		mmap_read_unlock(mm);
> +		if (!page) {
> +			ret = -EINVAL;
> +			goto out;
> +		}
> +
> +		mutex_lock(&svmm->mutex);
> +		if (!mmu_interval_read_retry(&notifier->notifier,
> +					     notifier_seq))
> +			break;
> +		mutex_unlock(&svmm->mutex);
> +	}
> +
> +	/* Map the page on the GPU. */
> +	args->p.page = 12;
> +	args->p.size = PAGE_SIZE;
> +	args->p.addr = start;
> +	args->p.phys[0] = page_to_phys(page) |
> +		NVIF_VMM_PFNMAP_V0_V |
> +		NVIF_VMM_PFNMAP_V0_W |
> +		NVIF_VMM_PFNMAP_V0_A |
> +		NVIF_VMM_PFNMAP_V0_HOST;
> +
> +	svmm->vmm->vmm.object.client->super = true;
> +	ret = nvif_object_ioctl(&svmm->vmm->vmm.object, args, size, NULL);
> +	svmm->vmm->vmm.object.client->super = false;
> +	mutex_unlock(&svmm->mutex);
> +
> +	unlock_page(page);
> +	put_page(page);
> +
> +out:
> +	mmu_interval_notifier_remove(&notifier->notifier);
> +	return ret;
> +}
> +
>  static int nouveau_range_fault(struct nouveau_svmm *svmm,
>  			       struct nouveau_drm *drm,
>  			       struct nouveau_pfnmap_args *args, u32 size,
> @@ -637,7 +729,7 @@ nouveau_svm_fault(struct nvif_notify *notify)
>  	unsigned long hmm_flags;
>  	u64 inst, start, limit;
>  	int fi, fn;
> -	int replay = 0, ret;
> +	int replay = 0, atomic = 0, ret;
>
>  	/* Parse available fault buffer entries into a cache, and update
>  	 * the GET pointer so HW can reuse the entries.
> @@ -718,12 +810,14 @@ nouveau_svm_fault(struct nvif_notify *notify)
>  		/*
>  		 * Determine required permissions based on GPU fault
>  		 * access flags.
> -		 * XXX: atomic?
>  		 */
>  		switch (buffer->fault[fi]->access) {
>  		case 0: /* READ. */
>  			hmm_flags = HMM_PFN_REQ_FAULT;
>  			break;
> +		case 2: /* ATOMIC. */
> +			atomic = true;
> +			break;
>  		case 3: /* PREFETCH. */
>  			hmm_flags = 0;
>  			break;
> @@ -739,8 +833,14 @@ nouveau_svm_fault(struct nvif_notify *notify)
>  		}
>
>  		notifier.svmm = svmm;
> -		ret = nouveau_range_fault(svmm, svm->drm, &args.i,
> -					  sizeof(args), hmm_flags, &notifier);
> +		if (atomic)
> +			ret = nouveau_atomic_range_fault(svmm, svm->drm,
> +							 &args.i, sizeof(args),
> +							 &notifier);
> +		else
> +			ret = nouveau_range_fault(svmm, svm->drm, &args.i,
> +						  sizeof(args), hmm_flags,
> +						  &notifier);
>  		mmput(mm);
>
>  		limit = args.i.p.addr + args.i.p.size;
> @@ -756,11 +856,15 @@ nouveau_svm_fault(struct nvif_notify *notify)
>  		 */
>  		if (buffer->fault[fn]->svmm != svmm ||
>  		    buffer->fault[fn]->addr >= limit ||
> -		    (buffer->fault[fi]->access == 0 /* READ. */ &&
> +		    (buffer->fault[fi]->access == FAULT_ACCESS_READ &&
>  		     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_V)) ||
> -		    (buffer->fault[fi]->access != 0 /* READ. */ &&
> -		     buffer->fault[fi]->access != 3 /* PREFETCH. */ &&
> -		     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)))
> +		    (buffer->fault[fi]->access != FAULT_ACCESS_READ &&
> +		     buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH &&
> +		     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_W)) ||
> +		    (buffer->fault[fi]->access != FAULT_ACCESS_READ &&
> +		     buffer->fault[fi]->access != FAULT_ACCESS_WRITE &&
> +		     buffer->fault[fi]->access != FAULT_ACCESS_PREFETCH &&
> +		     !(args.phys[0] & NVIF_VMM_PFNMAP_V0_A)))
>  			break;
>  	}
>
> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
> index a2b179568970..f6188aa9171c 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
> @@ -178,6 +178,7 @@ void nvkm_vmm_unmap_region(struct nvkm_vmm *, struct nvkm_vma *);
>  #define NVKM_VMM_PFN_APER                         0x00000000000000f0ULL
>  #define NVKM_VMM_PFN_HOST                         0x0000000000000000ULL
>  #define NVKM_VMM_PFN_VRAM                         0x0000000000000010ULL
> +#define NVKM_VMM_PFN_A                            0x0000000000000004ULL
>  #define NVKM_VMM_PFN_W                            0x0000000000000002ULL
>  #define NVKM_VMM_PFN_V                            0x0000000000000001ULL
>  #define NVKM_VMM_PFN_NONE                         0x0000000000000000ULL
> diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
> index 236db5570771..f02abd9cb4dd 100644
> --- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
> +++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmgp100.c
> @@ -88,6 +88,9 @@ gp100_vmm_pgt_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
>  		if (!(*map->pfn & NVKM_VMM_PFN_W))
>  			data |= BIT_ULL(6); /* RO. */
>
> +		if (!(*map->pfn & NVKM_VMM_PFN_A))
> +			data |= BIT_ULL(7); /* Atomic disable. */
> +
>  		if (!(*map->pfn & NVKM_VMM_PFN_VRAM)) {
>  			addr = *map->pfn >> NVKM_VMM_PFN_ADDR_SHIFT;
>  			addr = dma_map_page(dev, pfn_to_page(addr), 0,
> @@ -322,6 +325,9 @@ gp100_vmm_pd0_pfn(struct nvkm_vmm *vmm, struct nvkm_mmu_pt *pt,
>  		if (!(*map->pfn & NVKM_VMM_PFN_W))
>  			data |= BIT_ULL(6); /* RO. */
>
> +		if (!(*map->pfn & NVKM_VMM_PFN_A))
> +			data |= BIT_ULL(7); /* Atomic disable. */
> +
>  		if (!(*map->pfn & NVKM_VMM_PFN_VRAM)) {
>  			addr = *map->pfn >> NVKM_VMM_PFN_ADDR_SHIFT;
>  			addr = dma_map_page(dev, pfn_to_page(addr), 0,
> --
> 2.20.1
>
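As an aside for anyone reading along who hasn't used the new
device-exclusive API yet: stripped of the nouveau plumbing, the retry
loop in nouveau_atomic_range_fault() above boils down to the pattern
below. This is only a sketch, not code from the patch:
foo_make_range_gpu_atomic() and foo_map_on_gpu() are made-up names,
error/timeout handling is elided, and it assumes the
mmu_interval_notifier has already been inserted over the range and that
*lock is the same mutex the driver's invalidate() callback takes.

#include <linux/mm.h>
#include <linux/mmu_notifier.h>
#include <linux/rmap.h>

static int foo_make_range_gpu_atomic(struct mm_struct *mm,
				     struct mmu_interval_notifier *mni,
				     struct mutex *lock, unsigned long addr,
				     void *owner)
{
	unsigned long seq;
	struct page *page = NULL;

	while (true) {
		/* Snapshot the notifier sequence before faulting. */
		seq = mmu_interval_read_begin(mni);

		/*
		 * Swap the CPU PTE for a device-exclusive entry; any
		 * later CPU touch will fault and revoke our access.
		 * The page comes back locked with a reference held.
		 */
		mmap_read_lock(mm);
		make_device_exclusive_range(mm, addr, addr + PAGE_SIZE,
					    &page, owner);
		mmap_read_unlock(mm);
		if (!page)
			return -EINVAL;

		/*
		 * An invalidation may have raced with us; check under
		 * the same lock the invalidate() callback takes so the
		 * result stays true while we program the GPU.
		 */
		mutex_lock(lock);
		if (!mmu_interval_read_retry(mni, seq))
			break;
		mutex_unlock(lock);
		unlock_page(page);
		put_page(page);
	}

	foo_map_on_gpu(page_to_phys(page), addr); /* placeholder */
	mutex_unlock(lock);

	unlock_page(page);
	put_page(page);
	return 0;
}

The read_begin()/read_retry() pairing is what makes this safe: if the
final check were done outside the invalidate lock, a concurrent revoke
could complete between make_device_exclusive_range() and the GPU map,
leaving the GPU pointing at a stale entry.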