On 2021-06-11 at 1:55 p.m., philip yang wrote:
>
>
> On 2021-06-10 7:29 p.m., Felix Kuehling wrote:
>> When some GPUs don't support SVM, don't disable it for the entire process.
>> That would be inconsistent with the information the process got from the
>> topology, which indicates SVM support per GPU.
>>
>> Instead disable SVM support only for the unsupported GPUs. This is done
>> by checking any per-device attributes against the bitmap of supported
>> GPUs. Also use the supported GPU bitmap to initialize access bitmaps for
>> new SVM address ranges.
>>
>> Don't handle recoverable page faults from unsupported GPUs. (I don't
>> think there will be unsupported GPUs that can generate recoverable page
>> faults. But better safe than sorry.)
>>
>> Signed-off-by: Felix Kuehling <Felix.Kuehling@xxxxxxx>
>
> It's a smart way to handle SVM-supported and non-supported GPUs mixed on
> one system.
>
> One suggestion inline. With or without the suggested change, this is
>
> Reviewed-by: Philip Yang <philip.yang@xxxxxxx>
>
>> ---
>>  drivers/gpu/drm/amd/amdkfd/kfd_chardev.c     |  3 --
>>  drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c |  4 --
>>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h        |  2 +-
>>  drivers/gpu/drm/amd/amdkfd/kfd_process.c     |  1 -
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.c         | 55 ++++++++++++--------
>>  drivers/gpu/drm/amd/amdkfd/kfd_svm.h         |  7 +++
>>  drivers/gpu/drm/amd/amdkfd/kfd_topology.c    |  6 +--
>>  7 files changed, 44 insertions(+), 34 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> index 5788a4656fa1..67541c30327a 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>> @@ -1797,9 +1797,6 @@ static int kfd_ioctl_svm(struct file *filep, struct kfd_process *p, void *data)
>>  	struct kfd_ioctl_svm_args *args = data;
>>  	int r = 0;
>>
>> -	if (p->svm_disabled)
>> -		return -EPERM;
>> -
>>  	pr_debug("start 0x%llx size 0x%llx op 0x%x nattr 0x%x\n",
>>  		 args->start_addr, args->size, args->op, args->nattr);
>>
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> index 91c50739b756..a9b329f0f862 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_flat_memory.c
>> @@ -405,10 +405,6 @@ int kfd_init_apertures(struct kfd_process *process)
>>  		case CHIP_POLARIS12:
>>  		case CHIP_VEGAM:
>>  			kfd_init_apertures_vi(pdd, id);
>> -			/* VI GPUs cannot support SVM with only
>> -			 * 40 bits of virtual address space.
>> -			 */
>> -			process->svm_disabled = true;
>>  			break;
>>  		case CHIP_VEGA10:
>>  		case CHIP_VEGA12:
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> index 329684ee5d6e..6dc22fa1e555 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
>> @@ -743,6 +743,7 @@ struct svm_range_list {
>>  	spinlock_t			deferred_list_lock;
>>  	atomic_t			evicted_ranges;
>>  	struct delayed_work		restore_work;
>> +	DECLARE_BITMAP(bitmap_supported, MAX_GPU_INSTANCE);
>>  };
>>
>>  /* Process data */
>> @@ -826,7 +827,6 @@ struct kfd_process {
>>
>>  	/* shared virtual memory registered by this process */
>>  	struct svm_range_list svms;
>> -	bool svm_disabled;
>>
>>  	bool xnack_enabled;
>>  };
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> index f1f40bba5c60..09b98a83f670 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
>> @@ -1260,7 +1260,6 @@ static struct kfd_process *create_process(const struct task_struct *thread)
>>  	process->mm = thread->mm;
>>  	process->lead_thread = thread->group_leader;
>>  	process->n_pdds = 0;
>> -	process->svm_disabled = false;
>>  	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
>>  	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
>>  	process->last_restore_timestamp = get_jiffies_64();
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> index 0f18bd0dc64e..420ca667be32 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.c
>> @@ -281,7 +281,8 @@ svm_range *svm_range_new(struct svm_range_list *svms, uint64_t start,
>>
>>  	p = container_of(svms, struct kfd_process, svms);
>>  	if (p->xnack_enabled)
>> -		bitmap_fill(prange->bitmap_access, MAX_GPU_INSTANCE);
>> +		bitmap_copy(prange->bitmap_access, svms->bitmap_supported,
>> +			    MAX_GPU_INSTANCE);
>>
>>  	svm_range_set_default_attributes(&prange->preferred_loc,
>>  					 &prange->prefetch_loc,
>> @@ -580,33 +581,25 @@ svm_range_check_attr(struct kfd_process *p,
>>  	int gpuidx;
>>
>>  	for (i = 0; i < nattr; i++) {
>
> Because we are here, maybe use a local variable to shorten the two
> kfd_process_gpuidx_from_gpuid lines into one line
>
> 	uint32_t val = attrs[i].value;
>
That's a good suggestion. I should also move the gpuidx declaration
inside the loop. I'll fix those two things before I submit.
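
Roughly, the reworked loop could look something like this (untested
sketch, just to illustrate the two changes together; the remaining attr
types would stay as they are in the patch below):

	for (i = 0; i < nattr; i++) {
		uint32_t val = attrs[i].value;
		int gpuidx = MAX_GPU_INSTANCE;

		switch (attrs[i].type) {
		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
			if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
			    val != KFD_IOCTL_SVM_LOCATION_UNDEFINED)
				gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
			break;
		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
			if (val != KFD_IOCTL_SVM_LOCATION_SYSMEM)
				gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
			break;
		case KFD_IOCTL_SVM_ATTR_ACCESS:
		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
			gpuidx = kfd_process_gpuidx_from_gpuid(p, val);
			break;
		/* ... remaining attr types unchanged ... */
		}

		/* common check of the GPU index against the supported-GPU
		 * bitmap, same as in the hunk below
		 */
		if (gpuidx < 0) {
			pr_debug("no GPU 0x%x found\n", val);
			return -EINVAL;
		} else if (gpuidx < MAX_GPU_INSTANCE &&
			   !test_bit(gpuidx, p->svms.bitmap_supported)) {
			pr_debug("GPU 0x%x not supported\n", val);
			return -EINVAL;
		}
	}
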
Thanks,
  Felix

>> +		gpuidx = MAX_GPU_INSTANCE;
>> +
>>  		switch (attrs[i].type) {
>>  		case KFD_IOCTL_SVM_ATTR_PREFERRED_LOC:
>>  			if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
>> -			    attrs[i].value != KFD_IOCTL_SVM_LOCATION_UNDEFINED &&
>> -			    kfd_process_gpuidx_from_gpuid(p,
>> -							  attrs[i].value) < 0) {
>> -				pr_debug("no GPU 0x%x found\n", attrs[i].value);
>> -				return -EINVAL;
>> -			}
>> +			    attrs[i].value != KFD_IOCTL_SVM_LOCATION_UNDEFINED)
>> +				gpuidx = kfd_process_gpuidx_from_gpuid(p,
>> +								       attrs[i].value);
>>  			break;
>>  		case KFD_IOCTL_SVM_ATTR_PREFETCH_LOC:
>> -			if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM &&
>> -			    kfd_process_gpuidx_from_gpuid(p,
>> -							  attrs[i].value) < 0) {
>> -				pr_debug("no GPU 0x%x found\n", attrs[i].value);
>> -				return -EINVAL;
>> -			}
>> +			if (attrs[i].value != KFD_IOCTL_SVM_LOCATION_SYSMEM)
>> +				gpuidx = kfd_process_gpuidx_from_gpuid(p,
>> +								       attrs[i].value);
>>  			break;
>>  		case KFD_IOCTL_SVM_ATTR_ACCESS:
>>  		case KFD_IOCTL_SVM_ATTR_ACCESS_IN_PLACE:
>>  		case KFD_IOCTL_SVM_ATTR_NO_ACCESS:
>>  			gpuidx = kfd_process_gpuidx_from_gpuid(p,
>>  							       attrs[i].value);
>> -			if (gpuidx < 0) {
>> -				pr_debug("no GPU 0x%x found\n", attrs[i].value);
>> -				return -EINVAL;
>> -			}
>>  			break;
>>  		case KFD_IOCTL_SVM_ATTR_SET_FLAGS:
>>  			break;
>> @@ -618,6 +611,15 @@ svm_range_check_attr(struct kfd_process *p,
>>  			pr_debug("unknown attr type 0x%x\n", attrs[i].type);
>>  			return -EINVAL;
>>  		}
>> +
>> +		if (gpuidx < 0) {
>> +			pr_debug("no GPU 0x%x found\n", attrs[i].value);
>> +			return -EINVAL;
>> +		} else if (gpuidx < MAX_GPU_INSTANCE &&
>> +			   !test_bit(gpuidx, p->svms.bitmap_supported)) {
>> +			pr_debug("GPU 0x%x not supported\n", attrs[i].value);
>> +			return -EINVAL;
>> +		}
>>  	}
>>
>>  	return 0;
>> @@ -1855,7 +1857,7 @@ static void svm_range_drain_retry_fault(struct svm_range_list *svms)
>>
>>  	p = container_of(svms, struct kfd_process, svms);
>>
>> -	for (i = 0; i < p->n_pdds; i++) {
>> +	for_each_set_bit(i, svms->bitmap_supported, p->n_pdds) {
>>  		pdd = p->pdds[i];
>>  		if (!pdd)
>>  			continue;
>> @@ -2325,6 +2327,11 @@ svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid,
>>  	bool write_locked = false;
>>  	int r = 0;
>>
>> +	if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) {
>> +		pr_debug("device does not support SVM\n");
>> +		return -EFAULT;
>> +	}
>> +
>>  	p = kfd_lookup_process_by_pasid(pasid);
>>  	if (!p) {
>>  		pr_debug("kfd process not founded pasid 0x%x\n", pasid);
>> @@ -2472,6 +2479,7 @@ void svm_range_list_fini(struct kfd_process *p)
>>  int svm_range_list_init(struct kfd_process *p)
>>  {
>>  	struct svm_range_list *svms = &p->svms;
>> +	int i;
>>
>>  	svms->objects = RB_ROOT_CACHED;
>>  	mutex_init(&svms->lock);
>> @@ -2482,6 +2490,10 @@ int svm_range_list_init(struct kfd_process *p)
>>  	INIT_LIST_HEAD(&svms->deferred_range_list);
>>  	spin_lock_init(&svms->deferred_list_lock);
>>
>> +	for (i = 0; i < p->n_pdds; i++)
>> +		if (KFD_IS_SVM_API_SUPPORTED(p->pdds[i]->dev))
>> +			bitmap_set(svms->bitmap_supported, i, 1);
>> +
>>  	return 0;
>>  }
>>
>> @@ -2978,14 +2990,15 @@ svm_range_get_attr(struct kfd_process *p, uint64_t start, uint64_t size,
>>  		svm_range_set_default_attributes(&location, &prefetch_loc,
>>  						 &granularity, &flags);
>>  		if (p->xnack_enabled)
>> -			bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
>> +			bitmap_copy(bitmap_access, svms->bitmap_supported,
>> +				    MAX_GPU_INSTANCE);
>>  		else
>>  			bitmap_zero(bitmap_access, MAX_GPU_INSTANCE);
>>  		bitmap_zero(bitmap_aip, MAX_GPU_INSTANCE);
>>  		goto fill_values;
>>  	}
>> -	bitmap_fill(bitmap_access, MAX_GPU_INSTANCE);
>> -	bitmap_fill(bitmap_aip, MAX_GPU_INSTANCE);
>> +	bitmap_copy(bitmap_access, svms->bitmap_supported, MAX_GPU_INSTANCE);
>> +	bitmap_copy(bitmap_aip, svms->bitmap_supported, MAX_GPU_INSTANCE);
>>
>>  	while (node) {
>>  		struct interval_tree_node *next;
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> index 573f984b81fe..0c0fc399395e 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_svm.h
>> @@ -175,6 +175,11 @@ void svm_range_dma_unmap(struct device *dev, dma_addr_t *dma_addr,
>>  void svm_range_free_dma_mappings(struct svm_range *prange);
>>  void svm_range_prefault(struct svm_range *prange, struct mm_struct *mm);
>>
>> +/* SVM API and HMM page migration work together, device memory type
>> + * is initialized to not 0 when page migration register device memory.
>> + */
>> +#define KFD_IS_SVM_API_SUPPORTED(dev) ((dev)->pgmap.type != 0)
>> +
>>  #else
>>
>>  struct kfd_process;
>> @@ -201,6 +206,8 @@ static inline int svm_range_schedule_evict_svm_bo(
>>  	return -EINVAL;
>>  }
>>
>> +#define KFD_IS_SVM_API_SUPPORTED(dev) false
>> +
>>  #endif /* IS_ENABLED(CONFIG_HSA_AMD_SVM) */
>>
>>  #endif /* KFD_SVM_H_ */
>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> index f668b8cc2b57..ff4e296c1c58 100644
>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
>> @@ -36,6 +36,7 @@
>>  #include "kfd_topology.h"
>>  #include "kfd_device_queue_manager.h"
>>  #include "kfd_iommu.h"
>> +#include "kfd_svm.h"
>>  #include "amdgpu_amdkfd.h"
>>  #include "amdgpu_ras.h"
>>
>> @@ -1441,10 +1442,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
>>  	dev->node_props.capability |= (adev->ras_enabled != 0) ?
>>  		HSA_CAP_RASEVENTNOTIFY : 0;
>>
>> -	/* SVM API and HMM page migration work together, device memory type
>> -	 * is initialized to not 0 when page migration register device memory.
>> -	 */
>> -	if (adev->kfd.dev->pgmap.type != 0)
>> +	if (KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev))
>>  		dev->node_props.capability |= HSA_CAP_SVMAPI_SUPPORTED;
>>
>>  	kfd_debug_print_topology();

_______________________________________________
amd-gfx mailing list
amd-gfx@xxxxxxxxxxxxxxxxxxxxx
https://lists.freedesktop.org/mailman/listinfo/amd-gfx