Am 07.02.2018 um 02:32 schrieb Felix Kuehling: > dGPUs work without IOMMUv2. Make IOMMUv2 initialization dependent on > ASIC information. Also allow building KFD without IOMMUv2 support. > This is still useful for dGPUs and prepares for enabling KFD on > architectures that don't support AMD IOMMUv2. > > v2: > * Centralize IOMMUv2 code to avoid #ifdefs in too many places > > Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com> > --- > drivers/gpu/drm/amd/amdkfd/Kconfig | 2 +- > drivers/gpu/drm/amd/amdkfd/Makefile | 4 + > drivers/gpu/drm/amd/amdkfd/kfd_crat.c | 14 +- > drivers/gpu/drm/amd/amdkfd/kfd_device.c | 127 +++-------- > drivers/gpu/drm/amd/amdkfd/kfd_events.c | 3 + > drivers/gpu/drm/amd/amdkfd/kfd_iommu.c | 356 ++++++++++++++++++++++++++++++ > drivers/gpu/drm/amd/amdkfd/kfd_iommu.h | 78 +++++++ > drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 14 +- > drivers/gpu/drm/amd/amdkfd/kfd_process.c | 138 +----------- > drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 16 +- > drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 6 +- > 11 files changed, 493 insertions(+), 265 deletions(-) > create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_iommu.c > create mode 100644 drivers/gpu/drm/amd/amdkfd/kfd_iommu.h > > diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig > index bc5a294..5bbeb95 100644 > --- a/drivers/gpu/drm/amd/amdkfd/Kconfig > +++ b/drivers/gpu/drm/amd/amdkfd/Kconfig > @@ -4,6 +4,6 @@ > > config HSA_AMD > tristate "HSA kernel driver for AMD GPU devices" > - depends on DRM_AMDGPU && AMD_IOMMU_V2 && X86_64 > + depends on DRM_AMDGPU && X86_64 You still need a weak dependency on AMD_IOMMU_V2 here; in other words, add "imply AMD_IOMMU_V2". This prevents illegal combinations like linking amdkfd into the kernel while amd_iommu_v2 is a module. It should still be possible, however, to disable amd_iommu_v2 completely and compile amdkfd without support for it. Christian. > help > Enable this if you want to use HSA features on AMD GPU devices. 
> diff --git a/drivers/gpu/drm/amd/amdkfd/Makefile b/drivers/gpu/drm/amd/amdkfd/Makefile > index a317e76..0d02422 100644 > --- a/drivers/gpu/drm/amd/amdkfd/Makefile > +++ b/drivers/gpu/drm/amd/amdkfd/Makefile > @@ -37,6 +37,10 @@ amdkfd-y := kfd_module.o kfd_device.o kfd_chardev.o kfd_topology.o \ > kfd_interrupt.o kfd_events.o cik_event_interrupt.o \ > kfd_dbgdev.o kfd_dbgmgr.o kfd_crat.o > > +ifneq ($(CONFIG_AMD_IOMMU_V2),) > +amdkfd-y += kfd_iommu.o > +endif > + > amdkfd-$(CONFIG_DEBUG_FS) += kfd_debugfs.o > > obj-$(CONFIG_HSA_AMD) += amdkfd.o > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > index 2bc2816..7493f47 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c > @@ -22,10 +22,10 @@ > > #include <linux/pci.h> > #include <linux/acpi.h> > -#include <linux/amd-iommu.h> > #include "kfd_crat.h" > #include "kfd_priv.h" > #include "kfd_topology.h" > +#include "kfd_iommu.h" > > /* GPU Processor ID base for dGPUs for which VCRAT needs to be created. > * GPU processor ID are expressed with Bit[31]=1. > @@ -1037,15 +1037,11 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, > struct crat_subtype_generic *sub_type_hdr; > struct crat_subtype_computeunit *cu; > struct kfd_cu_info cu_info; > - struct amd_iommu_device_info iommu_info; > int avail_size = *size; > uint32_t total_num_of_cu; > int num_of_cache_entries = 0; > int cache_mem_filled = 0; > int ret = 0; > - const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | > - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | > - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; > struct kfd_local_mem_info local_mem_info; > > if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_GPU) > @@ -1106,12 +1102,8 @@ static int kfd_create_vcrat_image_gpu(void *pcrat_image, > /* Check if this node supports IOMMU. 
During parsing this flag will > * translate to HSA_CAP_ATS_PRESENT > */ > - iommu_info.flags = 0; > - if (amd_iommu_device_info(kdev->pdev, &iommu_info) == 0) { > - if ((iommu_info.flags & required_iommu_flags) == > - required_iommu_flags) > - cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; > - } > + if (!kfd_iommu_check_device(kdev)) > + cu->hsa_capability |= CRAT_CU_FLAGS_IOMMU_PRESENT; > > crat_table->length += sub_type_hdr->length; > crat_table->total_entries++; > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > index 83d6f41..4ac2d61 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c > @@ -20,7 +20,9 @@ > * OTHER DEALINGS IN THE SOFTWARE. > */ > > +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) > #include <linux/amd-iommu.h> > +#endif > #include <linux/bsearch.h> > #include <linux/pci.h> > #include <linux/slab.h> > @@ -28,9 +30,11 @@ > #include "kfd_device_queue_manager.h" > #include "kfd_pm4_headers_vi.h" > #include "cwsr_trap_handler_gfx8.asm" > +#include "kfd_iommu.h" > > #define MQD_SIZE_ALIGNED 768 > > +#ifdef KFD_SUPPORT_IOMMU_V2 > static const struct kfd_device_info kaveri_device_info = { > .asic_family = CHIP_KAVERI, > .max_pasid_bits = 16, > @@ -41,6 +45,7 @@ static const struct kfd_device_info kaveri_device_info = { > .num_of_watch_points = 4, > .mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = false, > + .needs_iommu_device = true, > .needs_pci_atomics = false, > }; > > @@ -54,8 +59,10 @@ static const struct kfd_device_info carrizo_device_info = { > .num_of_watch_points = 4, > .mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = true, > + .needs_iommu_device = true, > .needs_pci_atomics = false, > }; > +#endif > > static const struct kfd_device_info hawaii_device_info = { > .asic_family = CHIP_HAWAII, > @@ -67,6 +74,7 @@ static const struct kfd_device_info hawaii_device_info = { > .num_of_watch_points = 4, > 
.mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = false, > + .needs_iommu_device = false, > .needs_pci_atomics = false, > }; > > @@ -79,6 +87,7 @@ static const struct kfd_device_info tonga_device_info = { > .num_of_watch_points = 4, > .mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = false, > + .needs_iommu_device = false, > .needs_pci_atomics = true, > }; > > @@ -91,6 +100,7 @@ static const struct kfd_device_info tonga_vf_device_info = { > .num_of_watch_points = 4, > .mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = false, > + .needs_iommu_device = false, > .needs_pci_atomics = false, > }; > > @@ -103,6 +113,7 @@ static const struct kfd_device_info fiji_device_info = { > .num_of_watch_points = 4, > .mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = true, > + .needs_iommu_device = false, > .needs_pci_atomics = true, > }; > > @@ -115,6 +126,7 @@ static const struct kfd_device_info fiji_vf_device_info = { > .num_of_watch_points = 4, > .mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = true, > + .needs_iommu_device = false, > .needs_pci_atomics = false, > }; > > @@ -128,6 +140,7 @@ static const struct kfd_device_info polaris10_device_info = { > .num_of_watch_points = 4, > .mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = true, > + .needs_iommu_device = false, > .needs_pci_atomics = true, > }; > > @@ -140,6 +153,7 @@ static const struct kfd_device_info polaris10_vf_device_info = { > .num_of_watch_points = 4, > .mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = true, > + .needs_iommu_device = false, > .needs_pci_atomics = false, > }; > > @@ -152,6 +166,7 @@ static const struct kfd_device_info polaris11_device_info = { > .num_of_watch_points = 4, > .mqd_size_aligned = MQD_SIZE_ALIGNED, > .supports_cwsr = true, > + .needs_iommu_device = false, > .needs_pci_atomics = true, > }; > > @@ -162,6 +177,7 @@ struct kfd_deviceid { > }; > > static const struct kfd_deviceid supported_devices[] = { > +#ifdef KFD_SUPPORT_IOMMU_V2 > { 
0x1304, &kaveri_device_info }, /* Kaveri */ > { 0x1305, &kaveri_device_info }, /* Kaveri */ > { 0x1306, &kaveri_device_info }, /* Kaveri */ > @@ -189,6 +205,7 @@ static const struct kfd_deviceid supported_devices[] = { > { 0x9875, &carrizo_device_info }, /* Carrizo */ > { 0x9876, &carrizo_device_info }, /* Carrizo */ > { 0x9877, &carrizo_device_info }, /* Carrizo */ > +#endif > { 0x67A0, &hawaii_device_info }, /* Hawaii */ > { 0x67A1, &hawaii_device_info }, /* Hawaii */ > { 0x67A2, &hawaii_device_info }, /* Hawaii */ > @@ -302,77 +319,6 @@ struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, > return kfd; > } > > -static bool device_iommu_pasid_init(struct kfd_dev *kfd) > -{ > - const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | > - AMD_IOMMU_DEVICE_FLAG_PRI_SUP | > - AMD_IOMMU_DEVICE_FLAG_PASID_SUP; > - > - struct amd_iommu_device_info iommu_info; > - unsigned int pasid_limit; > - int err; > - > - err = amd_iommu_device_info(kfd->pdev, &iommu_info); > - if (err < 0) { > - dev_err(kfd_device, > - "error getting iommu info. 
is the iommu enabled?\n"); > - return false; > - } > - > - if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { > - dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", > - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, > - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, > - (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) > - != 0); > - return false; > - } > - > - pasid_limit = min_t(unsigned int, > - (unsigned int)(1 << kfd->device_info->max_pasid_bits), > - iommu_info.max_pasids); > - > - if (!kfd_set_pasid_limit(pasid_limit)) { > - dev_err(kfd_device, "error setting pasid limit\n"); > - return false; > - } > - > - return true; > -} > - > -static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) > -{ > - struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); > - > - if (dev) > - kfd_process_iommu_unbind_callback(dev, pasid); > -} > - > -/* > - * This function called by IOMMU driver on PPR failure > - */ > -static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, > - unsigned long address, u16 flags) > -{ > - struct kfd_dev *dev; > - > - dev_warn(kfd_device, > - "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", > - PCI_BUS_NUM(pdev->devfn), > - PCI_SLOT(pdev->devfn), > - PCI_FUNC(pdev->devfn), > - pasid, > - address, > - flags); > - > - dev = kfd_device_by_pci_dev(pdev); > - if (!WARN_ON(!dev)) > - kfd_signal_iommu_event(dev, pasid, address, > - flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); > - > - return AMD_IOMMU_INV_PRI_RSP_INVALID; > -} > - > static void kfd_cwsr_init(struct kfd_dev *kfd) > { > if (cwsr_enable && kfd->device_info->supports_cwsr) { > @@ -462,11 +408,9 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, > goto device_queue_manager_error; > } > > - if (!device_iommu_pasid_init(kfd)) { > - dev_err(kfd_device, > - "Error initializing iommuv2 for device %x:%x\n", > - kfd->pdev->vendor, kfd->pdev->device); > - goto device_iommu_pasid_error; > + 
if (kfd_iommu_device_init(kfd)) { > + dev_err(kfd_device, "Error initializing iommuv2\n"); > + goto device_iommu_error; > } > > kfd_cwsr_init(kfd); > @@ -486,7 +430,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd, > goto out; > > kfd_resume_error: > -device_iommu_pasid_error: > +device_iommu_error: > device_queue_manager_uninit(kfd->dqm); > device_queue_manager_error: > kfd_interrupt_exit(kfd); > @@ -527,11 +471,7 @@ void kgd2kfd_suspend(struct kfd_dev *kfd) > > kfd->dqm->ops.stop(kfd->dqm); > > - kfd_unbind_processes_from_device(kfd); > - > - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); > - amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); > - amd_iommu_free_device(kfd->pdev); > + kfd_iommu_suspend(kfd); > } > > int kgd2kfd_resume(struct kfd_dev *kfd) > @@ -546,19 +486,14 @@ int kgd2kfd_resume(struct kfd_dev *kfd) > static int kfd_resume(struct kfd_dev *kfd) > { > int err = 0; > - unsigned int pasid_limit = kfd_get_pasid_limit(); > - > - err = amd_iommu_init_device(kfd->pdev, pasid_limit); > - if (err) > - return -ENXIO; > - amd_iommu_set_invalidate_ctx_cb(kfd->pdev, > - iommu_pasid_shutdown_callback); > - amd_iommu_set_invalid_ppr_cb(kfd->pdev, > - iommu_invalid_ppr_cb); > > - err = kfd_bind_processes_to_device(kfd); > - if (err) > - goto processes_bind_error; > + err = kfd_iommu_resume(kfd); > + if (err) { > + dev_err(kfd_device, > + "Failed to resume IOMMU for device %x:%x\n", > + kfd->pdev->vendor, kfd->pdev->device); > + return err; > + } > > err = kfd->dqm->ops.start(kfd->dqm); > if (err) { > @@ -571,9 +506,7 @@ static int kfd_resume(struct kfd_dev *kfd) > return err; > > dqm_start_error: > -processes_bind_error: > - amd_iommu_free_device(kfd->pdev); > - > + kfd_iommu_suspend(kfd); > return err; > } > > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c > index 93aae5c..6fb9c0d 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c > @@ -30,6 +30,7 @@ > #include 
<linux/memory.h> > #include "kfd_priv.h" > #include "kfd_events.h" > +#include "kfd_iommu.h" > #include <linux/device.h> > > /* > @@ -837,6 +838,7 @@ static void lookup_events_by_type_and_signal(struct kfd_process *p, > } > } > > +#ifdef KFD_SUPPORT_IOMMU_V2 > void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, > unsigned long address, bool is_write_requested, > bool is_execute_requested) > @@ -905,6 +907,7 @@ void kfd_signal_iommu_event(struct kfd_dev *dev, unsigned int pasid, > mutex_unlock(&p->event_mutex); > kfd_unref_process(p); > } > +#endif /* KFD_SUPPORT_IOMMU_V2 */ > > void kfd_signal_hw_exception_event(unsigned int pasid) > { > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c > new file mode 100644 > index 0000000..81dee34 > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.c > @@ -0,0 +1,356 @@ > +/* > + * Copyright 2018 Advanced Micro Devices, Inc. > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + */ > + > +#include <linux/printk.h> > +#include <linux/device.h> > +#include <linux/slab.h> > +#include <linux/pci.h> > +#include <linux/amd-iommu.h> > +#include "kfd_priv.h" > +#include "kfd_dbgmgr.h" > +#include "kfd_topology.h" > +#include "kfd_iommu.h" > + > +static const u32 required_iommu_flags = AMD_IOMMU_DEVICE_FLAG_ATS_SUP | > + AMD_IOMMU_DEVICE_FLAG_PRI_SUP | > + AMD_IOMMU_DEVICE_FLAG_PASID_SUP; > + > +/** kfd_iommu_check_device - Check whether IOMMU is available for device > + */ > +int kfd_iommu_check_device(struct kfd_dev *kfd) > +{ > + struct amd_iommu_device_info iommu_info; > + int err; > + > + if (!kfd->device_info->needs_iommu_device) > + return -ENODEV; > + > + iommu_info.flags = 0; > + err = amd_iommu_device_info(kfd->pdev, &iommu_info); > + if (err) > + return err; > + > + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) > + return -ENODEV; > + > + return 0; > +} > + > +/** kfd_iommu_device_init - Initialize IOMMU for device > + */ > +int kfd_iommu_device_init(struct kfd_dev *kfd) > +{ > + struct amd_iommu_device_info iommu_info; > + unsigned int pasid_limit; > + int err; > + > + if (!kfd->device_info->needs_iommu_device) > + return 0; > + > + iommu_info.flags = 0; > + err = amd_iommu_device_info(kfd->pdev, &iommu_info); > + if (err < 0) { > + dev_err(kfd_device, > + "error getting iommu info. 
is the iommu enabled?\n"); > + return -ENODEV; > + } > + > + if ((iommu_info.flags & required_iommu_flags) != required_iommu_flags) { > + dev_err(kfd_device, "error required iommu flags ats %i, pri %i, pasid %i\n", > + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_ATS_SUP) != 0, > + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PRI_SUP) != 0, > + (iommu_info.flags & AMD_IOMMU_DEVICE_FLAG_PASID_SUP) > + != 0); > + return -ENODEV; > + } > + > + pasid_limit = min_t(unsigned int, > + (unsigned int)(1 << kfd->device_info->max_pasid_bits), > + iommu_info.max_pasids); > + > + if (!kfd_set_pasid_limit(pasid_limit)) { > + dev_err(kfd_device, "error setting pasid limit\n"); > + return -EBUSY; > + } > + > + return 0; > +} > + > +/** kfd_iommu_bind_process_to_device - Have the IOMMU bind a process > + * > + * Binds the given process to the given device using its PASID. This > + * enables IOMMUv2 address translation for the process on the device. > + * > + * This function assumes that the process mutex is held. > + */ > +int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd) > +{ > + struct kfd_dev *dev = pdd->dev; > + struct kfd_process *p = pdd->process; > + int err; > + > + if (!dev->device_info->needs_iommu_device || pdd->bound == PDD_BOUND) > + return 0; > + > + if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { > + pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); > + return -EINVAL; > + } > + > + err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); > + if (!err) > + pdd->bound = PDD_BOUND; > + > + return err; > +} > + > +/** kfd_iommu_unbind_process - Unbind process from all devices > + * > + * This removes all IOMMU device bindings of the process. To be used > + * before process termination. 
> + */ > +void kfd_iommu_unbind_process(struct kfd_process *p) > +{ > + struct kfd_process_device *pdd; > + > + list_for_each_entry(pdd, &p->per_device_data, per_device_list) > + if (pdd->bound == PDD_BOUND) > + amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); > +} > + > +/* Callback for process shutdown invoked by the IOMMU driver */ > +static void iommu_pasid_shutdown_callback(struct pci_dev *pdev, int pasid) > +{ > + struct kfd_dev *dev = kfd_device_by_pci_dev(pdev); > + struct kfd_process *p; > + struct kfd_process_device *pdd; > + > + if (!dev) > + return; > + > + /* > + * Look for the process that matches the pasid. If there is no such > + * process, we either released it in amdkfd's own notifier, or there > + * is a bug. Unfortunately, there is no way to tell... > + */ > + p = kfd_lookup_process_by_pasid(pasid); > + if (!p) > + return; > + > + pr_debug("Unbinding process %d from IOMMU\n", pasid); > + > + mutex_lock(kfd_get_dbgmgr_mutex()); > + > + if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { > + if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { > + kfd_dbgmgr_destroy(dev->dbgmgr); > + dev->dbgmgr = NULL; > + } > + } > + > + mutex_unlock(kfd_get_dbgmgr_mutex()); > + > + mutex_lock(&p->mutex); > + > + pdd = kfd_get_process_device_data(dev, p); > + if (pdd) > + /* For GPU relying on IOMMU, we need to dequeue here > + * when PASID is still bound. 
> + */ > + kfd_process_dequeue_from_device(pdd); > + > + mutex_unlock(&p->mutex); > + > + kfd_unref_process(p); > +} > + > +/* This function called by IOMMU driver on PPR failure */ > +static int iommu_invalid_ppr_cb(struct pci_dev *pdev, int pasid, > + unsigned long address, u16 flags) > +{ > + struct kfd_dev *dev; > + > + dev_warn(kfd_device, > + "Invalid PPR device %x:%x.%x pasid %d address 0x%lX flags 0x%X", > + PCI_BUS_NUM(pdev->devfn), > + PCI_SLOT(pdev->devfn), > + PCI_FUNC(pdev->devfn), > + pasid, > + address, > + flags); > + > + dev = kfd_device_by_pci_dev(pdev); > + if (!WARN_ON(!dev)) > + kfd_signal_iommu_event(dev, pasid, address, > + flags & PPR_FAULT_WRITE, flags & PPR_FAULT_EXEC); > + > + return AMD_IOMMU_INV_PRI_RSP_INVALID; > +} > + > +/* > + * Bind processes do the device that have been temporarily unbound > + * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. > + */ > +static int kfd_bind_processes_to_device(struct kfd_dev *kfd) > +{ > + struct kfd_process_device *pdd; > + struct kfd_process *p; > + unsigned int temp; > + int err = 0; > + > + int idx = srcu_read_lock(&kfd_processes_srcu); > + > + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { > + mutex_lock(&p->mutex); > + pdd = kfd_get_process_device_data(kfd, p); > + > + if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { > + mutex_unlock(&p->mutex); > + continue; > + } > + > + err = amd_iommu_bind_pasid(kfd->pdev, p->pasid, > + p->lead_thread); > + if (err < 0) { > + pr_err("Unexpected pasid %d binding failure\n", > + p->pasid); > + mutex_unlock(&p->mutex); > + break; > + } > + > + pdd->bound = PDD_BOUND; > + mutex_unlock(&p->mutex); > + } > + > + srcu_read_unlock(&kfd_processes_srcu, idx); > + > + return err; > +} > + > +/* > + * Mark currently bound processes as PDD_BOUND_SUSPENDED. These > + * processes will be restored to PDD_BOUND state in > + * kfd_bind_processes_to_device. 
> + */ > +static void kfd_unbind_processes_from_device(struct kfd_dev *kfd) > +{ > + struct kfd_process_device *pdd; > + struct kfd_process *p; > + unsigned int temp; > + > + int idx = srcu_read_lock(&kfd_processes_srcu); > + > + hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { > + mutex_lock(&p->mutex); > + pdd = kfd_get_process_device_data(kfd, p); > + > + if (WARN_ON(!pdd)) { > + mutex_unlock(&p->mutex); > + continue; > + } > + > + if (pdd->bound == PDD_BOUND) > + pdd->bound = PDD_BOUND_SUSPENDED; > + mutex_unlock(&p->mutex); > + } > + > + srcu_read_unlock(&kfd_processes_srcu, idx); > +} > + > +/** kfd_iommu_suspend - Prepare IOMMU for suspend > + * > + * This unbinds processes from the device and disables the IOMMU for > + * the device. > + */ > +void kfd_iommu_suspend(struct kfd_dev *kfd) > +{ > + if (!kfd->device_info->needs_iommu_device) > + return; > + > + kfd_unbind_processes_from_device(kfd); > + > + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); > + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); > + amd_iommu_free_device(kfd->pdev); > +} > + > +/** kfd_iommu_resume - Restore IOMMU after resume > + * > + * This reinitializes the IOMMU for the device and re-binds previously > + * suspended processes to the device. 
> + */ > +int kfd_iommu_resume(struct kfd_dev *kfd) > +{ > + unsigned int pasid_limit; > + int err; > + > + if (!kfd->device_info->needs_iommu_device) > + return 0; > + > + pasid_limit = kfd_get_pasid_limit(); > + > + err = amd_iommu_init_device(kfd->pdev, pasid_limit); > + if (err) > + return -ENXIO; > + > + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, > + iommu_pasid_shutdown_callback); > + amd_iommu_set_invalid_ppr_cb(kfd->pdev, > + iommu_invalid_ppr_cb); > + > + err = kfd_bind_processes_to_device(kfd); > + if (err) { > + amd_iommu_set_invalidate_ctx_cb(kfd->pdev, NULL); > + amd_iommu_set_invalid_ppr_cb(kfd->pdev, NULL); > + amd_iommu_free_device(kfd->pdev); > + return err; > + } > + > + return 0; > +} > + > +extern bool amd_iommu_pc_supported(void); > +extern u8 amd_iommu_pc_get_max_banks(u16 devid); > +extern u8 amd_iommu_pc_get_max_counters(u16 devid); > + > +/** kfd_iommu_add_perf_counters - Add IOMMU performance counters to topology > + */ > +int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) > +{ > + struct kfd_perf_properties *props; > + > + if (!(kdev->node_props.capability & HSA_CAP_ATS_PRESENT)) > + return 0; > + > + if (!amd_iommu_pc_supported()) > + return 0; > + > + props = kfd_alloc_struct(props); > + if (!props) > + return -ENOMEM; > + strcpy(props->block_name, "iommu"); > + props->max_concurrent = amd_iommu_pc_get_max_banks(0) * > + amd_iommu_pc_get_max_counters(0); /* assume one iommu */ > + list_add_tail(&props->list, &kdev->perf_props); > + > + return 0; > +} > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h > new file mode 100644 > index 0000000..dd23d9f > --- /dev/null > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_iommu.h > @@ -0,0 +1,78 @@ > +/* > + * Copyright 2018 Advanced Micro Devices, Inc. 
> + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. 
> + */ > + > +#ifndef __KFD_IOMMU_H__ > +#define __KFD_IOMMU_H__ > + > +#if defined(CONFIG_AMD_IOMMU_V2_MODULE) || defined(CONFIG_AMD_IOMMU_V2) > + > +#define KFD_SUPPORT_IOMMU_V2 > + > +int kfd_iommu_check_device(struct kfd_dev *kfd); > +int kfd_iommu_device_init(struct kfd_dev *kfd); > + > +int kfd_iommu_bind_process_to_device(struct kfd_process_device *pdd); > +void kfd_iommu_unbind_process(struct kfd_process *p); > + > +void kfd_iommu_suspend(struct kfd_dev *kfd); > +int kfd_iommu_resume(struct kfd_dev *kfd); > + > +int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev); > + > +#else > + > +static inline int kfd_iommu_check_device(struct kfd_dev *kfd) > +{ > + return -ENODEV; > +} > +static inline int kfd_iommu_device_init(struct kfd_dev *kfd) > +{ > + return 0; > +} > + > +static inline int kfd_iommu_bind_process_to_device( > + struct kfd_process_device *pdd) > +{ > + return 0; > +} > +static inline void kfd_iommu_unbind_process(struct kfd_process *p) > +{ > + /* empty */ > +} > + > +static inline void kfd_iommu_suspend(struct kfd_dev *kfd) > +{ > + /* empty */ > +} > +static inline int kfd_iommu_resume(struct kfd_dev *kfd) > +{ > + return 0; > +} > + > +static inline int kfd_iommu_add_perf_counters(struct kfd_topology_device *kdev) > +{ > + return 0; > +} > + > +#endif /* defined(CONFIG_AMD_IOMMU_V2) */ > + > +#endif /* __KFD_IOMMU_H__ */ > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > index 594f853..f12eb5d 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h > @@ -158,6 +158,7 @@ struct kfd_device_info { > uint8_t num_of_watch_points; > uint16_t mqd_size_aligned; > bool supports_cwsr; > + bool needs_iommu_device; > bool needs_pci_atomics; > }; > > @@ -517,15 +518,15 @@ struct kfd_process_device { > uint64_t scratch_base; > uint64_t scratch_limit; > > - /* Is this process/pasid bound to this device? 
(amd_iommu_bind_pasid) */ > - enum kfd_pdd_bound bound; > - > /* Flag used to tell the pdd has dequeued from the dqm. > * This is used to prevent dev->dqm->ops.process_termination() from > * being called twice when it is already called in IOMMU callback > * function. > */ > bool already_dequeued; > + > + /* Is this process/pasid bound to this device? (amd_iommu_bind_pasid) */ > + enum kfd_pdd_bound bound; > }; > > #define qpd_to_pdd(x) container_of(x, struct kfd_process_device, qpd) > @@ -590,6 +591,10 @@ struct kfd_process { > bool signal_event_limit_reached; > }; > > +#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ > +extern DECLARE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); > +extern struct srcu_struct kfd_processes_srcu; > + > /** > * Ioctl function type. > * > @@ -617,9 +622,6 @@ void kfd_unref_process(struct kfd_process *p); > > struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, > struct kfd_process *p); > -int kfd_bind_processes_to_device(struct kfd_dev *dev); > -void kfd_unbind_processes_from_device(struct kfd_dev *dev); > -void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid); > struct kfd_process_device *kfd_get_process_device_data(struct kfd_dev *dev, > struct kfd_process *p); > struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > index 4ff5f0f..e9aee76 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c > @@ -35,16 +35,16 @@ struct mm_struct; > > #include "kfd_priv.h" > #include "kfd_dbgmgr.h" > +#include "kfd_iommu.h" > > /* > * List of struct kfd_process (field kfd_process). 
> * Unique/indexed by mm_struct* > */ > -#define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */ > -static DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); > +DEFINE_HASHTABLE(kfd_processes_table, KFD_PROCESS_TABLE_SIZE); > static DEFINE_MUTEX(kfd_processes_mutex); > > -DEFINE_STATIC_SRCU(kfd_processes_srcu); > +DEFINE_SRCU(kfd_processes_srcu); > > static struct workqueue_struct *kfd_process_wq; > > @@ -173,14 +173,8 @@ static void kfd_process_wq_release(struct work_struct *work) > { > struct kfd_process *p = container_of(work, struct kfd_process, > release_work); > - struct kfd_process_device *pdd; > > - pr_debug("Releasing process (pasid %d) in workqueue\n", p->pasid); > - > - list_for_each_entry(pdd, &p->per_device_data, per_device_list) { > - if (pdd->bound == PDD_BOUND) > - amd_iommu_unbind_pasid(pdd->dev->pdev, p->pasid); > - } > + kfd_iommu_unbind_process(p); > > kfd_process_destroy_pdds(p); > > @@ -429,133 +423,13 @@ struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev, > return ERR_PTR(-ENOMEM); > } > > - if (pdd->bound == PDD_BOUND) { > - return pdd; > - } else if (unlikely(pdd->bound == PDD_BOUND_SUSPENDED)) { > - pr_err("Binding PDD_BOUND_SUSPENDED pdd is unexpected!\n"); > - return ERR_PTR(-EINVAL); > - } > - > - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, p->lead_thread); > - if (err < 0) > + err = kfd_iommu_bind_process_to_device(pdd); > + if (err) > return ERR_PTR(err); > > - pdd->bound = PDD_BOUND; > - > return pdd; > } > > -/* > - * Bind processes do the device that have been temporarily unbound > - * (PDD_BOUND_SUSPENDED) in kfd_unbind_processes_from_device. 
> - */ > -int kfd_bind_processes_to_device(struct kfd_dev *dev) > -{ > - struct kfd_process_device *pdd; > - struct kfd_process *p; > - unsigned int temp; > - int err = 0; > - > - int idx = srcu_read_lock(&kfd_processes_srcu); > - > - hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { > - mutex_lock(&p->mutex); > - pdd = kfd_get_process_device_data(dev, p); > - > - if (WARN_ON(!pdd) || pdd->bound != PDD_BOUND_SUSPENDED) { > - mutex_unlock(&p->mutex); > - continue; > - } > - > - err = amd_iommu_bind_pasid(dev->pdev, p->pasid, > - p->lead_thread); > - if (err < 0) { > - pr_err("Unexpected pasid %d binding failure\n", > - p->pasid); > - mutex_unlock(&p->mutex); > - break; > - } > - > - pdd->bound = PDD_BOUND; > - mutex_unlock(&p->mutex); > - } > - > - srcu_read_unlock(&kfd_processes_srcu, idx); > - > - return err; > -} > - > -/* > - * Mark currently bound processes as PDD_BOUND_SUSPENDED. These > - * processes will be restored to PDD_BOUND state in > - * kfd_bind_processes_to_device. > - */ > -void kfd_unbind_processes_from_device(struct kfd_dev *dev) > -{ > - struct kfd_process_device *pdd; > - struct kfd_process *p; > - unsigned int temp; > - > - int idx = srcu_read_lock(&kfd_processes_srcu); > - > - hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) { > - mutex_lock(&p->mutex); > - pdd = kfd_get_process_device_data(dev, p); > - > - if (WARN_ON(!pdd)) { > - mutex_unlock(&p->mutex); > - continue; > - } > - > - if (pdd->bound == PDD_BOUND) > - pdd->bound = PDD_BOUND_SUSPENDED; > - mutex_unlock(&p->mutex); > - } > - > - srcu_read_unlock(&kfd_processes_srcu, idx); > -} > - > -void kfd_process_iommu_unbind_callback(struct kfd_dev *dev, unsigned int pasid) > -{ > - struct kfd_process *p; > - struct kfd_process_device *pdd; > - > - /* > - * Look for the process that matches the pasid. If there is no such > - * process, we either released it in amdkfd's own notifier, or there > - * is a bug. Unfortunately, there is no way to tell... 
> - */ > - p = kfd_lookup_process_by_pasid(pasid); > - if (!p) > - return; > - > - pr_debug("Unbinding process %d from IOMMU\n", pasid); > - > - mutex_lock(kfd_get_dbgmgr_mutex()); > - > - if (dev->dbgmgr && dev->dbgmgr->pasid == p->pasid) { > - if (!kfd_dbgmgr_unregister(dev->dbgmgr, p)) { > - kfd_dbgmgr_destroy(dev->dbgmgr); > - dev->dbgmgr = NULL; > - } > - } > - > - mutex_unlock(kfd_get_dbgmgr_mutex()); > - > - mutex_lock(&p->mutex); > - > - pdd = kfd_get_process_device_data(dev, p); > - if (pdd) > - /* For GPU relying on IOMMU, we need to dequeue here > - * when PASID is still bound. > - */ > - kfd_process_dequeue_from_device(pdd); > - > - mutex_unlock(&p->mutex); > - > - kfd_unref_process(p); > -} > - > struct kfd_process_device *kfd_get_first_process_device_data( > struct kfd_process *p) > { > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > index 7783250..2506155 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c > @@ -35,6 +35,7 @@ > #include "kfd_crat.h" > #include "kfd_topology.h" > #include "kfd_device_queue_manager.h" > +#include "kfd_iommu.h" > > /* topology_device_list - Master list of all topology devices */ > static struct list_head topology_device_list; > @@ -875,19 +876,8 @@ static void find_system_memory(const struct dmi_header *dm, > */ > static int kfd_add_perf_to_topology(struct kfd_topology_device *kdev) > { > - struct kfd_perf_properties *props; > - > - if (amd_iommu_pc_supported()) { > - props = kfd_alloc_struct(props); > - if (!props) > - return -ENOMEM; > - strcpy(props->block_name, "iommu"); > - props->max_concurrent = amd_iommu_pc_get_max_banks(0) * > - amd_iommu_pc_get_max_counters(0); /* assume one iommu */ > - list_add_tail(&props->list, &kdev->perf_props); > - } > - > - return 0; > + /* These are the only counters supported so far */ > + return kfd_iommu_add_perf_counters(kdev); > } > > /* kfd_add_non_crat_information - Add 
information that is not currently > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > index 53fca1f..c0be2be 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h > @@ -25,7 +25,7 @@ > > #include <linux/types.h> > #include <linux/list.h> > -#include "kfd_priv.h" > +#include "kfd_crat.h" > > #define KFD_TOPOLOGY_PUBLIC_NAME_SIZE 128 > > @@ -183,8 +183,4 @@ struct kfd_topology_device *kfd_create_topology_device( > struct list_head *device_list); > void kfd_release_topology_device_list(struct list_head *device_list); > > -extern bool amd_iommu_pc_supported(void); > -extern u8 amd_iommu_pc_get_max_banks(u16 devid); > -extern u8 amd_iommu_pc_get_max_counters(u16 devid); > - > #endif /* __KFD_TOPOLOGY_H__ */