On Wed, Feb 7, 2018 at 3:32 AM, Felix Kuehling <Felix.Kuehling at amd.com> wrote:
> When the TTM memory manager in KGD evicts BOs, all user mode queues
> potentially accessing these BOs must be evicted temporarily. Once
> user mode queues are evicted, the eviction fence is signaled,
> allowing the migration of the BO to proceed.
>
> A delayed worker is scheduled to restore all the BOs belonging to
> the evicted process and restart its queues.
>
> During suspend/resume of the GPU we also evict all processes to allow
> KGD to save BOs in system memory, since VRAM will be lost.
>
> v2:
> * Account for eviction when updating q->is_active in MQD manager
>
> Signed-off-by: Harish Kasiviswanathan <Harish.Kasiviswanathan at amd.com>
> Signed-off-by: Felix Kuehling <Felix.Kuehling at amd.com>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_device.c            |  65 +++++-
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 219 ++++++++++++++++++++-
>  .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |   9 +
>  drivers/gpu/drm/amd/amdkfd/kfd_module.c            |   2 +
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c   |   9 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c    |   6 +-
>  drivers/gpu/drm/amd/amdkfd/kfd_priv.h              |  32 ++-
>  drivers/gpu/drm/amd/amdkfd/kfd_process.c           | 213 ++++++++++++++++++++
>  8 files changed, 547 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> index 4ac2d61..334669996 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
> @@ -33,6 +33,7 @@
>  #include "kfd_iommu.h"
>
>  #define MQD_SIZE_ALIGNED 768
> +static atomic_t kfd_device_suspended = ATOMIC_INIT(0);
>
>  #ifdef KFD_SUPPORT_IOMMU_V2
>  static const struct kfd_device_info kaveri_device_info = {
> @@ -469,6 +470,10 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
>  	if (!kfd->init_complete)
>  		return;
>
> +	/* For the first KFD device, suspend all KFD processes */
> +	if (atomic_inc_return(&kfd_device_suspended) == 1)
> +		kfd_suspend_all_processes();
> +
>  	kfd->dqm->ops.stop(kfd->dqm);
>
>  	kfd_iommu_suspend(kfd);
> @@ -476,11 +481,21 @@ void kgd2kfd_suspend(struct kfd_dev *kfd)
>
>  int kgd2kfd_resume(struct kfd_dev *kfd)
>  {
> +	int ret, count;
> +
>  	if (!kfd->init_complete)
>  		return 0;
>
> -	return kfd_resume(kfd);
> +	ret = kfd_resume(kfd);
> +	if (ret)
> +		return ret;
> +
> +	count = atomic_dec_return(&kfd_device_suspended);
> +	WARN_ONCE(count < 0, "KFD suspend / resume ref. error");
> +	if (count == 0)
> +		ret = kfd_resume_all_processes();
>
> +	return ret;
>  }
>
>  static int kfd_resume(struct kfd_dev *kfd)
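A quick illustration of the suspend/resume gating above, for readers skimming the
diff: only the first device to suspend evicts the KFD processes, and only the last
device to resume restores them. A minimal stand-alone sketch of that pattern (the
counter and function names here are illustrative only; kfd_suspend_all_processes()
and kfd_resume_all_processes() are the helpers this patch adds):

    static atomic_t suspended_devices = ATOMIC_INIT(0);

    void sketch_suspend_one_device(void)
    {
            /* The first suspender evicts all KFD processes */
            if (atomic_inc_return(&suspended_devices) == 1)
                    kfd_suspend_all_processes();
    }

    int sketch_resume_one_device(void)
    {
            int count = atomic_dec_return(&suspended_devices);

            WARN_ONCE(count < 0, "unbalanced suspend/resume");
            /* The last resumer restarts all KFD processes */
            if (count == 0)
                    return kfd_resume_all_processes();
            return 0;
    }
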
> @@ -526,6 +541,54 @@ void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
>  	spin_unlock(&kfd->interrupt_lock);
>  }
>
> +/** kgd2kfd_schedule_evict_and_restore_process - Schedules work queue that will
> + * prepare for safe eviction of KFD BOs that belong to the specified
> + * process.
> + *
> + * @mm: mm_struct that identifies the specified KFD process
> + * @fence: eviction fence attached to KFD process BOs
> + *
> + */
> +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
> +					       struct dma_fence *fence)
> +{
> +	struct kfd_process *p;
> +	unsigned long active_time;
> +	unsigned long delay_jiffies = msecs_to_jiffies(PROCESS_ACTIVE_TIME_MS);
> +
> +	if (!fence)
> +		return -EINVAL;
> +
> +	if (dma_fence_is_signaled(fence))
> +		return 0;
> +
> +	p = kfd_lookup_process_by_mm(mm);
> +	if (!p)
> +		return -ENODEV;
> +
> +	if (fence->seqno == p->last_eviction_seqno)
> +		goto out;
> +
> +	p->last_eviction_seqno = fence->seqno;
> +
> +	/* Avoid KFD process starvation. Wait for at least
> +	 * PROCESS_ACTIVE_TIME_MS before evicting the process again
> +	 */
> +	active_time = get_jiffies_64() - p->last_restore_timestamp;
> +	if (delay_jiffies > active_time)
> +		delay_jiffies -= active_time;
> +	else
> +		delay_jiffies = 0;
> +
> +	/* During process initialization eviction_work.dwork is initialized
> +	 * to evict_process_worker
> +	 */
> +	schedule_delayed_work(&p->eviction_work, delay_jiffies);
> +out:
> +	kfd_unref_process(p);
> +	return 0;
> +}
> +
>  static int kfd_gtt_sa_init(struct kfd_dev *kfd, unsigned int buf_size,
>  				unsigned int chunk_size)
>  {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> index b7d0639..b3b6dab 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
> @@ -21,10 +21,11 @@
>   *
>   */
>
> +#include <linux/ratelimit.h>
> +#include <linux/printk.h>
>  #include <linux/slab.h>
>  #include <linux/list.h>
>  #include <linux/types.h>
> -#include <linux/printk.h>
>  #include <linux/bitops.h>
>  #include <linux/sched.h>
>  #include "kfd_priv.h"
> @@ -180,6 +181,14 @@ static int create_queue_nocpsch(struct device_queue_manager *dqm,
>  		goto out_unlock;
>  	}
>  	q->properties.vmid = qpd->vmid;
> +	/*
> +	 * Eviction state logic: we only mark active queues as evicted
> +	 * to avoid the overhead of restoring inactive queues later
> +	 */
> +	if (qpd->evicted)
> +		q->properties.is_evicted = (q->properties.queue_size > 0 &&
> +					    q->properties.queue_percent > 0 &&
> +					    q->properties.queue_address != 0);
>
>  	q->properties.tba_addr = qpd->tba_addr;
>  	q->properties.tma_addr = qpd->tma_addr;
> @@ -377,15 +386,29 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
>  {
>  	int retval;
>  	struct mqd_manager *mqd;
> +	struct kfd_process_device *pdd;
>  	bool prev_active = false;
>
>  	mutex_lock(&dqm->lock);
> +	pdd = kfd_get_process_device_data(q->device, q->process);
> +	if (!pdd) {
> +		retval = -ENODEV;
> +		goto out_unlock;
> +	}
>  	mqd = dqm->ops.get_mqd_manager(dqm,
>  			get_mqd_type_from_queue_type(q->properties.type));
>  	if (!mqd) {
>  		retval = -ENOMEM;
>  		goto out_unlock;
>  	}
> +	/*
> +	 * Eviction state logic: we only mark active queues as evicted
> +	 * to avoid the overhead of restoring inactive queues later
> +	 */
> +	if (pdd->qpd.evicted)
> +		q->properties.is_evicted = (q->properties.queue_size > 0 &&
> +					    q->properties.queue_percent > 0 &&
> +					    q->properties.queue_address != 0);
>
>  	/* Save previous activity state for counters */
>  	prev_active = q->properties.is_active;
> @@ -457,6 +480,187 @@ static struct mqd_manager *get_mqd_manager(
>  	return mqd;
>  }
>
> +static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
> +					struct qcm_process_device *qpd)
> +{
> +	struct queue *q;
> +	struct mqd_manager *mqd;
> +	struct kfd_process_device *pdd;
> +	int retval = 0;
> +
> +	mutex_lock(&dqm->lock);
> +	if (qpd->evicted++ > 0) /* already evicted, do nothing */
> +		goto out;
> +
> +	pdd = qpd_to_pdd(qpd);
> +	pr_info_ratelimited("Evicting PASID %u queues\n",
> +			    pdd->process->pasid);
> +
> +	/* Deactivate all active queues on the qpd */
> +	list_for_each_entry(q, &qpd->queues_list, list) {
> +		if (!q->properties.is_active)
> +			continue;
> +		mqd = dqm->ops.get_mqd_manager(dqm,
> +			get_mqd_type_from_queue_type(q->properties.type));
> +		if (!mqd) { /* should not be here */
> +			pr_err("Cannot evict queue, mqd mgr is NULL\n");
> +			retval = -ENOMEM;
> +			goto out;
> +		}
> +		q->properties.is_evicted = true;
> +		q->properties.is_active = false;
> +		retval = mqd->destroy_mqd(mqd, q->mqd,
> +				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
> +				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
> +		if (retval)
> +			goto out;
> +		dqm->queue_count--;
> +	}
> +
> +out:
> +	mutex_unlock(&dqm->lock);
> +	return retval;
> +}
> +
> +static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
> +				      struct qcm_process_device *qpd)
> +{
> +	struct queue *q;
> +	struct kfd_process_device *pdd;
> +	int retval = 0;
> +
> +	mutex_lock(&dqm->lock);
> +	if (qpd->evicted++ > 0) /* already evicted, do nothing */
> +		goto out;
> +
> +	pdd = qpd_to_pdd(qpd);
> +	pr_info_ratelimited("Evicting PASID %u queues\n",
> +			    pdd->process->pasid);
> +
> +	/* Deactivate all active queues on the qpd */
> +	list_for_each_entry(q, &qpd->queues_list, list) {
> +		if (!q->properties.is_active)
> +			continue;
> +		q->properties.is_evicted = true;
> +		q->properties.is_active = false;
> +		dqm->queue_count--;
> +	}
> +	retval = execute_queues_cpsch(dqm,
> +				qpd->is_debug ?
> +				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
> +				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
> +
> +out:
> +	mutex_unlock(&dqm->lock);
> +	return retval;
> +}
> +
> +static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
> +					  struct qcm_process_device *qpd)
> +{
> +	struct queue *q;
> +	struct mqd_manager *mqd;
> +	struct kfd_process_device *pdd;
> +	uint32_t pd_base;
> +	int retval = 0;
> +
> +	pdd = qpd_to_pdd(qpd);
> +	/* Retrieve PD base */
> +	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
> +
> +	mutex_lock(&dqm->lock);
> +	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
> +		goto out;
> +	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
> +		qpd->evicted--;
> +		goto out;
> +	}
> +
> +	pr_info_ratelimited("Restoring PASID %u queues\n",
> +			    pdd->process->pasid);
> +
> +	/* Update PD Base in QPD */
> +	qpd->page_table_base = pd_base;
> +	pr_debug("Updated PD address to 0x%08x\n", pd_base);
> +
> +	if (!list_empty(&qpd->queues_list)) {
> +		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
> +				dqm->dev->kgd,
> +				qpd->vmid,
> +				qpd->page_table_base);
> +		kfd_flush_tlb(pdd);
> +	}
> +
> +	/* Reactivate all queues that were evicted on this qpd */
> +	list_for_each_entry(q, &qpd->queues_list, list) {
> +		if (!q->properties.is_evicted)
> +			continue;
> +		mqd = dqm->ops.get_mqd_manager(dqm,
> +			get_mqd_type_from_queue_type(q->properties.type));
> +		if (!mqd) { /* should not be here */
> +			pr_err("Cannot restore queue, mqd mgr is NULL\n");
> +			retval = -ENOMEM;
> +			goto out;
> +		}
> +		q->properties.is_evicted = false;
> +		q->properties.is_active = true;
> +		retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
> +				       q->queue, &q->properties,
> +				       q->process->mm);
> +		if (retval)
> +			goto out;
> +		dqm->queue_count++;
> +	}
> +	qpd->evicted = 0;
> +out:
> +	mutex_unlock(&dqm->lock);
> +	return retval;
> +}
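Worth noting for anyone reading the evict/restore functions in this hunk: the
qpd->evicted field is a per-process-device reference count, so evictions from
different sources can nest safely. A minimal sketch of that counting pattern
(deactivate_all_queues() and activate_all_queues() are hypothetical placeholders;
the real code additionally holds the DQM lock and only clears the counter once the
queues have actually been reactivated):

    static int sketch_evict(struct qcm_process_device *qpd)
    {
            /* Only the first eviction actually stops the queues */
            if (qpd->evicted++ > 0)
                    return 0;
            return deactivate_all_queues(qpd);      /* hypothetical helper */
    }

    static int sketch_restore(struct qcm_process_device *qpd)
    {
            if (WARN_ON_ONCE(!qpd->evicted))        /* unbalanced restore */
                    return 0;
            /* Only the last restore actually restarts the queues */
            if (--qpd->evicted > 0)
                    return 0;
            return activate_all_queues(qpd);        /* hypothetical helper */
    }
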
> +
> +static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
> +					struct qcm_process_device *qpd)
> +{
> +	struct queue *q;
> +	struct kfd_process_device *pdd;
> +	uint32_t pd_base;
> +	int retval = 0;
> +
> +	pdd = qpd_to_pdd(qpd);
> +	/* Retrieve PD base */
> +	pd_base = dqm->dev->kfd2kgd->get_process_page_dir(pdd->vm);
> +
> +	mutex_lock(&dqm->lock);
> +	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
> +		goto out;
> +	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
> +		qpd->evicted--;
> +		goto out;
> +	}
> +
> +	pr_info_ratelimited("Restoring PASID %u queues\n",
> +			    pdd->process->pasid);
> +
> +	/* Update PD Base in QPD */
> +	qpd->page_table_base = pd_base;
> +	pr_debug("Updated PD address to 0x%08x\n", pd_base);
> +
> +	/* Reactivate all queues that were evicted on this qpd */
> +	list_for_each_entry(q, &qpd->queues_list, list) {
> +		if (!q->properties.is_evicted)
> +			continue;
> +		q->properties.is_evicted = false;
> +		q->properties.is_active = true;
> +		dqm->queue_count++;
> +	}
> +	retval = execute_queues_cpsch(dqm,
> +				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
> +	if (!retval)
> +		qpd->evicted = 0;
> +out:
> +	mutex_unlock(&dqm->lock);
> +	return retval;
> +}
> +
>  static int register_process(struct device_queue_manager *dqm,
>  					struct qcm_process_device *qpd)
>  {
> @@ -853,6 +1057,14 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
>  		retval = -ENOMEM;
>  		goto out;
>  	}
> +	/*
> +	 * Eviction state logic: we only mark active queues as evicted
> +	 * to avoid the overhead of restoring inactive queues later
> +	 */
> +	if (qpd->evicted)
> +		q->properties.is_evicted = (q->properties.queue_size > 0 &&
> +					    q->properties.queue_percent > 0 &&
> +					    q->properties.queue_address != 0);
>
>  	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
>
> @@ -1291,6 +1503,8 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
>  		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
>  		dqm->ops.set_trap_handler = set_trap_handler;
>  		dqm->ops.process_termination = process_termination_cpsch;
> +		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
> +		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
>  		break;
>  	case KFD_SCHED_POLICY_NO_HWS:
>  		/* initialize dqm for no cp scheduling */
> @@ -1307,6 +1521,9 @@ struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
>  		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
>  		dqm->ops.set_trap_handler = set_trap_handler;
>  		dqm->ops.process_termination = process_termination_nocpsch;
> +		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
> +		dqm->ops.restore_process_queues =
> +					restore_process_queues_nocpsch;
>  		break;
>  	default:
>  		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> index 68be0aa..412beff 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
> @@ -79,6 +79,10 @@ struct device_process_node {
>   *
>   * @process_termination: Clears all process queues belongs to that device.
>   *
> + * @evict_process_queues: Evict all active queues of a process
> + *
> + * @restore_process_queues: Restore all evicted queues of a process
> + *
>   */
>
>  struct device_queue_manager_ops {
> @@ -129,6 +133,11 @@ struct device_queue_manager_ops {
>
>  	int	(*process_termination)(struct device_queue_manager *dqm,
>  			struct qcm_process_device *qpd);
> +
> +	int	(*evict_process_queues)(struct device_queue_manager *dqm,
> +					struct qcm_process_device *qpd);
> +	int	(*restore_process_queues)(struct device_queue_manager *dqm,
> +					  struct qcm_process_device *qpd);
>  };
>
>  struct device_queue_manager_asic_ops {
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_module.c b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> index 3ac72be..65574c6 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_module.c
> @@ -43,6 +43,8 @@ static const struct kgd2kfd_calls kgd2kfd = {
>  	.interrupt = kgd2kfd_interrupt,
>  	.suspend = kgd2kfd_suspend,
>  	.resume = kgd2kfd_resume,
> +	.schedule_evict_and_restore_process =
> +			kgd2kfd_schedule_evict_and_restore_process,
>  };
>
>  int sched_policy = KFD_SCHED_POLICY_HWS;
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> index fbe3f83..c00c325 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
> @@ -202,7 +202,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
>
>  	q->is_active = (q->queue_size > 0 &&
>  			q->queue_address != 0 &&
> -			q->queue_percent > 0);
> +			q->queue_percent > 0 &&
> +			!q->is_evicted);
>
>  	return 0;
>  }
> @@ -245,7 +246,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
>
>  	q->is_active = (q->queue_size > 0 &&
>  			q->queue_address != 0 &&
> -			q->queue_percent > 0);
> +			q->queue_percent > 0 &&
> +			!q->is_evicted);
>
>  	return 0;
>  }
> @@ -377,7 +379,8 @@ static int update_mqd_hiq(struct mqd_manager *mm, void *mqd,
>
>  	q->is_active = (q->queue_size > 0 &&
>  			q->queue_address != 0 &&
> -			q->queue_percent > 0);
> +			q->queue_percent > 0 &&
> +			!q->is_evicted);
>
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> index 58221c1..89e4242 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
> @@ -198,7 +198,8 @@ static int __update_mqd(struct mqd_manager *mm, void *mqd,
>
>  	q->is_active = (q->queue_size > 0 &&
>  			q->queue_address != 0 &&
> -			q->queue_percent > 0);
> +			q->queue_percent > 0 &&
> +			!q->is_evicted);
>
>  	return 0;
>  }
> @@ -342,7 +343,8 @@ static int update_mqd_sdma(struct mqd_manager *mm, void *mqd,
>
>  	q->is_active = (q->queue_size > 0 &&
>  			q->queue_address != 0 &&
> -			q->queue_percent > 0);
> +			q->queue_percent > 0 &&
> +			!q->is_evicted);
>
>  	return 0;
>  }
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> index 56c2e36..cac7aa2 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
> @@ -335,7 +335,11 @@ enum kfd_queue_format {
>   * @is_interop: Defines if this is a interop queue. Interop queue means that
>   * the queue can access both graphics and compute resources.
>   *
> - * @is_active: Defines if the queue is active or not.
> + * @is_evicted: Defines if the queue is evicted. Only active queues
> + * are evicted, rendering them inactive.
> + *
> + * @is_active: Defines if the queue is active or not. @is_active and
> + * @is_evicted are protected by the DQM lock.
>   *
>   * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
>   * of the queue.
> @@ -357,6 +361,7 @@ struct queue_properties {
>  	uint32_t __iomem *doorbell_ptr;
>  	uint32_t  doorbell_off;
>  	bool is_interop;
> +	bool is_evicted;
>  	bool is_active;
>  	/* Not relevant for user mode queues in cp scheduling */
>  	unsigned int vmid;
> @@ -460,6 +465,7 @@ struct qcm_process_device {
>  	unsigned int queue_count;
>  	unsigned int vmid;
>  	bool is_debug;
> +	unsigned int evicted; /* eviction counter, 0=active */
>
>  	/* This flag tells if we should reset all wavefronts on
>  	 * process termination
> @@ -486,6 +492,17 @@ struct qcm_process_device {
>  	uint64_t tma_addr;
>  };
>
> +/* KFD Memory Eviction */
> +
> +/* Approx. wait time before attempting to restore evicted BOs */
> +#define PROCESS_RESTORE_TIME_MS 100
> +/* Approx. back off time if restore fails due to lack of memory */
> +#define PROCESS_BACK_OFF_TIME_MS 100
> +/* Approx. time before evicting the process again */
> +#define PROCESS_ACTIVE_TIME_MS 10
> +
> +int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
> +					       struct dma_fence *fence);
>
>  enum kfd_pdd_bound {
>  	PDD_UNBOUND = 0,
> @@ -600,6 +617,16 @@ struct kfd_process {
>  	 * during restore
>  	 */
>  	struct dma_fence *ef;
> +
> +	/* Work items for evicting and restoring BOs */
> +	struct delayed_work eviction_work;
> +	struct delayed_work restore_work;
> +	/* seqno of the last scheduled eviction */
> +	unsigned int last_eviction_seqno;
> +	/* Approx. the last timestamp (in jiffies) when the process was
> +	 * restored after an eviction
> +	 */
> +	unsigned long last_restore_timestamp;
>  };
>
>  #define KFD_PROCESS_TABLE_SIZE 5 /* bits: 32 entries */
> @@ -629,7 +656,10 @@ void kfd_process_destroy_wq(void);
>  struct kfd_process *kfd_create_process(struct file *filep);
>  struct kfd_process *kfd_get_process(const struct task_struct *);
>  struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid);
> +struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm);
>  void kfd_unref_process(struct kfd_process *p);
> +void kfd_suspend_all_processes(void);
> +int kfd_resume_all_processes(void);
>
>  struct kfd_process_device *kfd_bind_process_to_device(struct kfd_dev *dev,
>  							struct kfd_process *p);
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> index cf4fa25..18b2b86 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
> @@ -55,6 +55,9 @@ static struct kfd_process *create_process(const struct task_struct *thread,
>  					struct file *filep);
>  static int kfd_process_init_cwsr(struct kfd_process *p, struct file *filep);
>
> +static void evict_process_worker(struct work_struct *work);
> +static void restore_process_worker(struct work_struct *work);
> +
>
>  void kfd_process_create_wq(void)
>  {
> @@ -230,6 +233,9 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
>  	mutex_unlock(&kfd_processes_mutex);
>  	synchronize_srcu(&kfd_processes_srcu);
>
> +	cancel_delayed_work_sync(&p->eviction_work);
> +	cancel_delayed_work_sync(&p->restore_work);
> +
>  	mutex_lock(&p->mutex);
>
>  	/* Iterate over all process device data structures and if the
> @@ -351,6 +357,10 @@ static struct kfd_process *create_process(const struct task_struct *thread,
>  	if (err != 0)
>  		goto err_init_apertures;
>
> +	INIT_DELAYED_WORK(&process->eviction_work, evict_process_worker);
> +	INIT_DELAYED_WORK(&process->restore_work, restore_process_worker);
> +	process->last_restore_timestamp = get_jiffies_64();
> +
>  	err = kfd_process_init_cwsr(process, filep);
>  	if (err)
>  		goto err_init_cwsr;
> @@ -402,6 +412,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
>  	INIT_LIST_HEAD(&pdd->qpd.priv_queue_list);
>  	pdd->qpd.dqm = dev->dqm;
>  	pdd->qpd.pqm = &p->pqm;
> +	pdd->qpd.evicted = 0;
>  	pdd->process = p;
>  	pdd->bound = PDD_UNBOUND;
>  	pdd->already_dequeued = false;
> @@ -490,6 +501,208 @@ struct kfd_process *kfd_lookup_process_by_pasid(unsigned int pasid)
>  	return ret_p;
>  }
>
> +/* This increments the process->ref counter. */
> +struct kfd_process *kfd_lookup_process_by_mm(const struct mm_struct *mm)
> +{
> +	struct kfd_process *p;
> +
> +	int idx = srcu_read_lock(&kfd_processes_srcu);
> +
> +	p = find_process_by_mm(mm);
> +	if (p)
> +		kref_get(&p->ref);
> +
> +	srcu_read_unlock(&kfd_processes_srcu, idx);
> +
> +	return p;
> +}
> +
> +/* process_evict_queues - Evict all user queues of a process
> + *
> + * Eviction is reference-counted per process-device. This means multiple
> + * evictions from different sources can be nested safely.
> + */
> +static int process_evict_queues(struct kfd_process *p)
> +{
> +	struct kfd_process_device *pdd;
> +	int r = 0;
> +	unsigned int n_evicted = 0;
> +
> +	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
> +		r = pdd->dev->dqm->ops.evict_process_queues(pdd->dev->dqm,
> +							    &pdd->qpd);
> +		if (r) {
> +			pr_err("Failed to evict process queues\n");
> +			goto fail;
> +		}
> +		n_evicted++;
> +	}
> +
> +	return r;
> +
> +fail:
> +	/* To keep state consistent, roll back partial eviction by
> +	 * restoring queues
> +	 */
> +	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
> +		if (n_evicted == 0)
> +			break;
> +		if (pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
> +							      &pdd->qpd))
> +			pr_err("Failed to restore queues\n");
> +
> +		n_evicted--;
> +	}
> +
> +	return r;
> +}
> +
> +/* process_restore_queues - Restore all user queues of a process */
> +static int process_restore_queues(struct kfd_process *p)
> +{
> +	struct kfd_process_device *pdd;
> +	int r, ret = 0;
> +
> +	list_for_each_entry(pdd, &p->per_device_data, per_device_list) {
> +		r = pdd->dev->dqm->ops.restore_process_queues(pdd->dev->dqm,
> +							      &pdd->qpd);
> +		if (r) {
> +			pr_err("Failed to restore process queues\n");
> +			if (!ret)
> +				ret = r;
> +		}
> +	}
> +
> +	return ret;
> +}
> +
> +static void evict_process_worker(struct work_struct *work)
> +{
> +	int ret;
> +	struct kfd_process *p;
> +	struct delayed_work *dwork;
> +
> +	dwork = to_delayed_work(work);
> +
> +	/* Process termination destroys this worker thread. So during the
> +	 * lifetime of this thread, kfd_process p will be valid
> +	 */
> +	p = container_of(dwork, struct kfd_process, eviction_work);
> +	WARN_ONCE(p->last_eviction_seqno != p->ef->seqno,
> +		  "Eviction fence mismatch\n");
> +
> +	/* A narrow window of overlap between the restore and evict work
> +	 * items is possible. Once amdgpu_amdkfd_gpuvm_restore_process_bos
> +	 * unreserves the KFD BOs, the process can be evicted again, but
> +	 * restore still has a few more steps to finish. So let's wait
> +	 * for any previous restore work to complete.
> +	 */
> +	flush_delayed_work(&p->restore_work);
> +
> +	pr_debug("Started evicting pasid %d\n", p->pasid);
> +	ret = process_evict_queues(p);
> +	if (!ret) {
> +		dma_fence_signal(p->ef);
> +		dma_fence_put(p->ef);
> +		p->ef = NULL;
> +		schedule_delayed_work(&p->restore_work,
> +				msecs_to_jiffies(PROCESS_RESTORE_TIME_MS));
> +
> +		pr_debug("Finished evicting pasid %d\n", p->pasid);
> +	} else
> +		pr_err("Failed to evict queues of pasid %d\n", p->pasid);
> +}
> +
> +static void restore_process_worker(struct work_struct *work)
> +{
> +	struct delayed_work *dwork;
> +	struct kfd_process *p;
> +	struct kfd_process_device *pdd;
> +	int ret = 0;
> +
> +	dwork = to_delayed_work(work);
> +
> +	/* Process termination destroys this worker thread. So during the
> +	 * lifetime of this thread, kfd_process p will be valid
> +	 */
> +	p = container_of(dwork, struct kfd_process, restore_work);
> +
> +	/* Call restore_process_bos on the first KGD device. This function
> +	 * takes care of restoring the whole process including other devices.
> +	 * Restore can fail if enough memory is not available. If so,
> +	 * reschedule again.
> +	 */
> +	pdd = list_first_entry(&p->per_device_data,
> +			       struct kfd_process_device,
> +			       per_device_list);
> +
> +	pr_debug("Started restoring pasid %d\n", p->pasid);
> +
> +	/* Setting last_restore_timestamp before successful restoration.
> +	 * Otherwise this would have to be set by KGD (restore_process_bos)
> +	 * before KFD BOs are unreserved. If not, the process can be evicted
> +	 * again before the timestamp is set.
> +	 * If restore fails, the timestamp will be set again in the next
> +	 * attempt. This would mean that the minimum GPU quanta would be
> +	 * PROCESS_ACTIVE_TIME_MS - (time to execute the following two
> +	 * functions)
> +	 */
> +
> +	p->last_restore_timestamp = get_jiffies_64();
> +	ret = pdd->dev->kfd2kgd->restore_process_bos(p->kgd_process_info,
> +						     &p->ef);
> +	if (ret) {
> +		pr_debug("Failed to restore BOs of pasid %d, retry after %d ms\n",
> +			 p->pasid, PROCESS_BACK_OFF_TIME_MS);
> +		ret = schedule_delayed_work(&p->restore_work,
> +				msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
> +		WARN(!ret, "reschedule restore work failed\n");
> +		return;
> +	}
> +
> +	ret = process_restore_queues(p);
> +	if (!ret)
> +		pr_debug("Finished restoring pasid %d\n", p->pasid);
> +	else
> +		pr_err("Failed to restore queues of pasid %d\n", p->pasid);
> +}
> +
> +void kfd_suspend_all_processes(void)
> +{
> +	struct kfd_process *p;
> +	unsigned int temp;
> +	int idx = srcu_read_lock(&kfd_processes_srcu);
> +
> +	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
> +		cancel_delayed_work_sync(&p->eviction_work);
> +		cancel_delayed_work_sync(&p->restore_work);
> +
> +		if (process_evict_queues(p))
> +			pr_err("Failed to suspend process %d\n", p->pasid);
> +		dma_fence_signal(p->ef);
> +		dma_fence_put(p->ef);
> +		p->ef = NULL;
> +	}
> +	srcu_read_unlock(&kfd_processes_srcu, idx);
> +}
> +
> +int kfd_resume_all_processes(void)
> +{
> +	struct kfd_process *p;
> +	unsigned int temp;
> +	int ret = 0, idx = srcu_read_lock(&kfd_processes_srcu);
> +
> +	hash_for_each_rcu(kfd_processes_table, temp, p, kfd_processes) {
> +		if (!schedule_delayed_work(&p->restore_work, 0)) {
> +			pr_err("Restore process %d failed during resume\n",
> +			       p->pasid);
> +			ret = -EFAULT;
> +		}
> +	}
> +	srcu_read_unlock(&kfd_processes_srcu, idx);
> +	return ret;
> +}
> +
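One more illustration, since the retry logic above is easy to miss: if restoring
the BOs fails (typically under memory pressure), the restore worker simply re-arms
itself and tries again after PROCESS_BACK_OFF_TIME_MS. A minimal sketch of that
loop (try_restore_bos() is a hypothetical stand-in for the kfd2kgd
restore_process_bos call; everything else mirrors the worker above):

    static void sketch_restore_worker(struct kfd_process *p)
    {
            /* Stamp the restore time up front, mirroring the comment in the
             * patch: the starvation check in
             * kgd2kfd_schedule_evict_and_restore_process() should see it even
             * if an eviction races in right after the BOs are unreserved.
             */
            p->last_restore_timestamp = get_jiffies_64();

            if (try_restore_bos(p)) {               /* hypothetical helper */
                    /* Not enough memory yet: back off and retry later */
                    schedule_delayed_work(&p->restore_work,
                                    msecs_to_jiffies(PROCESS_BACK_OFF_TIME_MS));
                    return;
            }

            process_restore_queues(p);              /* restart the user queues */
    }
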
>  int kfd_reserved_mem_mmap(struct kfd_process *process,
>  			  struct vm_area_struct *vma)
>  {
> --
> 2.7.4
>

This patch is:
Acked-by: Oded Gabbay <oded.gabbay at gmail.com>