Re: [PATCH V6 07/10] accel/amdxdna: Add command execution

Lizhi Hou <lizhi.hou@xxxxxxx> · Wed, 6 Nov 2024 10:23:21 -0800

On 11/3/24 22:31, Matthew Brost wrote:
On Wed, Oct 30, 2024 at 08:51:44AM -0700, Lizhi Hou wrote:
Add interfaces for user application to submit command and wait for its
completion.

Co-developed-by: Min Ma <min.ma@xxxxxxx>
Signed-off-by: Min Ma <min.ma@xxxxxxx>
Signed-off-by: Lizhi Hou <lizhi.hou@xxxxxxx>
---
  drivers/accel/amdxdna/aie2_ctx.c              | 664 +++++++++++++++++-
  drivers/accel/amdxdna/aie2_message.c          | 343 +++++++++
  drivers/accel/amdxdna/aie2_pci.c              |   5 +
  drivers/accel/amdxdna/aie2_pci.h              |  35 +
  drivers/accel/amdxdna/aie2_psp.c              |   2 +
  drivers/accel/amdxdna/aie2_smu.c              |   2 +
  drivers/accel/amdxdna/amdxdna_ctx.c           | 330 ++++++++-
  drivers/accel/amdxdna/amdxdna_ctx.h           | 111 +++
  drivers/accel/amdxdna/amdxdna_gem.c           |  10 +
  drivers/accel/amdxdna/amdxdna_gem.h           |   1 +
  .../accel/amdxdna/amdxdna_mailbox_helper.c    |   5 +
  drivers/accel/amdxdna/amdxdna_pci_drv.c       |   5 +
  drivers/accel/amdxdna/amdxdna_pci_drv.h       |   4 +
  drivers/accel/amdxdna/amdxdna_sysfs.c         |   5 +
  drivers/accel/amdxdna/npu1_regs.c             |   1 +
  drivers/accel/amdxdna/npu2_regs.c             |   1 +
  drivers/accel/amdxdna/npu4_regs.c             |   1 +
  drivers/accel/amdxdna/npu5_regs.c             |   1 +
  include/trace/events/amdxdna.h                |  41 ++
  include/uapi/drm/amdxdna_accel.h              |  38 +
  20 files changed, 1596 insertions(+), 9 deletions(-)

diff --git a/drivers/accel/amdxdna/aie2_ctx.c b/drivers/accel/amdxdna/aie2_ctx.c
index 617fc05077d9..c3ac668e16ab 100644
--- a/drivers/accel/amdxdna/aie2_ctx.c
+++ b/drivers/accel/amdxdna/aie2_ctx.c
@@ -8,8 +8,12 @@
  #include <drm/drm_gem.h>
  #include <drm/drm_gem_shmem_helper.h>
  #include <drm/drm_print.h>
+#include <drm/drm_syncobj.h>
+#include <linux/hmm.h>
  #include <linux/types.h>
+#include <trace/events/amdxdna.h>
  
+#include "aie2_msg_priv.h"
  #include "aie2_pci.h"
  #include "aie2_solver.h"
  #include "amdxdna_ctx.h"
@@ -17,6 +21,337 @@
  #include "amdxdna_mailbox.h"
  #include "amdxdna_pci_drv.h"
  
+bool force_cmdlist;
+module_param(force_cmdlist, bool, 0600);
+MODULE_PARM_DESC(force_cmdlist, "Force use command list (Default false)");
+
+#define HWCTX_MAX_TIMEOUT	60000 /* milliseconds */
+
+static struct amdxdna_sched_job *
+aie2_hwctx_get_job(struct amdxdna_hwctx *hwctx, u64 seq)
+{
+	int idx;
+
+	/* Special sequence number for oldest fence if exist */
+	if (seq == AMDXDNA_INVALID_CMD_HANDLE) {
+		idx = get_job_idx(hwctx->priv->seq);
+		goto out;
+	}
+
+	if (seq >= hwctx->priv->seq)
+		return ERR_PTR(-EINVAL);
+
+	if (seq + HWCTX_MAX_CMDS < hwctx->priv->seq)
+		return NULL;
+
+	idx = get_job_idx(seq);
+
+out:
+	return hwctx->priv->pending[idx];
+}
+
+/* The bad_job is used in aie2_sched_job_timedout, otherwise, set it to NULL */
+static void aie2_hwctx_stop(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx,
+			    struct drm_sched_job *bad_job)
+{
+	drm_sched_stop(&hwctx->priv->sched, bad_job);
+	aie2_destroy_context(xdna->dev_handle, hwctx);
+}
+
+static int aie2_hwctx_restart(struct amdxdna_dev *xdna, struct amdxdna_hwctx *hwctx)
+{
+	struct amdxdna_gem_obj *heap = hwctx->priv->heap;
+	int ret;
+
+	ret = aie2_create_context(xdna->dev_handle, hwctx);
+	if (ret) {
+		XDNA_ERR(xdna, "Create hwctx failed, ret %d", ret);
+		goto out;
+	}
+
+	ret = aie2_map_host_buf(xdna->dev_handle, hwctx->fw_ctx_id,
+				heap->mem.userptr, heap->mem.size);
+	if (ret) {
+		XDNA_ERR(xdna, "Map host buf failed, ret %d", ret);
+		goto out;
+	}
+
+	if (hwctx->status != HWCTX_STAT_READY) {
+		XDNA_DBG(xdna, "hwctx is not ready, status %d", hwctx->status);
+		goto out;
+	}
+
+	ret = aie2_config_cu(hwctx);
+	if (ret) {
+		XDNA_ERR(xdna, "Config cu failed, ret %d", ret);
+		goto out;
+	}
+
+out:
+	drm_sched_start(&hwctx->priv->sched);
+	XDNA_DBG(xdna, "%s restarted, ret %d", hwctx->name, ret);
+	return ret;
+}
+
+void aie2_stop_ctx_by_col_map(struct amdxdna_client *client, u32 col_map)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_hwctx *hwctx;
+	int next = 0;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+	mutex_lock(&client->hwctx_lock);
+	idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+		/* check if the HW context uses the error column */
+		if (!(col_map & amdxdna_hwctx_col_map(hwctx)))
+			continue;
+
+		aie2_hwctx_stop(xdna, hwctx, NULL);
+		hwctx->old_status = hwctx->status;
+		hwctx->status = HWCTX_STAT_STOP;
+		XDNA_DBG(xdna, "Stop %s", hwctx->name);
+	}
+	mutex_unlock(&client->hwctx_lock);
+}
+
+void aie2_restart_ctx(struct amdxdna_client *client)
+{
+	struct amdxdna_dev *xdna = client->xdna;
+	struct amdxdna_hwctx *hwctx;
+	int next = 0;
+
+	drm_WARN_ON(&xdna->ddev, !mutex_is_locked(&xdna->dev_lock));
+	mutex_lock(&client->hwctx_lock);
+	idr_for_each_entry_continue(&client->hwctx_idr, hwctx, next) {
+		if (hwctx->status != HWCTX_STAT_STOP)
+			continue;
+
+		hwctx->status = hwctx->old_status;
+		XDNA_DBG(xdna, "Resetting %s", hwctx->name);
+		aie2_hwctx_restart(xdna, hwctx);
+	}
+	mutex_unlock(&client->hwctx_lock);
+}
+
+static int aie2_hwctx_wait_for_idle(struct amdxdna_hwctx *hwctx)
+{
+	struct amdxdna_sched_job *job;
+
+	mutex_lock(&hwctx->priv->io_lock);
+	if (!hwctx->priv->seq) {
+		mutex_unlock(&hwctx->priv->io_lock);
+		return 0;
+	}
+
+	job = aie2_hwctx_get_job(hwctx, hwctx->priv->seq - 1);
+	if (IS_ERR_OR_NULL(job)) {
+		mutex_unlock(&hwctx->priv->io_lock);
+		XDNA_WARN(hwctx->client->xdna, "Corrupted pending list");
+		return 0;
+	}
+	mutex_unlock(&hwctx->priv->io_lock);
+
+	wait_event(hwctx->priv->job_free_wq, !job->fence);
+
+	return 0;
+}
+
+static void
+aie2_sched_notify(struct amdxdna_sched_job *job)
+{
+	struct dma_fence *fence = job->fence;
+
+	job->hwctx->priv->completed++;
+	dma_fence_signal(fence);
+	trace_xdna_job(&job->base, job->hwctx->name, "signaled fence", job->seq);
+	dma_fence_put(fence);
+	mmput(job->mm);
+	amdxdna_job_put(job);
+}
+
+static int
+aie2_sched_resp_handler(void *handle, const u32 *data, size_t size)
+{
+	struct amdxdna_sched_job *job = handle;
+	struct amdxdna_gem_obj *cmd_abo;
+	u32 ret = 0;
+	u32 status;
+
+	cmd_abo = job->cmd_bo;
+
+	if (unlikely(!data))
+		goto out;
+
+	if (unlikely(size != sizeof(u32))) {
+		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	status = *data;
+	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+	if (status == AIE2_STATUS_SUCCESS)
+		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+	else
+		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ERROR);
+
+out:
+	aie2_sched_notify(job);
+	return ret;
+}
+
+static int
+aie2_sched_nocmd_resp_handler(void *handle, const u32 *data, size_t size)
+{
+	struct amdxdna_sched_job *job = handle;
+	u32 ret = 0;
+	u32 status;
+
+	if (unlikely(!data))
+		goto out;
+
+	if (unlikely(size != sizeof(u32))) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	status = *data;
+	XDNA_DBG(job->hwctx->client->xdna, "Resp status 0x%x", status);
+
+out:
+	aie2_sched_notify(job);
+	return ret;
+}
+
+static int
+aie2_sched_cmdlist_resp_handler(void *handle, const u32 *data, size_t size)
+{
+	struct amdxdna_sched_job *job = handle;
+	struct amdxdna_gem_obj *cmd_abo;
+	struct cmd_chain_resp *resp;
+	struct amdxdna_dev *xdna;
+	u32 fail_cmd_status;
+	u32 fail_cmd_idx;
+	u32 ret = 0;
+
+	cmd_abo = job->cmd_bo;
+	if (unlikely(!data) || unlikely(size != sizeof(u32) * 3)) {
+		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	resp = (struct cmd_chain_resp *)data;
+	xdna = job->hwctx->client->xdna;
+	XDNA_DBG(xdna, "Status 0x%x", resp->status);
+	if (resp->status == AIE2_STATUS_SUCCESS) {
+		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_COMPLETED);
+		goto out;
+	}
+
+	/* Slow path to handle error, read from ringbuf on BAR */
+	fail_cmd_idx = resp->fail_cmd_idx;
+	fail_cmd_status = resp->fail_cmd_status;
+	XDNA_DBG(xdna, "Failed cmd idx %d, status 0x%x",
+		 fail_cmd_idx, fail_cmd_status);
+
+	if (fail_cmd_status == AIE2_STATUS_SUCCESS) {
+		amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_ABORT);
+		ret = -EINVAL;
+		goto out;
+	}
+	amdxdna_cmd_set_state(cmd_abo, fail_cmd_status);
+
+	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN) {
+		struct amdxdna_cmd_chain *cc = amdxdna_cmd_get_payload(cmd_abo, NULL);
+
+		cc->error_index = fail_cmd_idx;
+		if (cc->error_index >= cc->command_count)
+			cc->error_index = 0;
+	}
+out:
+	aie2_sched_notify(job);
+	return ret;
+}
+
+static struct dma_fence *
+aie2_sched_job_run(struct drm_sched_job *sched_job)
+{
+	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+	struct amdxdna_gem_obj *cmd_abo = job->cmd_bo;
+	struct amdxdna_hwctx *hwctx = job->hwctx;
+	struct dma_fence *fence;
+	int ret;
+
+	if (!mmget_not_zero(job->mm))
+		return ERR_PTR(-ESRCH);
+
+	kref_get(&job->refcnt);
+	fence = dma_fence_get(job->fence);
+
+	if (unlikely(!cmd_abo)) {
+		ret = aie2_sync_bo(hwctx, job, aie2_sched_nocmd_resp_handler);
+		goto out;
+	}
+
+	amdxdna_cmd_set_state(cmd_abo, ERT_CMD_STATE_NEW);
+
+	if (amdxdna_cmd_get_op(cmd_abo) == ERT_CMD_CHAIN)
+		ret = aie2_cmdlist_multi_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+	else if (force_cmdlist)
+		ret = aie2_cmdlist_single_execbuf(hwctx, job, aie2_sched_cmdlist_resp_handler);
+	else
+		ret = aie2_execbuf(hwctx, job, aie2_sched_resp_handler);
+
+out:
+	if (ret) {
+		dma_fence_put(job->fence);
+		amdxdna_job_put(job);
+		mmput(job->mm);
+		fence = ERR_PTR(ret);
+	}
+	trace_xdna_job(sched_job, hwctx->name, "sent to device", job->seq);
+
+	return fence;
+}
+
+static void aie2_sched_job_free(struct drm_sched_job *sched_job)
+{
+	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+	struct amdxdna_hwctx *hwctx = job->hwctx;
+
+	trace_xdna_job(sched_job, hwctx->name, "job free", job->seq);
+	drm_sched_job_cleanup(sched_job);
+	job->fence = NULL;
+	amdxdna_job_put(job);
+
+	wake_up(&hwctx->priv->job_free_wq);
+}
+
+static enum drm_gpu_sched_stat
+aie2_sched_job_timedout(struct drm_sched_job *sched_job)
+{
+	struct amdxdna_sched_job *job = drm_job_to_xdna_job(sched_job);
+	struct amdxdna_hwctx *hwctx = job->hwctx;
+	struct amdxdna_dev *xdna;
+
+	xdna = hwctx->client->xdna;
+	trace_xdna_job(sched_job, hwctx->name, "job timedout", job->seq);
+	mutex_lock(&xdna->dev_lock);
+	aie2_hwctx_stop(xdna, hwctx, sched_job);
+
+	aie2_hwctx_restart(xdna, hwctx);
+	mutex_unlock(&xdna->dev_lock);
+
+	return DRM_GPU_SCHED_STAT_NOMINAL;
+}
+
+const struct drm_sched_backend_ops sched_ops = {
+	.run_job = aie2_sched_job_run,
+	.free_job = aie2_sched_job_free,
+	.timedout_job = aie2_sched_job_timedout,
+};
+
  static int aie2_hwctx_col_list(struct amdxdna_hwctx *hwctx)
  {
  	struct amdxdna_dev *xdna = hwctx->client->xdna;
@@ -126,13 +461,66 @@ static void aie2_release_resource(struct amdxdna_hwctx *hwctx)
  		XDNA_ERR(xdna, "Release AIE resource failed, ret %d", ret);
  }
  
+static int aie2_ctx_syncobj_create(struct amdxdna_hwctx *hwctx)
+{
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+	struct drm_file *filp = hwctx->client->filp;
+	struct drm_syncobj *syncobj;
+	u32 hdl;
+	int ret;
+
+	hwctx->syncobj_hdl = AMDXDNA_INVALID_FENCE_HANDLE;
+
+	ret = drm_syncobj_create(&syncobj, DRM_SYNCOBJ_CREATE_SIGNALED, NULL);
+	if (ret) {
+		XDNA_ERR(xdna, "Create ctx syncobj failed, ret %d", ret);
+		return ret;
+	}
+	ret = drm_syncobj_get_handle(filp, syncobj, &hdl);
+	if (ret) {
+		drm_syncobj_put(syncobj);
+		XDNA_ERR(xdna, "Create ctx syncobj handle failed, ret %d", ret);
+		return ret;
+	}
+	hwctx->priv->syncobj = syncobj;
+	hwctx->syncobj_hdl = hdl;
+
+	return 0;
+}
+
+static void aie2_ctx_syncobj_destroy(struct amdxdna_hwctx *hwctx)
+{
+	/*
+	 * The syncobj_hdl is owned by user space and will be cleaned up
+	 * separately.
+	 */
+	drm_syncobj_put(hwctx->priv->syncobj);
+}
+
+static void aie2_ctx_syncobj_add_fence(struct amdxdna_hwctx *hwctx,
+				       struct dma_fence *ofence, u64 seq)
+{
+	struct drm_syncobj *syncobj = hwctx->priv->syncobj;
+	struct dma_fence_chain *chain;
+
+	if (!syncobj)
+		return;
+
+	chain = dma_fence_chain_alloc();
+	if (!chain)
+		return;
+
+	drm_syncobj_add_point(syncobj, chain, ofence, seq);
+}
+
  int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
  {
  	struct amdxdna_client *client = hwctx->client;
  	struct amdxdna_dev *xdna = client->xdna;
+	struct drm_gpu_scheduler *sched;
  	struct amdxdna_hwctx_priv *priv;
  	struct amdxdna_gem_obj *heap;
-	int ret;
+	int i, ret;
  
  	priv = kzalloc(sizeof(*hwctx->priv), GFP_KERNEL);
  	if (!priv)
@@ -157,10 +545,48 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
  		goto put_heap;
  	}
  
+	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+		struct amdxdna_gem_obj *abo;
+		struct amdxdna_drm_create_bo args = {
+			.flags = 0,
+			.type = AMDXDNA_BO_DEV,
+			.vaddr = 0,
+			.size = MAX_CHAIN_CMDBUF_SIZE,
+		};
+
+		abo = amdxdna_drm_alloc_dev_bo(&xdna->ddev, &args, client->filp, true);
+		if (IS_ERR(abo)) {
+			ret = PTR_ERR(abo);
+			goto free_cmd_bufs;
+		}
+
+		XDNA_DBG(xdna, "Command buf %d addr 0x%llx size 0x%lx",
+			 i, abo->mem.dev_addr, abo->mem.size);
+		priv->cmd_buf[i] = abo;
+	}
+
+	sched = &priv->sched;
+	mutex_init(&priv->io_lock);
+	ret = drm_sched_init(sched, &sched_ops, NULL, DRM_SCHED_PRIORITY_COUNT,
+			     HWCTX_MAX_CMDS, 0, msecs_to_jiffies(HWCTX_MAX_TIMEOUT),
+			     NULL, NULL, hwctx->name, xdna->ddev.dev);
+	if (ret) {
+		XDNA_ERR(xdna, "Failed to init DRM scheduler. ret %d", ret);
+		goto free_cmd_bufs;
+	}
+
+	ret = drm_sched_entity_init(&priv->entity, DRM_SCHED_PRIORITY_NORMAL,
+				    &sched, 1, NULL);
+	if (ret) {
+		XDNA_ERR(xdna, "Failed to initial sched entiry. ret %d", ret);
+		goto free_sched;
+	}
+	init_waitqueue_head(&priv->job_free_wq);
+
  	ret = aie2_hwctx_col_list(hwctx);
  	if (ret) {
  		XDNA_ERR(xdna, "Create col list failed, ret %d", ret);
-		goto unpin;
+		goto free_entity;
  	}
  
  	ret = aie2_alloc_resource(hwctx);
@@ -175,6 +601,13 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
  		XDNA_ERR(xdna, "Map host buffer failed, ret %d", ret);
  		goto release_resource;
  	}
+
+	ret = aie2_ctx_syncobj_create(hwctx);
+	if (ret) {
+		XDNA_ERR(xdna, "Create syncobj failed, ret %d", ret);
+		goto release_resource;
+	}
+
  	hwctx->status = HWCTX_STAT_INIT;
  
  	XDNA_DBG(xdna, "hwctx %s init completed", hwctx->name);
@@ -185,7 +618,16 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
  	aie2_release_resource(hwctx);
  free_col_list:
  	kfree(hwctx->col_list);
-unpin:
+free_entity:
+	drm_sched_entity_destroy(&priv->entity);
+free_sched:
+	drm_sched_fini(&priv->sched);
+free_cmd_bufs:
+	for (i = 0; i < ARRAY_SIZE(priv->cmd_buf); i++) {
+		if (!priv->cmd_buf[i])
+			continue;
+		drm_gem_object_put(to_gobj(priv->cmd_buf[i]));
+	}
  	amdxdna_gem_unpin(heap);
  put_heap:
  	drm_gem_object_put(to_gobj(heap));
@@ -196,11 +638,44 @@ int aie2_hwctx_init(struct amdxdna_hwctx *hwctx)
  
  void aie2_hwctx_fini(struct amdxdna_hwctx *hwctx)
  {
+	struct amdxdna_sched_job *job;
+	struct amdxdna_dev *xdna;
+	int idx;
+
+	xdna = hwctx->client->xdna;
+	aie2_ctx_syncobj_destroy(hwctx);
+	drm_sched_wqueue_stop(&hwctx->priv->sched);
+
+	/* Now, scheduler will not send command to device. */
  	aie2_release_resource(hwctx);
  
+	/*
+	 * All submitted commands are aborted.
+	 * Restart scheduler queues to cleanup jobs. The amdxdna_sched_job_run()
+	 * will return NODEV if it is called.
+	 */
+	drm_sched_wqueue_start(&hwctx->priv->sched);
+
+	aie2_hwctx_wait_for_idle(hwctx);
+	drm_sched_entity_destroy(&hwctx->priv->entity);
+	drm_sched_fini(&hwctx->priv->sched);
+
+	for (idx = 0; idx < HWCTX_MAX_CMDS; idx++) {
+		job = hwctx->priv->pending[idx];
+		if (!job)
+			continue;
+
+		dma_fence_put(job->out_fence);
+		amdxdna_job_put(job);
+	}
+	XDNA_DBG(xdna, "%s sequence number %lld", hwctx->name, hwctx->priv->seq);
+
+	for (idx = 0; idx < ARRAY_SIZE(hwctx->priv->cmd_buf); idx++)
+		drm_gem_object_put(to_gobj(hwctx->priv->cmd_buf[idx]));
  	amdxdna_gem_unpin(hwctx->priv->heap);
  	drm_gem_object_put(to_gobj(hwctx->priv->heap));
  
+	mutex_destroy(&hwctx->priv->io_lock);
  	kfree(hwctx->col_list);
  	kfree(hwctx->priv);
  	kfree(hwctx->cus);
@@ -267,3 +742,186 @@ int aie2_hwctx_config(struct amdxdna_hwctx *hwctx, u32 type, u64 value, void *bu
  		return -EOPNOTSUPP;
  	}
  }
+
+static int aie2_populate_range(struct amdxdna_gem_obj *abo)
+{
+	struct amdxdna_dev *xdna = to_xdna_dev(to_gobj(abo)->dev);
+	struct mm_struct *mm = abo->mem.notifier.mm;
+	struct hmm_range range = { 0 };
+	unsigned long timeout;
+	int ret;
+
+	XDNA_INFO_ONCE(xdna, "populate memory range %llx size %lx",
+		       abo->mem.userptr, abo->mem.size);
+	range.notifier = &abo->mem.notifier;
+	range.start = abo->mem.userptr;
+	range.end = abo->mem.userptr + abo->mem.size;
+	range.hmm_pfns = abo->mem.pfns;
+	range.default_flags = HMM_PFN_REQ_FAULT;
+
+	if (!mmget_not_zero(mm))
+		return -EFAULT;
+
+	timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
+again:
+	range.notifier_seq = mmu_interval_read_begin(&abo->mem.notifier);
+	mmap_read_lock(mm);
+	ret = hmm_range_fault(&range);
+	mmap_read_unlock(mm);
+	if (ret) {
+		if (time_after(jiffies, timeout)) {
+			ret = -ETIME;
+			goto put_mm;
+		}
+
+		if (ret == -EBUSY)
+			goto again;
+
+		goto put_mm;
+	}
+
+	mutex_lock(&abo->mem.notify_lock);
+	if (mmu_interval_read_retry(&abo->mem.notifier, range.notifier_seq)) {
+		mutex_unlock(&abo->mem.notify_lock);
+		goto again;
+	}
+	abo->mem.map_invalid = false;
+	mutex_unlock(&abo->mem.notify_lock);
+
+put_mm:
+	mmput(mm);
+	return ret;
+}
+
+static int aie2_hwctx_push_job(struct amdxdna_sched_job *job, u64 *seq)
+{
+	struct amdxdna_hwctx *hwctx = job->hwctx;
+	struct amdxdna_sched_job *other;
+	struct dma_fence *fence;
+	long ret;
+	int idx;
+
+again:
+	mutex_lock(&hwctx->priv->io_lock);
+	idx = get_job_idx(hwctx->priv->seq);
+	/* When pending list full, hwctx->seq points to oldest fence */
+	other = hwctx->priv->pending[idx];
+	if (other && !dma_fence_is_signaled(other->out_fence)) {
+		fence = dma_fence_get(other->out_fence);
+		mutex_unlock(&hwctx->priv->io_lock);
+
+		ret = dma_fence_wait_timeout(fence, true, MAX_SCHEDULE_TIMEOUT);
+		dma_fence_put(fence);
+		if (!ret)
+			return -ETIME;
+		else if (ret < 0)
+			return ret;
+		goto again;
+	}
+
+	if (other) {
+		dma_fence_put(other->out_fence);
+		amdxdna_job_put(other);
+	}
+
+	hwctx->priv->pending[idx] = job;
+	job->seq = hwctx->priv->seq++;
+	*seq = job->seq;
+	kref_get(&job->refcnt);
+
+	fence = dma_fence_get(job->out_fence);
+	drm_sched_entity_push_job(&job->base);
+	mutex_unlock(&hwctx->priv->io_lock);
+
+	aie2_ctx_syncobj_add_fence(hwctx, fence, *seq);
+	dma_fence_put(fence);
+	return 0;
+}
+
+int aie2_cmd_submit(struct amdxdna_hwctx *hwctx, struct amdxdna_sched_job *job, u64 *seq)
+{
+	struct amdxdna_dev *xdna = hwctx->client->xdna;
+	struct ww_acquire_ctx acquire_ctx;
+	struct amdxdna_gem_obj *abo;
+	unsigned long timeout = 0;
+	int ret, i;
+
+	ret = drm_sched_job_init(&job->base, &hwctx->priv->entity, 1, hwctx);
+	if (ret) {
+		XDNA_ERR(xdna, "DRM job init failed, ret %d", ret);
+		return ret;
+	}
+
+	drm_sched_job_arm(&job->base);
Again drive by comments.

This looks wildly dangerous. This typically should be called once
holding all locks at the point in which you cannot fail. I get that
you signal the jobs fence on failure but that doesn't seem like a great
idea nor do I think how the schedule is designed.

The flow typically is:

acquire all locks and setup job...
arm
install fences
push

^^^ With not being able to to fail between arn & push.

Your flow is...

arm
acquire locks...
install fences
drop locks...
acquire different locks...
push
drops different locks...

Seems dangerous, I would reconsider.

Ok, I worked on this and will send out v7 patches to follow the 
suggested flow.


Thanks,

Lizhi