Re: [PATCH 2/2] accel/qaic: Expand DRM device lifecycle

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Reviewed-by: Jacek Lawrynowicz <jacek.lawrynowicz@xxxxxxxxxxxxxxx>

On 17.11.2023 18:43, Jeffrey Hugo wrote:
> From: Carl Vanderlip <quic_carlv@xxxxxxxxxxx>
> 
> Currently the QAIC DRM device registers itself when the MHI QAIC_CONTROL
> channel becomes available. This is when the device is able to process
> workloads. However, the DRM driver also provides the debugfs interface
> bootlog for the device. If the device fails to boot to the QSM (which
> brings up the MHI QAIC_CONTROL channel), the bootlog won't be available for
> debugging why it failed to boot.
> 
> Change when the DRM device registers itself from when QAIC_CONTROL is
> available to when the card is first probed on the PCI bus. Additionally,
> make the DRM driver persist through reset/error cases so the driver
> doesn't have to be reloaded to access the card again. Send
> KOBJ_ONLINE/OFFLINE uevents so userspace can know when DRM device is
> ready to handle requests.
> 
> Signed-off-by: Carl Vanderlip <quic_carlv@xxxxxxxxxxx>
> Reviewed-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@xxxxxxxxxxx>
> Reviewed-by: Jeffrey Hugo <quic_jhugo@xxxxxxxxxxx>
> Signed-off-by: Jeffrey Hugo <quic_jhugo@xxxxxxxxxxx>
> ---
>  Documentation/accel/qaic/qaic.rst   |  9 +++++-
>  drivers/accel/qaic/mhi_controller.c |  2 +-
>  drivers/accel/qaic/qaic.h           |  2 +-
>  drivers/accel/qaic/qaic_drv.c       | 44 +++++++++++------------------
>  4 files changed, 27 insertions(+), 30 deletions(-)
> 
> diff --git a/Documentation/accel/qaic/qaic.rst b/Documentation/accel/qaic/qaic.rst
> index f81020736ebf..efb7771273bb 100644
> --- a/Documentation/accel/qaic/qaic.rst
> +++ b/Documentation/accel/qaic/qaic.rst
> @@ -93,8 +93,15 @@ commands (does not impact QAIC).
>  uAPI
>  ====
>  
> +QAIC creates an accel device per phsyical PCIe device. This accel device exists
> +for as long as the PCIe device is known to Linux.
> +
> +The PCIe device may not be in the state to accept requests from userspace at
> +all times. QAIC will trigger KOBJ_ONLINE/OFFLINE uevents to advertise when the
> +device can accept requests (ONLINE) and when the device is no longer accepting
> +requests (OFFLINE) because of a reset or other state transition.
> +
>  QAIC defines a number of driver specific IOCTLs as part of the userspace API.
> -This section describes those APIs.
>  
>  DRM_IOCTL_QAIC_MANAGE
>    This IOCTL allows userspace to send a NNC request to the QSM. The call will
> diff --git a/drivers/accel/qaic/mhi_controller.c b/drivers/accel/qaic/mhi_controller.c
> index 5d3cc30009cc..832464f2833a 100644
> --- a/drivers/accel/qaic/mhi_controller.c
> +++ b/drivers/accel/qaic/mhi_controller.c
> @@ -469,7 +469,7 @@ static void mhi_status_cb(struct mhi_controller *mhi_cntrl, enum mhi_callback re
>  		pci_err(qdev->pdev, "Fatal error received from device. Attempting to recover\n");
>  	/* this event occurs in non-atomic context */
>  	if (reason == MHI_CB_SYS_ERROR)
> -		qaic_dev_reset_clean_local_state(qdev, true);
> +		qaic_dev_reset_clean_local_state(qdev);
>  }
>  
>  static int mhi_reset_and_async_power_up(struct mhi_controller *mhi_cntrl)
> diff --git a/drivers/accel/qaic/qaic.h b/drivers/accel/qaic/qaic.h
> index bd0c884e6bf7..66f4abf6c4c4 100644
> --- a/drivers/accel/qaic/qaic.h
> +++ b/drivers/accel/qaic/qaic.h
> @@ -283,7 +283,7 @@ void wakeup_dbc(struct qaic_device *qdev, u32 dbc_id);
>  void release_dbc(struct qaic_device *qdev, u32 dbc_id);
>  
>  void wake_all_cntl(struct qaic_device *qdev);
> -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset);
> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev);
>  
>  struct drm_gem_object *qaic_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf);
>  
> diff --git a/drivers/accel/qaic/qaic_drv.c b/drivers/accel/qaic/qaic_drv.c
> index 02fe23248da4..c19bc83b249c 100644
> --- a/drivers/accel/qaic/qaic_drv.c
> +++ b/drivers/accel/qaic/qaic_drv.c
> @@ -8,6 +8,7 @@
>  #include <linux/idr.h>
>  #include <linux/interrupt.h>
>  #include <linux/list.h>
> +#include <linux/kobject.h>
>  #include <linux/kref.h>
>  #include <linux/mhi.h>
>  #include <linux/module.h>
> @@ -43,9 +44,6 @@ MODULE_PARM_DESC(datapath_polling, "Operate the datapath in polling mode");
>  static bool link_up;
>  static DEFINE_IDA(qaic_usrs);
>  
> -static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id);
> -static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id);
> -
>  static void free_usr(struct kref *kref)
>  {
>  	struct qaic_user *usr = container_of(kref, struct qaic_user, ref_count);
> @@ -183,13 +181,6 @@ static int qaic_create_drm_device(struct qaic_device *qdev, s32 partition_id)
>  
>  	qddev->partition_id = partition_id;
>  
> -	/*
> -	 * drm_dev_unregister() sets the driver data to NULL and
> -	 * drm_dev_register() does not update the driver data. During a SOC
> -	 * reset drm dev is unregistered and registered again leaving the
> -	 * driver data to NULL.
> -	 */
> -	dev_set_drvdata(to_accel_kdev(qddev), drm->accel);
>  	ret = drm_dev_register(drm, 0);
>  	if (ret)
>  		pci_dbg(qdev->pdev, "drm_dev_register failed %d\n", ret);
> @@ -203,7 +194,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
>  	struct drm_device *drm = to_drm(qddev);
>  	struct qaic_user *usr;
>  
> -	drm_dev_get(drm);
>  	drm_dev_unregister(drm);
>  	qddev->partition_id = 0;
>  	/*
> @@ -232,7 +222,6 @@ static void qaic_destroy_drm_device(struct qaic_device *qdev, s32 partition_id)
>  		mutex_lock(&qddev->users_mutex);
>  	}
>  	mutex_unlock(&qddev->users_mutex);
> -	drm_dev_put(drm);
>  }
>  
>  static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id *id)
> @@ -254,8 +243,6 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
>  
>  	qdev = pci_get_drvdata(to_pci_dev(mhi_dev->mhi_cntrl->cntrl_dev));
>  
> -	qdev->reset_state = QAIC_ONLINE;
> -
>  	dev_set_drvdata(&mhi_dev->dev, qdev);
>  	qdev->cntl_ch = mhi_dev;
>  
> @@ -265,6 +252,7 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
>  		return ret;
>  	}
>  
> +	qdev->reset_state = QAIC_BOOT;
>  	ret = get_cntl_version(qdev, NULL, &major, &minor);
>  	if (ret || major != CNTL_MAJOR || minor > CNTL_MINOR) {
>  		pci_err(qdev->pdev, "%s: Control protocol version (%d.%d) not supported. Supported version is (%d.%d). Ret: %d\n",
> @@ -272,8 +260,8 @@ static int qaic_mhi_probe(struct mhi_device *mhi_dev, const struct mhi_device_id
>  		ret = -EINVAL;
>  		goto close_control;
>  	}
> -
> -	ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
> +	qdev->reset_state = QAIC_ONLINE;
> +	kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_ONLINE);
>  
>  	return ret;
>  
> @@ -291,6 +279,7 @@ static void qaic_notify_reset(struct qaic_device *qdev)
>  {
>  	int i;
>  
> +	kobject_uevent(&(to_accel_kdev(qdev->qddev))->kobj, KOBJ_OFFLINE);
>  	qdev->reset_state = QAIC_OFFLINE;
>  	/* wake up any waiters to avoid waiting for timeouts at sync */
>  	wake_all_cntl(qdev);
> @@ -299,21 +288,15 @@ static void qaic_notify_reset(struct qaic_device *qdev)
>  	synchronize_srcu(&qdev->dev_lock);
>  }
>  
> -void qaic_dev_reset_clean_local_state(struct qaic_device *qdev, bool exit_reset)
> +void qaic_dev_reset_clean_local_state(struct qaic_device *qdev)
>  {
>  	int i;
>  
>  	qaic_notify_reset(qdev);
>  
> -	/* remove drmdevs to prevent new users from coming in */
> -	qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
> -
>  	/* start tearing things down */
>  	for (i = 0; i < qdev->num_dbc; ++i)
>  		release_dbc(qdev, i);
> -
> -	if (exit_reset)
> -		qdev->reset_state = QAIC_ONLINE;
>  }
>  
>  static void cleanup_qdev(struct qaic_device *qdev)
> @@ -338,6 +321,7 @@ static struct qaic_device *create_qdev(struct pci_dev *pdev, const struct pci_de
>  	if (!qdev)
>  		return NULL;
>  
> +	qdev->reset_state = QAIC_OFFLINE;
>  	if (id->device == PCI_DEV_AIC100) {
>  		qdev->num_dbc = 16;
>  		qdev->dbc = devm_kcalloc(&pdev->dev, qdev->num_dbc, sizeof(*qdev->dbc), GFP_KERNEL);
> @@ -499,15 +483,21 @@ static int qaic_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
>  		goto cleanup_qdev;
>  	}
>  
> +	ret = qaic_create_drm_device(qdev, QAIC_NO_PARTITION);
> +	if (ret)
> +		goto cleanup_qdev;
> +
>  	qdev->mhi_cntrl = qaic_mhi_register_controller(pdev, qdev->bar_0, mhi_irq,
>  						       qdev->single_msi);
>  	if (IS_ERR(qdev->mhi_cntrl)) {
>  		ret = PTR_ERR(qdev->mhi_cntrl);
> -		goto cleanup_qdev;
> +		goto cleanup_drm_dev;
>  	}
>  
>  	return 0;
>  
> +cleanup_drm_dev:
> +	qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
>  cleanup_qdev:
>  	cleanup_qdev(qdev);
>  	return ret;
> @@ -520,7 +510,8 @@ static void qaic_pci_remove(struct pci_dev *pdev)
>  	if (!qdev)
>  		return;
>  
> -	qaic_dev_reset_clean_local_state(qdev, false);
> +	qaic_dev_reset_clean_local_state(qdev);
> +	qaic_destroy_drm_device(qdev, QAIC_NO_PARTITION);
>  	qaic_mhi_free_controller(qdev->mhi_cntrl, link_up);
>  	cleanup_qdev(qdev);
>  }
> @@ -543,14 +534,13 @@ static void qaic_pci_reset_prepare(struct pci_dev *pdev)
>  
>  	qaic_notify_reset(qdev);
>  	qaic_mhi_start_reset(qdev->mhi_cntrl);
> -	qaic_dev_reset_clean_local_state(qdev, false);
> +	qaic_dev_reset_clean_local_state(qdev);
>  }
>  
>  static void qaic_pci_reset_done(struct pci_dev *pdev)
>  {
>  	struct qaic_device *qdev = pci_get_drvdata(pdev);
>  
> -	qdev->reset_state = QAIC_ONLINE;
>  	qaic_mhi_reset_done(qdev->mhi_cntrl);
>  }
>  



[Index of Archives]     [Linux DRI Users]     [Linux Intel Graphics]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]     [XFree86]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Linux Kernel]     [Linux SCSI]     [XFree86]
  Powered by Linux