> Concerning the pending flag, I think it is also handled by the
> coredump code: dev_coredumpm() will not create another device if there
> is one already associated with the device being dumped. I should
> probably mention this in the commit message.
Thats a good point, I checked that now as well but I still think we
need
this flag because
it also makes sure devcoredump is taken and read together within our
driver itself instead of relying
on the devcoredump. Its not a strong preference.
But if you would like to go ahead with this, might have to retest its
robustness.
With the older logic how i verified this was that I relaxed the
underrun
cnt check in this patch
( https://patchwork.freedesktop.org/patch/429112/?series=89181&rev=1 )
here and simulated an error:
@@ -1336,6 +1337,11 @@ static void
dpu_encoder_underrun_callback(struct
drm_encoder *drm_enc,
DPU_ATRACE_BEGIN("encoder_underrun_callback");
atomic_inc(&phy_enc->underrun_cnt);
+
+ /* trigger dump only on the first underrun */
+ if (atomic_read(&phy_enc->underrun_cnt) == 1)
+ msm_disp_snapshot_state(drm_enc->dev);
+
And verified that across various underrun interrupts, the devcoredump
is
stable.
>
> If you agree with this, I'll send v2 then (also adding plls dumping).
Looking fwd to seeing the pll dumping, that will be a great addition
to
this.
>
>>
>>>
>>> Signed-off-by: Dmitry Baryshkov <dmitry.baryshkov@xxxxxxxxxx>
>>> ---
>>> drivers/gpu/drm/msm/disp/msm_disp_snapshot.c | 87
>>> ++++---------------
>>> drivers/gpu/drm/msm/disp/msm_disp_snapshot.h | 13 +--
>>> .../gpu/drm/msm/disp/msm_disp_snapshot_util.c | 5 +-
>>> drivers/gpu/drm/msm/msm_kms.h | 5 +-
>>> 4 files changed, 28 insertions(+), 82 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
>>> b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
>>> index 70fd5a1fe13e..371358893716 100644
>>> --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
>>> +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.c
>>> @@ -7,8 +7,7 @@
>>>
>>> #include "msm_disp_snapshot.h"
>>>
>>> -#ifdef CONFIG_DEV_COREDUMP
>>> -static ssize_t disp_devcoredump_read(char *buffer, loff_t offset,
>>> +static ssize_t __maybe_unused disp_devcoredump_read(char *buffer,
>>> loff_t offset,
>>> size_t count, void *data, size_t datalen)
>>> {
>>> struct drm_print_iterator iter;
>>> @@ -29,24 +28,21 @@ static ssize_t disp_devcoredump_read(char
>>> *buffer,
>>> loff_t offset,
>>> return count - iter.remain;
>>> }
>>>
>>> -static void disp_devcoredump_free(void *data)
>>> +static void _msm_disp_snapshot_work(struct kthread_work *work)
>>> {
>>> + struct msm_kms *msm_kms = container_of(work, struct msm_kms,
>>> dump_work);
>>> + struct drm_device *drm_dev = msm_kms->dev;
>>> struct msm_disp_state *disp_state;
>>> + struct drm_printer p;
>>>
>>> - disp_state = data;
>>> -
>>> - msm_disp_state_free(disp_state);
>>> -
>>> - disp_state->coredump_pending = false;
>>> -}
>>> -#endif /* CONFIG_DEV_COREDUMP */
>>> + disp_state = kzalloc(sizeof(struct msm_disp_state), GFP_KERNEL);
>>> + if (!disp_state)
>>> + return;
>>>
>>> -static void _msm_disp_snapshot_work(struct kthread_work *work)
>>> -{
>>> - struct msm_disp_state *disp_state = container_of(work, struct
>>> msm_disp_state, dump_work);
>>> - struct drm_printer p;
>>> + disp_state->dev = drm_dev->dev;
>>> + disp_state->drm_dev = drm_dev;
>>>
>>> - mutex_lock(&disp_state->mutex);
>>> + INIT_LIST_HEAD(&disp_state->blocks);
>>>
>>> msm_disp_snapshot_capture_state(disp_state);
>>>
>>> @@ -55,26 +51,15 @@ static void _msm_disp_snapshot_work(struct
>>> kthread_work *work)
>>> msm_disp_state_print(disp_state, &p);
>>> }
>>>
>>> - /*
>>> - * if devcoredump is not defined free the state immediately
>>> - * otherwise it will be freed in the free handler.
>>> - */
>>> -#ifdef CONFIG_DEV_COREDUMP
>>> + /* If COREDUMP is disabled, the stub will call the free
>>> function. */
>> This is a good cleanup, I just checked that the free() is called even
>> if the config is not enabled
>>> dev_coredumpm(disp_state->dev, THIS_MODULE, disp_state, 0,
>>> GFP_KERNEL,
>>> - disp_devcoredump_read, disp_devcoredump_free);
>>> - disp_state->coredump_pending = true;
>>> -#else
>>> - msm_disp_state_free(disp_state);
>>> -#endif
>>> -
>>> - mutex_unlock(&disp_state->mutex);
>>> + disp_devcoredump_read, msm_disp_state_free);
>>> }
>>>
>>> void msm_disp_snapshot_state(struct drm_device *drm_dev)
>>> {
>>> struct msm_drm_private *priv;
>>> struct msm_kms *kms;
>>> - struct msm_disp_state *disp_state;
>>>
>>> if (!drm_dev) {
>>> DRM_ERROR("invalid params\n");
>>> @@ -83,30 +68,13 @@ void msm_disp_snapshot_state(struct drm_device
>>> *drm_dev)
>>>
>>> priv = drm_dev->dev_private;
>>> kms = priv->kms;
>>> - disp_state = kms->disp_state;
>>> -
>>> - if (!disp_state) {
>>> - DRM_ERROR("invalid params\n");
>>> - return;
>>> - }
>>> -
>>> - /*
>>> - * if there is a coredump pending return immediately till dump
>>> - * if read by userspace or timeout happens
>>> - */
>>> - if (disp_state->coredump_pending) {
>>> - DRM_DEBUG("coredump is pending read\n");
>>> - return;
>>> - }
>>>
>>> - kthread_queue_work(disp_state->dump_worker,
>>> - &disp_state->dump_work);
>>> + kthread_queue_work(kms->dump_worker, &kms->dump_work);
>>> }
>>>
>>> int msm_disp_snapshot_init(struct drm_device *drm_dev)
>>> {
>>> struct msm_drm_private *priv;
>>> - struct msm_disp_state *disp_state;
>>> struct msm_kms *kms;
>>>
>>> if (!drm_dev) {
>>> @@ -117,22 +85,11 @@ int msm_disp_snapshot_init(struct drm_device
>>> *drm_dev)
>>> priv = drm_dev->dev_private;
>>> kms = priv->kms;
>>>
>>> - disp_state = devm_kzalloc(drm_dev->dev, sizeof(struct
>>> msm_disp_state), GFP_KERNEL);
>>> -
>>> - mutex_init(&disp_state->mutex);
>>> -
>>> - disp_state->dev = drm_dev->dev;
>>> - disp_state->drm_dev = drm_dev;
>>> -
>>> - INIT_LIST_HEAD(&disp_state->blocks);
>>> -
>>> - disp_state->dump_worker = kthread_create_worker(0, "%s",
>>> "disp_snapshot");
>>> - if (IS_ERR(disp_state->dump_worker))
>>> + kms->dump_worker = kthread_create_worker(0, "%s",
>>> "disp_snapshot");
>>> + if (IS_ERR(kms->dump_worker))
>>> DRM_ERROR("failed to create disp state task\n");
>>>
>>> - kthread_init_work(&disp_state->dump_work,
>>> _msm_disp_snapshot_work);
>>> -
>>> - kms->disp_state = disp_state;
>>> + kthread_init_work(&kms->dump_work, _msm_disp_snapshot_work);
>>>
>>> return 0;
>>> }
>>> @@ -141,7 +98,6 @@ void msm_disp_snapshot_destroy(struct drm_device
>>> *drm_dev)
>>> {
>>> struct msm_kms *kms;
>>> struct msm_drm_private *priv;
>>> - struct msm_disp_state *disp_state;
>>>
>>> if (!drm_dev) {
>>> DRM_ERROR("invalid params\n");
>>> @@ -150,12 +106,7 @@ void msm_disp_snapshot_destroy(struct drm_device
>>> *drm_dev)
>>>
>>> priv = drm_dev->dev_private;
>>> kms = priv->kms;
>>> - disp_state = kms->disp_state;
>>> -
>>> - if (disp_state->dump_worker)
>>> - kthread_destroy_worker(disp_state->dump_worker);
>>> -
>>> - list_del(&disp_state->blocks);
>>>
>>> - mutex_destroy(&disp_state->mutex);
>>> + if (kms->dump_worker)
>>> + kthread_destroy_worker(kms->dump_worker);
>>> }
>>> diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
>>> b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
>>> index 32f52799a1ba..c6174a366095 100644
>>> --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
>>> +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot.h
>>> @@ -41,26 +41,17 @@
>>> * struct msm_disp_state - structure to store current dpu state
>>> * @dev: device pointer
>>> * @drm_dev: drm device pointer
>>> - * @mutex: mutex to serialize access to serialze dumps, debugfs
>>> access
>>> - * @coredump_pending: coredump is pending read from userspace
>>> * @atomic_state: atomic state duplicated at the time of the error
>>> - * @dump_worker: kworker thread which runs the dump work
>>> - * @dump_work: kwork which dumps the registers and drm state
>>> * @timestamp: timestamp at which the coredump was captured
>>> */
>>> struct msm_disp_state {
>>> struct device *dev;
>>> struct drm_device *drm_dev;
>>> - struct mutex mutex;
>>> -
>>> - bool coredump_pending;
>>>
>>> struct list_head blocks;
>>>
>>> struct drm_atomic_state *atomic_state;
>>>
>>> - struct kthread_worker *dump_worker;
>>> - struct kthread_work dump_work;
>>> ktime_t timestamp;
>>> };
>>>
>>> @@ -123,11 +114,11 @@ void msm_disp_snapshot_capture_state(struct
>>> msm_disp_state *disp_state);
>>>
>>> /**
>>> * msm_disp_state_free - free the memory after the coredump has been
>>> read
>>> - * @disp_state: handle to struct msm_disp_state
>>> + * @data: handle to struct msm_disp_state
>>>
>>> * Returns: none
>>> */
>>> -void msm_disp_state_free(struct msm_disp_state *disp_state);
>>> +void msm_disp_state_free(void *data);
>>>
>>> /**
>>> * msm_disp_snapshot_add_block - add a hardware block with its
>>> register dump
>>> diff --git a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
>>> b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
>>> index ca6632550337..cabe15190ec1 100644
>>> --- a/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
>>> +++ b/drivers/gpu/drm/msm/disp/msm_disp_snapshot_util.c
>>> @@ -142,8 +142,9 @@ void msm_disp_snapshot_capture_state(struct
>>> msm_disp_state *disp_state)
>>> msm_disp_capture_atomic_state(disp_state);
>>> }
>>>
>>> -void msm_disp_state_free(struct msm_disp_state *disp_state)
>>> +void msm_disp_state_free(void *data)
>>> {
>>> + struct msm_disp_state *disp_state = data;
>>> struct msm_disp_state_block *block, *tmp;
>>>
>>> if (disp_state->atomic_state) {
>>> @@ -156,6 +157,8 @@ void msm_disp_state_free(struct msm_disp_state
>>> *disp_state)
>>> kfree(block->state);
>>> kfree(block);
>>> }
>>> +
>>> + kfree(disp_state);
>>> }
>>>
>>> void msm_disp_snapshot_add_block(struct msm_disp_state *disp_state,
>>> u32 len,
>>> diff --git a/drivers/gpu/drm/msm/msm_kms.h
>>> b/drivers/gpu/drm/msm/msm_kms.h
>>> index 146dcab123f4..529b9dacf7ca 100644
>>> --- a/drivers/gpu/drm/msm/msm_kms.h
>>> +++ b/drivers/gpu/drm/msm/msm_kms.h
>>> @@ -156,8 +156,9 @@ struct msm_kms {
>>> /* mapper-id used to request GEM buffer mapped for scanout: */
>>> struct msm_gem_address_space *aspace;
>>>
>>> - /* handle to disp snapshot state */
>>> - struct msm_disp_state *disp_state;
>>> + /* disp snapshot support */
>>> + struct kthread_worker *dump_worker;
>>> + struct kthread_work dump_work;
>>>
>>> /*
>>> * For async commit, where ->flush_commit() and later happens