Acked-by: Alex Deucher <alexander.deucher@xxxxxxx> On Mon, Oct 4, 2021 at 4:31 AM Christian König <ckoenig.leichtzumerken@xxxxxxxxx> wrote: > > Ping? Alex any objections to this? > > Otherwise I'm going to push it with Nirmoy's acked-by. > > Christian. > > Am 30.09.21 um 11:26 schrieb Christian König: > > This reverts commit 728e7e0cd61899208e924472b9e641dbeb0775c4. > > > > Further discussion reveals that this feature is severely broken > > and needs to be reverted ASAP. > > > > GPU reset can never be delayed by userspace, even for debugging; > > otherwise we can run into in-kernel deadlocks. > > > > Signed-off-by: Christian König <christian.koenig@xxxxxxx> > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 - > > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c | 80 --------------------- > > drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h | 5 -- > > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 -- > > 4 files changed, 91 deletions(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > index dc3c6b3a00e5..6a1928a720a6 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h > > @@ -1078,8 +1078,6 @@ struct amdgpu_device { > > char product_name[32]; > > char serial[20]; > > > > - struct amdgpu_autodump autodump; > > - > > atomic_t throttling_logging_enabled; > > struct ratelimit_state throttling_logging_rs; > > uint32_t ras_hw_enabled; > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > > index 277128846dd1..0b89ba142a59 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c > > @@ -27,7 +27,6 @@ > > #include <linux/pci.h> > > #include <linux/uaccess.h> > > #include <linux/pm_runtime.h> > > -#include <linux/poll.h> > > > > #include "amdgpu.h" > > #include "amdgpu_pm.h" > > @@ -37,85 +36,7 @@ > > #include "amdgpu_securedisplay.h" > > #include "amdgpu_fw_attestation.h" > > 
> > -int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev) > > -{ > > #if defined(CONFIG_DEBUG_FS) > > - unsigned long timeout = 600 * HZ; > > - int ret; > > - > > - wake_up_interruptible(&adev->autodump.gpu_hang); > > - > > - ret = wait_for_completion_interruptible_timeout(&adev->autodump.dumping, timeout); > > - if (ret == 0) { > > - pr_err("autodump: timeout, move on to gpu recovery\n"); > > - return -ETIMEDOUT; > > - } > > -#endif > > - return 0; > > -} > > - > > -#if defined(CONFIG_DEBUG_FS) > > - > > -static int amdgpu_debugfs_autodump_open(struct inode *inode, struct file *file) > > -{ > > - struct amdgpu_device *adev = inode->i_private; > > - int ret; > > - > > - file->private_data = adev; > > - > > - ret = down_read_killable(&adev->reset_sem); > > - if (ret) > > - return ret; > > - > > - if (adev->autodump.dumping.done) { > > - reinit_completion(&adev->autodump.dumping); > > - ret = 0; > > - } else { > > - ret = -EBUSY; > > - } > > - > > - up_read(&adev->reset_sem); > > - > > - return ret; > > -} > > - > > -static int amdgpu_debugfs_autodump_release(struct inode *inode, struct file *file) > > -{ > > - struct amdgpu_device *adev = file->private_data; > > - > > - complete_all(&adev->autodump.dumping); > > - return 0; > > -} > > - > > -static unsigned int amdgpu_debugfs_autodump_poll(struct file *file, struct poll_table_struct *poll_table) > > -{ > > - struct amdgpu_device *adev = file->private_data; > > - > > - poll_wait(file, &adev->autodump.gpu_hang, poll_table); > > - > > - if (amdgpu_in_reset(adev)) > > - return POLLIN | POLLRDNORM | POLLWRNORM; > > - > > - return 0; > > -} > > - > > -static const struct file_operations autodump_debug_fops = { > > - .owner = THIS_MODULE, > > - .open = amdgpu_debugfs_autodump_open, > > - .poll = amdgpu_debugfs_autodump_poll, > > - .release = amdgpu_debugfs_autodump_release, > > -}; > > - > > -static void amdgpu_debugfs_autodump_init(struct amdgpu_device *adev) > > -{ > > - init_completion(&adev->autodump.dumping); > > 
- complete_all(&adev->autodump.dumping); > > - init_waitqueue_head(&adev->autodump.gpu_hang); > > - > > - debugfs_create_file("amdgpu_autodump", 0600, > > - adev_to_drm(adev)->primary->debugfs_root, > > - adev, &autodump_debug_fops); > > -} > > > > /** > > * amdgpu_debugfs_process_reg_op - Handle MMIO register reads/writes > > @@ -1590,7 +1511,6 @@ int amdgpu_debugfs_init(struct amdgpu_device *adev) > > } > > > > amdgpu_ras_debugfs_create_all(adev); > > - amdgpu_debugfs_autodump_init(adev); > > amdgpu_rap_debugfs_init(adev); > > amdgpu_securedisplay_debugfs_init(adev); > > amdgpu_fw_attestation_debugfs_init(adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > > index 141a8474e24f..8b641f40fdf6 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.h > > @@ -26,10 +26,6 @@ > > /* > > * Debugfs > > */ > > -struct amdgpu_autodump { > > - struct completion dumping; > > - struct wait_queue_head gpu_hang; > > -}; > > > > int amdgpu_debugfs_regs_init(struct amdgpu_device *adev); > > int amdgpu_debugfs_init(struct amdgpu_device *adev); > > @@ -37,4 +33,3 @@ void amdgpu_debugfs_fini(struct amdgpu_device *adev); > > void amdgpu_debugfs_fence_init(struct amdgpu_device *adev); > > void amdgpu_debugfs_firmware_init(struct amdgpu_device *adev); > > void amdgpu_debugfs_gem_init(struct amdgpu_device *adev); > > -int amdgpu_debugfs_wait_dump(struct amdgpu_device *adev); > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > index 41c6b3aacd37..4d34b2da8582 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > @@ -4458,10 +4458,6 @@ int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, > > if (reset_context->reset_req_dev == adev) > > job = reset_context->job; > > > > - /* no need to dump if device is not in good state during probe period */ 
> > - if (!adev->gmc.xgmi.pending_reset) > > - amdgpu_debugfs_wait_dump(adev); > > - > > if (amdgpu_sriov_vf(adev)) { > > /* stop the data exchange thread */ > > amdgpu_virt_fini_data_exchange(adev); >