yeah, that's fine. Alex On Fri, Aug 30, 2019 at 8:21 PM Grodzovsky, Andrey <Andrey.Grodzovsky@xxxxxxx> wrote: > > But I am not the one cherry-picking to DKMS; should I just let this person know this is the DKMS code he should use when the appropriate API doesn't exist? > > Andrey > > ________________________________________ > From: Alex Deucher <alexdeucher@xxxxxxxxx> > Sent: 30 August 2019 15:55:03 > To: Grodzovsky, Andrey > Cc: amd-gfx list; Zhang, Hawking; Christian König; Zhou1, Tao; Kuehling, Felix > Subject: Re: [PATCH v3 3/3] dmr/amdgpu: Add system auto reboot to RAS. > > On Fri, Aug 30, 2019 at 12:39 PM Andrey Grodzovsky > <andrey.grodzovsky@xxxxxxx> wrote: > > > > In case of RAS error allow the user to configure auto system > > reboot through ras_ctrl. > > This is also part of the temporary workaround for the RAS > > hang problem. > > > > Signed-off-by: Andrey Grodzovsky <andrey.grodzovsky@xxxxxxx> > > Typo in title: dmr -> drm > > > --- > > drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 18 ++++++++++++++++++ > > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 10 +++++++++- > > drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 1 + > > 3 files changed, 28 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > index c9825ae..e26f2e9 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c > > @@ -3760,6 +3760,24 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, > > int i, r = 0; > > bool in_ras_intr = amdgpu_ras_intr_triggered(); > > > > + /* > > + * Flush RAM to disk so that after reboot > > + * the user can read the log and see why the system rebooted. > > + * > > + * Using user mode app call instead of kernel APIs such as > > + * ksys_sync_helper for backward compatibility with earlier > > + * kernels into which this is also intended. 
> > + */ > > + if (in_ras_intr && amdgpu_ras_get_context(adev)->reboot) { > > + char *envp[] = { "HOME=/", NULL }; > > + char *argv[] = { "/bin/sync", NULL }; > > + > > + DRM_WARN("Emergency reboot."); > > + > > + call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC); > > + emergency_restart(); > > + } > > + > > This is fine for dkms, but for upstream/amd-staging, we probably want > to call the appropriate APIs directly. > > > need_full_reset = job_signaled = false; > > INIT_LIST_HEAD(&device_list); > > > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > index 1cc34de..bbcfb4f 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c > > @@ -30,6 +30,7 @@ > > #include "amdgpu_ras.h" > > #include "amdgpu_atomfirmware.h" > > #include "ivsrcid/nbio/irqsrcs_nbif_7_4.h" > > +#include <linux/kmod.h> > > > > const char *ras_error_string[] = { > > "none", > > @@ -154,6 +155,8 @@ static int amdgpu_ras_debugfs_ctrl_parse_data(struct file *f, > > op = 1; > > else if (sscanf(str, "inject %32s %8s", block_name, err) == 2) > > op = 2; > > + else if (sscanf(str, "reboot %32s", block_name) == 1) > > + op = 3; > > else if (str[0] && str[1] && str[2] && str[3]) > > /* ascii string, but commands are not matched. */ > > return -EINVAL; > > @@ -287,6 +290,9 @@ static ssize_t amdgpu_ras_debugfs_ctrl_write(struct file *f, const char __user * > > /* data.inject.address is offset instead of absolute gpu address */ > > ret = amdgpu_ras_error_inject(adev, &data.inject); > > break; > > + case 3: > > + amdgpu_ras_get_context(adev)->reboot = true; > > + break; > > default: > > ret = -EINVAL; > > break; > > @@ -1733,6 +1739,8 @@ int amdgpu_ras_fini(struct amdgpu_device *adev) > > void amdgpu_ras_global_ras_isr(struct amdgpu_device *adev) > > { > > if (atomic_cmpxchg(&amdgpu_ras_in_intr, 0, 1) == 0) { > > - DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected! 
Stopping all GPU jobs.\n"); > > + DRM_WARN("RAS event of type ERREVENT_ATHUB_INTERRUPT detected!\n"); > > + > > + amdgpu_ras_reset_gpu(adev, false); > > } > > } > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > > index 3ec2a87..a83ec99 100644 > > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h > > @@ -333,6 +333,7 @@ struct amdgpu_ras { > > struct mutex recovery_lock; > > > > uint32_t flags; > > + bool reboot; > > }; > > > > struct ras_fs_data { > > -- > > 2.7.4 > > _______________________________________________ amd-gfx mailing list amd-gfx@xxxxxxxxxxxxxxxxxxxxx https://lists.freedesktop.org/mailman/listinfo/amd-gfx