Hi Slava,

The two x86_64 systems I tried are:

- System One
Tyan S7025 with dual Xeon X5675 and 48GB registered ECC memory, with an NVIDIA GTX 1050Ti 4GB (also used for display) and an AMD RX 550 4GB
Running standard Ubuntu 16.04.4 with kernels linux-image-4.13.0-38-generic and linux-image-4.4.0-122-generic, mesa-17.2.8-0ubuntu0, libdrm-2.4.83-1 and amdgpu-pro 17.50/amdgpu-pro 18.10
lsb_release -a
Description: Ubuntu 16.04.4 LTS
BIOS configuration:
ACPI enabled v3.0
ACPI APIC support Enabled
ACPI SRAT table Enabled
SR-IOV Enabled
Intel VT-d Disabled
PCI MMIO 64 Bits support Disabled

- System Two
Tyan S7002 with dual Xeon X5670 and 12GB registered ECC memory, with an AMD RX 480
Running Ubuntu 18.04 with kernels vanilla 4.16.7 and linux-image-4.15.0-20-generic, mesa-18.0.0~rc5-1ubuntu1, libdrm-2.4.91-2 and mesa-opencl-icd, libclc-0.2.0+git20180312-1
BIOS configuration:
ACPI enabled v2.0
ACPI APIC support Enabled
ACPI SRAT table Enabled
SR-IOV Enabled
Intel VT-d Disabled
PCI MMIO 64 Bits support Disabled

amdgpu-pro-install --headless --opencl=legacy

When I try to run the attached OpenCL code (which computes a cross-correlation between two square matrices directly from the definition of the cross-correlation function), the GPU hangs, and there are other kernels where this also happens.
As soon as I try to run the kernel, the system hangs at the first kernel computation on both systems, and after a couple of seconds dmesg shows:
[drm:amdgpu_job_timedout [amdgpu]] *ERROR* ring gfx timeout, last signaled seq=2, last emitted seq=3
[drm] IP block:gmc_v8_0 is hung!
[drm] IP block:tonga_ih is hung!
[drm] IP block:gfx_v8_0 is hung!
[drm] IP block:sdma_v3_0 is hung!
[drm] IP block:uvd_v6_0 is hung!
[drm] IP block:vce_v3_0 is hung!
[drm] GPU recovery disabled.
- On another system with 32-bit armhf, 1GB RAM, 512GB SSD, AMD RX 480 or AMD RX 550 with Ubuntu 17.10, vanilla kernel 4.16.7, mesa-18.0.2, libdrm-2.4.92-git, libclc-git at commit 3d994f2ff2cbb4531223fe2657144cb19f0c5328 (15/Nov/2017)
The kernels work properly on the same AMD cards.

On Fri, May 4, 2018 at 7:18 PM, Abramov, Slava <Slava.Abramov at amd.com> wrote:
> Luis,
>
>
> Can you please provide more details on your system environment and steps on
> configuring the software and reproducing the issue?
>
>
>
> Slava A
>
> ________________________________
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> on behalf of Luís
> Mendes <luis.p.mendes at gmail.com>
> Sent: Friday, May 4, 2018 12:27:47 PM
> To: amd-gfx list; Koenig, Christian; Michel Dänzer
> Subject: GPU hang trying to run OpenCL kernels on x86_64
>
> Hi,
>
> I am a collaborator with Syncleus/aparapi project on github and I've
> been testing OpenCL on AMD and NVIDIA cards.
>
> Currently I have a set of kernels that hang the GPU (AMD RX 460 and
> AMD RX 550) across all compute units on x86_64 running vanilla kernel
> 4.16.7 on Ubuntu 18.04, also on Ubuntu 16.04.4 with AMDGPU PRO 17.50
> and 18.10 show the same problems, in fact, AMDGPU-PRO 18.10 is even
> worse.
>
> However the same set of kernels run happily on armhf with vanilla
> Linux 4.16.7 and mesa 18.0 (mesa-opencl-icd and libclc for amdgcn),
> Ubuntu 17.10, on an AMD RX460 and an AMD RX 550.
> > LuÃs Mendes > _______________________________________________ > amd-gfx mailing list > amd-gfx at lists.freedesktop.org > https://lists.freedesktop.org/mailman/listinfo/amd-gfx -------------- next part -------------- May 04, 2018 10:37:41 PM com.aparapi.internal.kernel.KernelRunner executeInternalInner INFO:
/*
 * OpenCL C source as logged by Aparapi's KernelRunner. The accompanying
 * message describes it as a direct (definition-based) cross-correlation of
 * two square matrices, and reports that launching it hangs the GPU on the
 * x86_64 systems. The code below is reproduced token-for-token; only
 * comments and line breaks have been added.
 */

/*
 * Bundle of the kernel's arguments. run() copies every argument into one of
 * these so the helper functions can receive them through a single pointer.
 */
typedef struct This_s{
   __global int *tilesGeometry;
   __global int *inputGeometry;
   __global int *threadOutputStart;
   __global int *outputGeometry;
   __global int *threadOffsetI;
   __global int *threadOffsetJ;
   __global float *matrixInF;
   __global float *matrixInG;
   __global float *matrixOut;
   int passid;
}This;

/* Returns the passid value stored in the argument bundle. */
int get_pass_id(This *this){
   return this->passid;
}

/*
 * Branch-free sign of x: evaluates to +1 when x >= 0 and -1 when x < 0.
 * value = 2*x + 1 is odd, hence never zero, so value / abs(value) cannot
 * divide by zero. `this` is not used.
 * NOTE(review): relies on 2*x + 1 still fitting in a short; for larger |x|
 * the (short) cast changes the value -- confirm callers stay in range.
 */
short pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__signX(This *this, short x){
   short value = (short)((x + x) + 1);
   return((short)(value / abs(value)));
}

/*
 * Branch-free index remapping: returns x + 1 when 0 <= x and x + 1 <= dimX,
 * and 0 otherwise.
 * NOTE(review): the callers index rows/columns with a stride of
 * inputGeometry[1] + 1, so index 0 is presumably a padding row/column that
 * out-of-range positions are redirected to -- confirm against the host code.
 */
short pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(This *this, short x, short dimX){
   /* (sign(x) + 1) is 2 for x >= 0 and 0 for x < 0, so this yields x + 1 or 0. */
   short result = (short)(((pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__signX(this, x) + 1) * (x + 1)) / 2);
   /* Same trick on (dimX - result): keep result while result <= dimX, else 0. */
   result = (short)(((pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__signX(this, (short)(dimX - result)) + 1) * result) / 2);
   return(result);
}

/*
 * Kernel entry point. Each work-item accumulates one sum of products of
 * matrixInF and matrixInG elements and stores it in a single element of
 * matrixOut.
 *
 * Work-item ids: i comes from dimension 1, j from dimension 0 and k from
 * dimension 2 of the global range.
 *
 * Every *Geometry argument is read at indices 0 and 1 only;
 * threadOutputStart is indexed by (i * sizeJ + j), threadOffsetI by i and
 * threadOffsetJ by j. passid is stored in the bundle but not read here.
 */
__kernel void run(
   __global int *tilesGeometry,
   __global int *inputGeometry,
   __global int *threadOutputStart,
   __global int *outputGeometry,
   __global int *threadOffsetI,
   __global int *threadOffsetJ,
   __global float *matrixInF,
   __global float *matrixInG,
   __global float *matrixOut,
   int passid
){
   /* Pack the arguments into the bundle that the helper functions expect. */
   This thisStruct;
   This* this=&thisStruct;
   this->tilesGeometry = tilesGeometry;
   this->inputGeometry = inputGeometry;
   this->threadOutputStart = threadOutputStart;
   this->outputGeometry = outputGeometry;
   this->threadOffsetI = threadOffsetI;
   this->threadOffsetJ = threadOffsetJ;
   this->matrixInF = matrixInF;
   this->matrixInG = matrixInG;
   this->matrixOut = matrixOut;
   this->passid = passid;
   {
      /* NOTE(review): sizeI, il and jl are assigned but never read below. */
      int sizeI = get_global_size(1);
      int sizeJ = get_global_size(0);
      int i = get_global_id(1);
      int j = get_global_id(0);
      int k = get_global_id(2);
      int il = get_local_id(0);
      int jl = get_local_id(1);
      /*
       * Base offset into matrixInF / matrixInG. Both terms are scaled by
       * (inputGeometry[0] + 1) * (inputGeometry[1] + 1); the first is driven
       * by k and the tilesGeometry product, the second by
       * threadOutputStart[i * sizeJ + j] divided by the outputGeometry product.
       * NOTE(review): presumably "group offset + tile index" times the padded
       * tile size -- confirm against the host-side layout.
       */
      int matrixInputStart = ((((k * this->tilesGeometry[0]) * this->tilesGeometry[1]) * (this->inputGeometry[1] + 1)) * (this->inputGeometry[0] + 1)) + (((this->threadOutputStart[((i * sizeJ) + j)] / (this->outputGeometry[0] * this->outputGeometry[1])) * (this->inputGeometry[0] + 1)) * (this->inputGeometry[1] + 1));
      /* Base offset into matrixOut: k-driven term plus threadOutputStart[i * sizeJ + j]. */
      int matrixOutputStart = ((((k * this->tilesGeometry[0]) * this->tilesGeometry[1]) * this->outputGeometry[0]) * this->outputGeometry[1]) + this->threadOutputStart[((i * sizeJ) + j)];
      /* Per-work-item offsets added to the loop indices for the G operand. */
      short subMatrixI = (short)this->threadOffsetI[i];
      short subMatrixJ = (short)this->threadOffsetJ[j];
      float accum = 0.0f;
      /*
       * indexN / indexM sweep from -(outputGeometry[d] / 2) up to and
       * including +(outputGeometry[d] / 2).
       * NOTE(review): the counters are short while the bounds are int; if a
       * bound were >= 32767 the counter would wrap and the loop would never
       * terminate -- confirm outputGeometry stays small.
       * NOTE(review): the bounds, the inputGeometry loads and fi/gi do not
       * change inside the inner loop but are re-evaluated every iteration.
       */
      for (short indexN = (short)(-this->outputGeometry[0] / 2); indexN<=(this->outputGeometry[0] / 2); indexN = (short)(indexN + 1)){
         for (short indexM = (short)(-this->outputGeometry[1] / 2); indexM<=(this->outputGeometry[1] / 2); indexM = (short)(indexM + 1)){
            /* F is addressed by the raw loop indices (negative ones map to 0). */
            short fi = pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(this, indexN, (short)this->inputGeometry[0]);
            short fj = pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(this, indexM, (short)this->inputGeometry[1]);
            /* G is addressed by the loop indices shifted by this work-item's offsets. */
            short gi = pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(this, (short)(subMatrixI + indexN), (short)this->inputGeometry[0]);
            short gj = pt_ist_ceris_vipivist_jobs_CrossCorrelationKernel__relocateX(this, (short)(subMatrixJ + indexM), (short)this->inputGeometry[1]);
            /* Row stride of both inputs is inputGeometry[1] + 1. */
            accum = accum + (this->matrixInF[((matrixInputStart + (fi * (this->inputGeometry[1] + 1))) + fj)] * this->matrixInG[((matrixInputStart + (gi * (this->inputGeometry[1] + 1))) + gj)]);
         }
      }
      /* Shift the offsets by half the output size to form the output row and column; row stride is outputGeometry[1]. */
      int outIndex = (matrixOutputStart + ((subMatrixI + (this->outputGeometry[0] / 2)) * this->outputGeometry[1])) + (subMatrixJ + (this->outputGeometry[1] / 2));
      this->matrixOut[outIndex] = accum;
      return;
   }
}