The kfdtest user queue validation cases don't cover those error condition path, thanks for catching it.
This patch is
Reviewed-by: Philip Yang <Philip.Yang@xxxxxxx>
On 2024-07-26 02:47, Srinivasan
Shanmugam wrote:
The fix involves setting 'err' to '-EINVAL' before each 'goto out_err_unreserve'. Fixes the below: drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c:265 kfd_queue_acquire_buffers() warn: missing error code 'err' drivers/gpu/drm/amd/amdgpu/../amdkfd/kfd_queue.c 226 int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_properties *properties) 227 { 228 struct kfd_topology_device *topo_dev; 229 struct amdgpu_vm *vm; 230 u32 total_cwsr_size; 231 int err; 232 233 topo_dev = kfd_topology_device_by_id(pdd->dev->id); 234 if (!topo_dev) 235 return -EINVAL; 236 237 vm = drm_priv_to_vm(pdd->drm_priv); 238 err = amdgpu_bo_reserve(vm->root.bo, false); 239 if (err) 240 return err; 241 242 err = kfd_queue_buffer_get(vm, properties->write_ptr, &properties->wptr_bo, PAGE_SIZE); 243 if (err) 244 goto out_err_unreserve; 245 246 err = kfd_queue_buffer_get(vm, properties->read_ptr, &properties->rptr_bo, PAGE_SIZE); 247 if (err) 248 goto out_err_unreserve; 249 250 err = kfd_queue_buffer_get(vm, (void *)properties->queue_address, 251 &properties->ring_bo, properties->queue_size); 252 if (err) 253 goto out_err_unreserve; 254 255 /* only compute queue requires EOP buffer and CWSR area */ 256 if (properties->type != KFD_QUEUE_TYPE_COMPUTE) 257 goto out_unreserve; This is clearly a success path. 258 259 /* EOP buffer is not required for all ASICs */ 260 if (properties->eop_ring_buffer_address) { 261 if (properties->eop_ring_buffer_size != topo_dev->node_props.eop_buffer_size) { 262 pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", 263 properties->eop_buf_bo->tbo.base.size, 264 topo_dev->node_props.eop_buffer_size); --> 265 goto out_err_unreserve; This has err in the label name. err = -EINVAL? 266 } 267 err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, 268 &properties->eop_buf_bo, 269 properties->eop_ring_buffer_size); 270 if (err) 271 goto out_err_unreserve; 272 } 273 274 if (properties->ctl_stack_size != topo_dev->node_props.ctl_stack_size) { 275 pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", 276 properties->ctl_stack_size, 277 topo_dev->node_props.ctl_stack_size); 278 goto out_err_unreserve; err? 279 } 280 281 if (properties->ctx_save_restore_area_size != topo_dev->node_props.cwsr_size) { 282 pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", 283 properties->ctx_save_restore_area_size, 284 topo_dev->node_props.cwsr_size); 285 goto out_err_unreserve; err? Not sure. 286 } 287 288 total_cwsr_size = (topo_dev->node_props.cwsr_size + topo_dev->node_props.debug_memory_size) 289 * NUM_XCC(pdd->dev->xcc_mask); 290 total_cwsr_size = ALIGN(total_cwsr_size, PAGE_SIZE); 291 292 err = kfd_queue_buffer_get(vm, (void *)properties->ctx_save_restore_area_address, 293 &properties->cwsr_bo, total_cwsr_size); 294 if (!err) 295 goto out_unreserve; 296 297 amdgpu_bo_unreserve(vm->root.bo); 298 299 err = kfd_queue_buffer_svm_get(pdd, properties->ctx_save_restore_area_address, 300 total_cwsr_size); 301 if (err) 302 goto out_err_release; 303 304 return 0; 305 306 out_unreserve: 307 amdgpu_bo_unreserve(vm->root.bo); 308 return 0; 309 310 out_err_unreserve: 311 amdgpu_bo_unreserve(vm->root.bo); 312 out_err_release: 313 kfd_queue_release_buffers(pdd, properties); 314 return err; 315 } Fixes: 629568d25fea ("drm/amdkfd: Validate queue cwsr area and eop buffer size") Reported-by: Dan Carpenter <dan.carpenter@xxxxxxxxxx> Cc: Philip Yang <Philip.Yang@xxxxxxx> Cc: Felix Kuehling <Felix.Kuehling@xxxxxxx> Cc: Christian König <christian.koenig@xxxxxxx> Cc: Alex Deucher <alexander.deucher@xxxxxxx> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam@xxxxxxx> --- drivers/gpu/drm/amd/amdkfd/kfd_queue.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c index 9807e8adf77d..63795f0cd55a 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_queue.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_queue.c @@ -262,6 +262,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope pr_debug("queue eop bo size 0x%lx not equal to node eop buf size 0x%x\n", properties->eop_buf_bo->tbo.base.size, topo_dev->node_props.eop_buffer_size); + err = -EINVAL; goto out_err_unreserve; } err = kfd_queue_buffer_get(vm, (void *)properties->eop_ring_buffer_address, @@ -275,6 +276,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope pr_debug("queue ctl stack size 0x%x not equal to node ctl stack size 0x%x\n", properties->ctl_stack_size, topo_dev->node_props.ctl_stack_size); + err = -EINVAL; goto out_err_unreserve; } @@ -282,6 +284,7 @@ int kfd_queue_acquire_buffers(struct kfd_process_device *pdd, struct queue_prope pr_debug("queue cwsr size 0x%x not equal to node cwsr size 0x%x\n", properties->ctx_save_restore_area_size, topo_dev->node_props.cwsr_size); + err = -EINVAL; goto out_err_unreserve; }