Am 2021-07-06 um 11:36 a.m. schrieb Colin Ian King: > Hi, > > Static analysis with Coverity on linux-next has found a potential null > pointer dereference in function svm_range_restore_pages in > drivers/gpu/drm/amd/amdkfd/kfd_svm.c from the following commit: > > commit d4ebc2007040a0aff01bfe1b194085d3867328fd > Author: Philip Yang <Philip.Yang@xxxxxxx> > Date: Tue Jun 22 00:12:32 2021 -0400 > > drm/amdkfd: implement counters for vm fault and migration > > The analysis is as follows: Thanks. Philip, see inline ... > > 2397 int > 2398 svm_range_restore_pages(struct amdgpu_device *adev, unsigned int pasid, > 2399 uint64_t addr) > 2400{ > 2401 struct mm_struct *mm = NULL; > 2402 struct svm_range_list *svms; > 2403 struct svm_range *prange; > 2404 struct kfd_process *p; > 2405 uint64_t timestamp; > 2406 int32_t best_loc; > 2407 int32_t gpuidx = MAX_GPU_INSTANCE; > 2408 bool write_locked = false; > 2409 int r = 0; > 2410 > > 1. Condition !(adev->kfd.dev->pgmap.type != 0), taking false branch. > > 2411 if (!KFD_IS_SVM_API_SUPPORTED(adev->kfd.dev)) { > 2412 pr_debug("device does not support SVM\n"); > 2413 return -EFAULT; > 2414 } > 2415 > 2416 p = kfd_lookup_process_by_pasid(pasid); > > 2. Condition !p, taking false branch. > > 2417 if (!p) { > 2418 pr_debug("kfd process not founded pasid 0x%x\n", pasid); > 2419 return -ESRCH; > 2420 } > > 3. Condition !p->xnack_enabled, taking false branch. > > 2421 if (!p->xnack_enabled) { > 2422 pr_debug("XNACK not enabled for pasid 0x%x\n", pasid); > 2423 return -EFAULT; > 2424 } > 2425 svms = &p->svms; > 2426 > > 4. Condition 0 /* __builtin_types_compatible_p() */, taking false > branch. > 5. Condition 1 /* __builtin_types_compatible_p() */, taking true branch. > 6. Falling through to end of if statement. > 7. Condition !!branch, taking false branch. > 8. Condition ({...; !!branch;}), taking false branch. > > 2427 pr_debug("restoring svms 0x%p fault address 0x%llx\n", svms, > addr); > 2428 > 2429 mm = get_task_mm(p->lead_thread); > > 9. Condition !mm, taking false branch. > > 2430 if (!mm) { > 2431 pr_debug("svms 0x%p failed to get mm\n", svms); > 2432 r = -ESRCH; > 2433 goto out; > 2434 } > 2435 > 2436 mmap_read_lock(mm); > 2437retry_write_locked: > 2438 mutex_lock(&svms->lock); > 2439 prange = svm_range_from_addr(svms, addr, NULL); > > 10. Condition !prange, taking true branch. > 18. Condition !prange, taking true branch. > 2440 if (!prange) { > 11. Condition 0 /* __builtin_types_compatible_p() */, taking false > branch. > 12. Condition 1 /* __builtin_types_compatible_p() */, taking true > branch. > 13. Falling through to end of if statement. > 14. Condition !!branch, taking false branch. > 15. Condition ({...; !!branch;}), taking false branch. > 19. Condition 0 /* __builtin_types_compatible_p() */, taking false > branch. > 20. Condition 1 /* __builtin_types_compatible_p() */, taking true > branch. > 21. Falling through to end of if statement. > 22. Condition !!branch, taking false branch. > 23. Condition ({...; !!branch;}), taking false branch. > > 2441 pr_debug("failed to find prange svms 0x%p address > [0x%llx]\n", > 2442 svms, addr); > > 16. Condition !write_locked, taking true branch. > 24. Condition !write_locked, taking false branch. > > 2443 if (!write_locked) { > 2444 /* Need the write lock to create new range > with MMU notifier. > 2445 * Also flush pending deferred work to make > sure the interval > 2446 * tree is up to date before we add a new range > 2447 */ > 2448 mutex_unlock(&svms->lock); > 2449 mmap_read_unlock(mm); > 2450 mmap_write_lock(mm); > 2451 write_locked = true; > > 17. Jumping to label retry_write_locked. > > 2452 goto retry_write_locked; > 2453 } > 2454 prange = svm_range_create_unregistered_range(adev, > p, mm, addr); > > 25. Condition !prange, taking true branch. > 26. var_compare_op: Comparing prange to null implies that prange > might be null. > > 2455 if (!prange) { > > 27. Condition 0 /* __builtin_types_compatible_p() */, taking false > branch. > 28. Condition 1 /* __builtin_types_compatible_p() */, taking true > branch. > 29. Falling through to end of if statement. > 30. Condition !!branch, taking false branch. > 31. Condition ({...; !!branch;}), taking false branch. > > 2456 pr_debug("failed to create unregistered > range svms 0x%p address [0x%llx]\n", > 2457 svms, addr); > 2458 mmap_write_downgrade(mm); > 2459 r = -EFAULT; > > 32. Jumping to label out_unlock_svms. > > 2460 goto out_unlock_svms; > 2461 } > 2462 } > 2463 if (write_locked) > 2464 mmap_write_downgrade(mm); > 2465 > 2466 mutex_lock(&prange->migrate_mutex); > 2467 > 2468 if (svm_range_skip_recover(prange)) { > 2469 amdgpu_gmc_filter_faults_remove(adev, addr, pasid); > 2470 goto out_unlock_range; > 2471 } > 2472 > 2473 timestamp = ktime_to_us(ktime_get()) - > prange->validate_timestamp; > 2474 /* skip duplicate vm fault on different pages of same range */ > 2475 if (timestamp < AMDGPU_SVM_RANGE_RETRY_FAULT_PENDING) { > 2476 pr_debug("svms 0x%p [0x%lx %lx] already restored\n", > 2477 svms, prange->start, prange->last); > 2478 goto out_unlock_range; > 2479 } > 2480 > 2481 best_loc = svm_range_best_restore_location(prange, adev, > &gpuidx); > 2482 if (best_loc == -1) { > 2483 pr_debug("svms %p failed get best restore loc [0x%lx > 0x%lx]\n", > 2484 svms, prange->start, prange->last); > 2485 r = -EACCES; > 2486 goto out_unlock_range; > 2487 } > 2488 > 2489 pr_debug("svms %p [0x%lx 0x%lx] best restore 0x%x, actual > loc 0x%x\n", > 2490 svms, prange->start, prange->last, best_loc, > 2491 prange->actual_loc); > 2492 > 2493 if (prange->actual_loc != best_loc) { > 2494 if (best_loc) { > 2495 r = svm_migrate_to_vram(prange, best_loc, mm); > 2496 if (r) { > 2497 pr_debug("svm_migrate_to_vram failed > (%d) at %llx, falling back to system memory\n", > 2498 r, addr); > 2499 /* Fallback to system memory if > migration to > 2500 * VRAM failed > 2501 */ > 2502 if (prange->actual_loc) > 2503 r = > svm_migrate_vram_to_ram(prange, mm); > 2504 else > 2505 r = 0; > 2506 } > 2507 } else { > 2508 r = svm_migrate_vram_to_ram(prange, mm); > 2509 } > 2510 if (r) { > 2511 pr_debug("failed %d to migrate svms %p > [0x%lx 0x%lx]\n", > 2512 r, svms, prange->start, prange->last); > 2513 goto out_unlock_range; > 2514 } > 2515 } > 2516 > 2517 r = svm_range_validate_and_map(mm, prange, gpuidx, false, > false); > 2518 if (r) > 2519 pr_debug("failed %d to map svms 0x%p [0x%lx 0x%lx] > to gpus\n", > 2520 r, svms, prange->start, prange->last); > 2521 > 2522out_unlock_range: > 2523 mutex_unlock(&prange->migrate_mutex); > 2524out_unlock_svms: > 2525 mutex_unlock(&svms->lock); > 2526 mmap_read_unlock(mm); > 2527 > > Dereference after null check (FORWARD_NULL) > 33. var_deref_model: Passing null pointer prange to > svm_range_count_fault, which dereferences it. > > 2528 svm_range_count_fault(adev, p, prange, gpuidx); Looks like you need to add a NULL check for prange here. Regards, Felix > > > The jump in line 2460 to out_unlock_svms will occur if prange is null, > however, calling svm_range_count_fault with a null prange will cause a > null pointer deference when it calls svm_range_get_pdd_by_adev and > dereferences the pointer as follows: > > p = container_of(prange->svms, struct kfd_process, svms); > > Colin >