On 2023-02-27 18:07, Xiaogang.Chen wrote:
From: Xiaogang Chen <xiaogang.chen@xxxxxxx>
svm_migrate_ram_to_vram migrate a prange from sys ram to vram. The prange may
cross multiple vma. Need remember current dst vram offset in page for each migration.
Good catch. It's not the offset in the page, but the offset in the TTM
resource, I think. See more nit-picks inline.
Signed-off-by: Xiaogang Chen <Xiaogang.Chen@xxxxxxx>
---
drivers/gpu/drm/amd/amdkfd/kfd_migrate.c | 17 ++++++++++-------
1 file changed, 10 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
index 1c625433ff30..60664e0cbc1c 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_migrate.c
@@ -294,7 +294,7 @@ static unsigned long svm_migrate_unsuccessful_pages(struct migrate_vma *migrate)
static int
svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct migrate_vma *migrate, struct dma_fence **mfence,
- dma_addr_t *scratch)
+ dma_addr_t *scratch, uint64_t *cur_dst)
The name cur_dst is a bit confusing. There is a local variable "dst" in
this function, which has a very different meaning. It's the pointer to
an array of VRAM physical addresses. A better name for cur_dst would be
ttm_res_offset, as it is the offset from the start of the TTM resource.
I'd prefer if it was not a pointer. The calculations to increment the
offset should be done at the top level, where it iterates through the
VMAs. This low level code doesn't need to make any assumptions about how
that iteration works.
{
uint64_t npages = migrate->npages;
struct device *dev = adev->dev;
@@ -304,8 +304,8 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
uint64_t i, j;
int r;
- pr_debug("svms 0x%p [0x%lx 0x%lx]\n", prange->svms, prange->start,
- prange->last);
+ pr_debug("svms 0x%p [0x%lx 0x%lx 0x%lx]\n", prange->svms, prange->start,
+ prange->last, *cur_dst);
src = scratch;
dst = (uint64_t *)(scratch + npages);
@@ -316,7 +316,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
goto out;
}
- amdgpu_res_first(prange->ttm_res, prange->offset << PAGE_SHIFT,
+ amdgpu_res_first(prange->ttm_res, *cur_dst << PAGE_SHIFT,
Maybe do the PAGE_SHIFT in the caller, where ttm_res_offset is
calculated and updated.
npages << PAGE_SHIFT, &cursor);
for (i = j = 0; i < npages; i++) {
struct page *spage;
@@ -381,6 +381,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
migrate->dst[i] = 0;
}
}
+ *cur_dst = *cur_dst + i;
#ifdef DEBUG_FORCE_MIXED_DOMAINS
for (i = 0, j = 0; i < npages; i += 4, j++) {
@@ -403,7 +404,7 @@ svm_migrate_copy_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
static long
svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
struct vm_area_struct *vma, uint64_t start,
- uint64_t end, uint32_t trigger)
+ uint64_t end, uint32_t trigger, uint64_t *cur_dst)
Same as above.
{
struct kfd_process *p = container_of(prange->svms, struct kfd_process, svms);
uint64_t npages = (end - start) >> PAGE_SHIFT;
@@ -456,7 +457,7 @@ svm_migrate_vma_to_vram(struct amdgpu_device *adev, struct svm_range *prange,
else
pr_debug("0x%lx pages migrated\n", cpages);
- r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch);
+ r = svm_migrate_copy_to_vram(adev, prange, &migrate, &mfence, scratch, cur_dst);
migrate_vma_pages(&migrate);
pr_debug("successful/cpages/npages 0x%lx/0x%lx/0x%lx\n",
@@ -504,6 +505,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
unsigned long addr, start, end;
struct vm_area_struct *vma;
struct amdgpu_device *adev;
+ uint64_t cur_dst;
unsigned long cpages = 0;
long r = 0;
@@ -524,6 +526,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
start = prange->start << PAGE_SHIFT;
end = (prange->last + 1) << PAGE_SHIFT;
+ cur_dst = prange->offset;
You could do the PAGE_SHIFT here.
for (addr = start; addr < end;) {
unsigned long next;
@@ -533,7 +536,7 @@ svm_migrate_ram_to_vram(struct svm_range *prange, uint32_t best_loc,
break;
next = min(vma->vm_end, end);
- r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger);
+ r = svm_migrate_vma_to_vram(adev, prange, vma, addr, next, trigger, &cur_dst);
if (r < 0) {
pr_debug("failed %ld to migrate\n", r);
break;
Do the increment somewhere here, before next is assigned to addr.
ttm_res_offset += next - addr;
Regards,
Felix