Signed-off-by: Alex Xie <AlexBin.Xie at amd.com> --- drivers/gpu/drm/amd/amdgpu/amdgpu.h | 5 +- drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c | 110 +++++++++++++++++++---------- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 1 - 3 files changed, 76 insertions(+), 40 deletions(-) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 7caf514..21d318b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -1588,9 +1588,8 @@ struct amdgpu_device { /* data for buffer migration throttling */ struct { - spinlock_t lock; - s64 last_update_us; - s64 accum_us; /* accumulated microseconds */ + atomic64_t last_update_us; + atomic64_t accum_us; /* accumulated microseconds */ u32 log2_max_MBps; } mm_stats; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index 82131d7..7b6f42e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -225,6 +225,9 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) s64 time_us, increment_us; u64 max_bytes; u64 free_vram, total_vram, used_vram; + s64 old_update_us, head_time_us; + s64 accum_us; + s64 old_accum_us, head_accum_us; /* Allow a maximum of 200 accumulated ms. This is basically per-IB * throttling. @@ -242,47 +245,83 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) used_vram = atomic64_read(&adev->vram_usage); free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram; - spin_lock(&adev->mm_stats.lock); - /* Increase the amount of accumulated us. 
*/ - time_us = ktime_to_us(ktime_get()); - increment_us = time_us - adev->mm_stats.last_update_us; - adev->mm_stats.last_update_us = time_us; - adev->mm_stats.accum_us = min(adev->mm_stats.accum_us + increment_us, - us_upper_bound); - - /* This prevents the short period of low performance when the VRAM - * usage is low and the driver is in debt or doesn't have enough - * accumulated us to fill VRAM quickly. - * - * The situation can occur in these cases: - * - a lot of VRAM is freed by userspace - * - the presence of a big buffer causes a lot of evictions - * (solution: split buffers into smaller ones) - * - * If 128 MB or 1/8th of VRAM is free, start filling it now by setting - * accum_us to a positive number. - */ - if (free_vram >= 128 * 1024 * 1024 || free_vram >= total_vram / 8) { - s64 min_us; - - /* Be more aggresive on dGPUs. Try to fill a portion of free - * VRAM now. - */ - if (!(adev->flags & AMD_IS_APU)) - min_us = bytes_to_us(adev, free_vram / 4); + old_update_us = atomic64_read(&adev->mm_stats.last_update_us); + for (;;) { + time_us = ktime_to_us(ktime_get()); + head_time_us = atomic64_cmpxchg(&adev->mm_stats.last_update_us, + old_update_us, time_us); + + if (likely(head_time_us == old_update_us)) + /* + * No other task modified adev->mm_stats.last_update_us. + * Update was successful. + */ + break; + else - min_us = 0; /* Reset accum_us on APUs. */ + /* Another task modified the value after we read it. + * A rare contention happens, let us retry. + * In most cases, one retry can do the job. + * See function atomic64_add_unless as a similar idea. + */ + old_update_us = head_time_us; + } + increment_us = time_us - old_update_us; + + old_accum_us = atomic64_read(&adev->mm_stats.accum_us); + + for (;;) { + accum_us = min(old_accum_us + increment_us, us_upper_bound); + + /* This prevents the short period of low performance when the + * VRAM usage is low and the driver is in debt or doesn't have + * enough accumulated us to fill VRAM quickly. 
 + * + * The situation can occur in these cases: + * - a lot of VRAM is freed by userspace + * - the presence of a big buffer causes a lot of evictions + * (solution: split buffers into smaller ones) + * + * If 128 MB or 1/8th of VRAM is free, start filling it now by + * setting accum_us to a positive number. + */ + if (free_vram >= 128 * 1024 * 1024 || + free_vram >= total_vram / 8) { + s64 min_us; + + /* Be more aggressive on dGPUs. Try to fill a portion of + * free VRAM now. + */ + if (!(adev->flags & AMD_IS_APU)) + min_us = bytes_to_us(adev, free_vram / 4); + else + min_us = 0; /* Reset accum_us on APUs. */ + + accum_us = max(min_us, accum_us); + } + + head_accum_us = atomic64_cmpxchg(&adev->mm_stats.accum_us, + old_accum_us, accum_us); - adev->mm_stats.accum_us = max(min_us, adev->mm_stats.accum_us); + if (likely(head_accum_us == old_accum_us)) + /* + * No other task modified adev->mm_stats.accum_us. + * Update was successful. + */ + break; + else + /* Another task modified the value after we read it. + * A rare contention happens, let us retry. + * In most cases, one retry can do the job. + * See function atomic64_add_unless as a similar idea. + */ + old_accum_us = head_accum_us; + } /* This returns 0 if the driver is in debt to disallow (optional) * buffer moves. 
*/ - max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us); - - spin_unlock(&adev->mm_stats.lock); + max_bytes = us_to_bytes(adev, accum_us); return max_bytes; } @@ -292,9 +331,8 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev) */ void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes) { - spin_lock(&adev->mm_stats.lock); - adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes); - spin_unlock(&adev->mm_stats.lock); + s64 i = bytes_to_us(adev, num_bytes); + atomic64_sub(i, &adev->mm_stats.accum_us); } static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index ff90f78..9e9d592 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -2117,7 +2117,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, spin_lock_init(&adev->didt_idx_lock); spin_lock_init(&adev->gc_cac_idx_lock); spin_lock_init(&adev->audio_endpt_idx_lock); - spin_lock_init(&adev->mm_stats.lock); INIT_LIST_HEAD(&adev->shadow_list); mutex_init(&adev->shadow_list_lock); -- 2.7.4