On Wed, Jun 1, 2022 at 4:53 PM David Yat Sin <david.yatsin@xxxxxxx> wrote: > > From: Daniel Phillips <Daniel.Phillips@xxxxxxx> > > Add a new KFD ioctl to return the largest possible memory size that > can be allocated as a buffer object using > kfd_ioctl_alloc_memory_of_gpu. It attempts to use exactly the same > accept/reject criteria as that function so that allocating a new > buffer object of the size returned by this new ioctl is guaranteed to > succeed, barring races with other allocating tasks. > > Signed-off-by: Daniel Phillips <Daniel.Phillips@xxxxxxx> > Signed-off-by: David Yat Sin <david.yatsin@xxxxxxx> Got a link to the new UMD code which uses this? Please include that in the commit message. Thanks, Alex > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h | 1 + > .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c | 37 +++++++++++++++++-- > drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 34 +++++++++++++++++ > include/uapi/linux/kfd_ioctl.h | 14 ++++++- > 4 files changed, 80 insertions(+), 6 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > index f8b9f27adcf5..0b0ab1de76ca 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h > @@ -266,6 +266,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct amdgpu_device *adev, > void amdgpu_amdkfd_gpuvm_release_process_vm(struct amdgpu_device *adev, > void *drm_priv); > uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *drm_priv); > +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev); > int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > struct amdgpu_device *adev, uint64_t va, uint64_t size, > void *drm_priv, struct kgd_mem **mem, > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > index 34ba9e776521..105af82d41a4 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c > @@ -38,6 +38,12 @@ > */ > #define AMDGPU_USERPTR_RESTORE_DELAY_MS 1 > > +/* > + * Align VRAM allocations to 2MB to avoid fragmentation caused by 4K allocations in the tail 2MB > + * BO chunk > + */ > +#define VRAM_ALLOCATION_ALIGN (1 << 21) > + > /* Impose limit on how much memory KFD can use */ > static struct { > uint64_t max_system_mem_limit; > @@ -108,7 +114,7 @@ void amdgpu_amdkfd_reserve_system_mem(uint64_t size) > * compromise that should work in most cases without reserving too > * much memory for page tables unnecessarily (factor 16K, >> 14). > */ > -#define ESTIMATE_PT_SIZE(mem_size) ((mem_size) >> 14) > +#define ESTIMATE_PT_SIZE(mem_size) max(((mem_size) >> 14), AMDGPU_VM_RESERVED_VRAM) > > static size_t amdgpu_amdkfd_acc_size(uint64_t size) > { > @@ -148,7 +154,12 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, > } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { > system_mem_needed = acc_size; > ttm_mem_needed = acc_size; > - vram_needed = size; > + > + /* > + * Conservatively round up the allocation requirement to 2 MB to avoid fragmentation > + * caused by 4K allocations in the tail 2M BO chunk > + */ > + vram_needed = ALIGN(size, VRAM_ALLOCATION_ALIGN); > } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { > system_mem_needed = acc_size + size; > ttm_mem_needed = acc_size; > @@ -173,7 +184,9 @@ static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, > (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > > kfd_mem_limit.max_ttm_mem_limit) || > (adev->kfd.vram_used + vram_needed > > - adev->gmc.real_vram_size - reserved_for_pt)) { > + adev->gmc.real_vram_size - > + atomic64_read(&adev->vram_pin_size) - > + reserved_for_pt)) { > ret = -ENOMEM; > goto release; > } > @@ -205,7 +218,7 @@ static void unreserve_mem_limit(struct amdgpu_device *adev, > } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_VRAM) { > kfd_mem_limit.system_mem_used -= acc_size; > kfd_mem_limit.ttm_mem_used -= acc_size; > - adev->kfd.vram_used -= size; > + adev->kfd.vram_used -= ALIGN(size, VRAM_ALLOCATION_ALIGN); > } else if (alloc_flag & KFD_IOC_ALLOC_MEM_FLAGS_USERPTR) { > kfd_mem_limit.system_mem_used -= (acc_size + size); > kfd_mem_limit.ttm_mem_used -= acc_size; > @@ -1492,6 +1505,22 @@ int amdgpu_amdkfd_criu_resume(void *p) > return ret; > } > > +size_t amdgpu_amdkfd_get_available_memory(struct amdgpu_device *adev) > +{ > + uint64_t reserved_for_pt = > + ESTIMATE_PT_SIZE(amdgpu_amdkfd_total_mem_size); > + size_t available; > + > + spin_lock(&kfd_mem_limit.mem_limit_lock); > + available = adev->gmc.real_vram_size > + - adev->kfd.vram_used > + - atomic64_read(&adev->vram_pin_size) > + - reserved_for_pt; > + spin_unlock(&kfd_mem_limit.mem_limit_lock); > + > + return ALIGN_DOWN(available, VRAM_ALLOCATION_ALIGN); > +} > + > int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( > struct amdgpu_device *adev, uint64_t va, uint64_t size, > void *drm_priv, struct kgd_mem **mem, > diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > index 95fa7a9718bb..625e837f0119 100644 > --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c > @@ -65,6 +65,25 @@ static int kfd_char_dev_major = -1; > static struct class *kfd_class; > struct device *kfd_device; > > +static inline struct kfd_process_device *kfd_lock_pdd_by_id(struct kfd_process *p, __u32 gpu_id) > +{ > + struct kfd_process_device *pdd; > + > + mutex_lock(&p->mutex); > + pdd = kfd_process_device_data_by_id(p, gpu_id); > + > + if (pdd) > + return pdd; > + > + mutex_unlock(&p->mutex); > + return NULL; > +} > + > +static inline void kfd_unlock_pdd(struct kfd_process_device *pdd) > +{ > + mutex_unlock(&pdd->process->mutex); > +} > + > int kfd_chardev_init(void) > { > int err = 0; > @@ -958,6 +977,19 @@ bool kfd_dev_is_large_bar(struct kfd_dev *dev) > return false; > } > > +static int kfd_ioctl_get_available_memory(struct file *filep, > + struct kfd_process *p, void *data) > +{ > + struct kfd_ioctl_get_available_memory_args *args = data; > + struct kfd_process_device *pdd = kfd_lock_pdd_by_id(p, args->gpu_id); > + > + if (!pdd) > + return -EINVAL; > + args->available = amdgpu_amdkfd_get_available_memory(pdd->dev->adev); > + kfd_unlock_pdd(pdd); > + return 0; > +} > + > static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep, > struct kfd_process *p, void *data) > { > @@ -2642,6 +2674,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = { > AMDKFD_IOCTL_DEF(AMDKFD_IOC_CRIU_OP, > kfd_ioctl_criu, KFD_IOC_FLAG_CHECKPOINT_RESTORE), > > + AMDKFD_IOCTL_DEF(AMDKFD_IOC_AVAILABLE_MEMORY, > + kfd_ioctl_get_available_memory, 0), > }; > > #define AMDKFD_CORE_IOCTL_COUNT ARRAY_SIZE(amdkfd_ioctls) > diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h > index eb9ff85f8556..c648ed7c5ff1 100644 > --- a/include/uapi/linux/kfd_ioctl.h > +++ b/include/uapi/linux/kfd_ioctl.h > @@ -34,9 +34,10 @@ > * - 1.6 - Query clear flags in SVM get_attr API > * - 1.7 - Checkpoint Restore (CRIU) API > * - 1.8 - CRIU - Support for SDMA transfers with GTT BOs > + * - 1.9 - Add available memory ioctl > */ > #define KFD_IOCTL_MAJOR_VERSION 1 > -#define KFD_IOCTL_MINOR_VERSION 8 > +#define KFD_IOCTL_MINOR_VERSION 9 > > struct kfd_ioctl_get_version_args { > __u32 major_version; /* from KFD */ > @@ -100,6 +101,12 @@ struct kfd_ioctl_get_queue_wave_state_args { > __u32 pad; > }; > > +struct kfd_ioctl_get_available_memory_args { > + __u64 available; /* from KFD */ > + __u32 gpu_id; /* to KFD */ > + __u32 pad; > +}; > + > /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */ > #define KFD_IOC_CACHE_POLICY_COHERENT 0 > #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1 > @@ -824,7 +831,10 @@ struct kfd_ioctl_set_xnack_mode_args { > #define AMDKFD_IOC_CRIU_OP \ > AMDKFD_IOWR(0x22, struct kfd_ioctl_criu_args) > > +#define AMDKFD_IOC_AVAILABLE_MEMORY \ > + AMDKFD_IOWR(0x23, struct kfd_ioctl_get_available_memory_args) > + > #define AMDKFD_COMMAND_START 0x01 > -#define AMDKFD_COMMAND_END 0x23 > +#define AMDKFD_COMMAND_END 0x24 > > #endif > -- > 2.30.2 >