Re: [PATCH 2/5] drm/amdkfd: enable per process SMI event

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Am 2022-01-14 um 3:38 p.m. schrieb Philip Yang:
> Process receive event log from same process by default. Add a flag to
> be able to receive event log from all processes, this requires super
> user permission.
>
> Event log with pid 0 send to all processes.
>
> Define new event log id, migration trigger, user queue eviction
> trigger, those new event log will be added in following patches.
>
> Update kfd_ioctl.h version.
>
> Signed-off-by: Philip Yang <Philip.Yang@xxxxxxx>
> ---
>  drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 28 +++++++++++++++------
>  include/uapi/linux/kfd_ioctl.h              | 27 ++++++++++++++++++++
>  2 files changed, 48 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> index 7023fa21a0a9..5818ea8ad4ce 100644
> --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c
> @@ -37,6 +37,8 @@ struct kfd_smi_client {
>  	uint64_t events;
>  	struct kfd_dev *dev;
>  	spinlock_t lock;
> +	pid_t pid;
> +	bool suser;
>  };
>  
>  #define MAX_KFIFO_SIZE	1024
> @@ -150,16 +152,26 @@ static int kfd_smi_ev_release(struct inode *inode, struct file *filep)
>  	return 0;
>  }
>  
> -static void add_event_to_kfifo(struct kfd_dev *dev, unsigned int smi_event,
> -			      char *event_msg, int len)
> +static bool kfd_smi_ev_enabled(pid_t pid, struct kfd_smi_client *client,
> +			       unsigned int smi_event)
> +{
> +	if (pid &&
> +	    !(client->suser && client->events & KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESSES)) &&
> +	    client->pid != pid)

We cannot change the default behaviour for existing SMI events.
Otherwise be break existing user mode.

I'd make the new flag KFD_SMI_EVENT_SAME_PROCESS. Its default is 0,
which preserves the existing behaviour.

The SAME_PROCESS flag can apply to all events. But for the new profiler
events, we need to check both the flag and the ADMIN capability to allow
events from all processes, because they contain more sensitive
information than the existing SMI events.


> +		return false;
> +
> +	return client->events & KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event);

You still need to access client->events with READ_ONCE here. The reason
is, that this function is not holding the lock. So we need to read the
variable atomically.

Regards,
  Felix


> +}
> +
> +static void add_event_to_kfifo(pid_t pid, struct kfd_dev *dev,
> +			       unsigned int smi_event, char *event_msg, int len)
>  {
>  	struct kfd_smi_client *client;
>  
>  	rcu_read_lock();
>  
>  	list_for_each_entry_rcu(client, &dev->smi_clients, list) {
> -		if (!(READ_ONCE(client->events) &
> -				KFD_SMI_EVENT_MASK_FROM_INDEX(smi_event)))
> +		if (!kfd_smi_ev_enabled(pid, client, smi_event))
>  			continue;
>  		spin_lock(&client->lock);
>  		if (kfifo_avail(&client->fifo) >= len) {
> @@ -202,7 +214,7 @@ void kfd_smi_event_update_gpu_reset(struct kfd_dev *dev, bool post_reset)
>  	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x\n", event,
>  						dev->reset_seq_num);
>  
> -	add_event_to_kfifo(dev, event, fifo_in, len);
> +	add_event_to_kfifo(0, dev, event, fifo_in, len);
>  }
>  
>  void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
> @@ -225,7 +237,7 @@ void kfd_smi_event_update_thermal_throttling(struct kfd_dev *dev,
>  		       KFD_SMI_EVENT_THERMAL_THROTTLE, throttle_bitmask,
>  		       amdgpu_dpm_get_thermal_throttling_counter(dev->adev));
>  
> -	add_event_to_kfifo(dev, KFD_SMI_EVENT_THERMAL_THROTTLE,	fifo_in, len);
> +	add_event_to_kfifo(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, fifo_in, len);
>  }
>  
>  void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
> @@ -250,7 +262,7 @@ void kfd_smi_event_update_vmfault(struct kfd_dev *dev, uint16_t pasid)
>  	len = snprintf(fifo_in, sizeof(fifo_in), "%x %x:%s\n", KFD_SMI_EVENT_VMFAULT,
>  		task_info.pid, task_info.task_name);
>  
> -	add_event_to_kfifo(dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
> +	add_event_to_kfifo(task_info.pid, dev, KFD_SMI_EVENT_VMFAULT, fifo_in, len);
>  }
>  
>  int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
> @@ -282,6 +294,8 @@ int kfd_smi_event_open(struct kfd_dev *dev, uint32_t *fd)
>  	spin_lock_init(&client->lock);
>  	client->events = 0;
>  	client->dev = dev;
> +	client->pid = current->pid;
> +	client->suser = capable(CAP_SYS_ADMIN);
>  
>  	spin_lock(&dev->smi_lock);
>  	list_add_rcu(&client->list, &dev->smi_clients);
> diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
> index af96af174dc4..bbbae8ad9721 100644
> --- a/include/uapi/linux/kfd_ioctl.h
> +++ b/include/uapi/linux/kfd_ioctl.h
> @@ -32,6 +32,7 @@
>   * - 1.4 - Indicate new SRAM EDC bit in device properties
>   * - 1.5 - Add SVM API
>   * - 1.6 - Query clear flags in SVM get_attr API
> + * - 1.7 - Add SMI profiler event log
>   */
>  #define KFD_IOCTL_MAJOR_VERSION 1
>  #define KFD_IOCTL_MINOR_VERSION 6
> @@ -459,10 +460,36 @@ enum kfd_smi_event {
>  	KFD_SMI_EVENT_THERMAL_THROTTLE = 2,
>  	KFD_SMI_EVENT_GPU_PRE_RESET = 3,
>  	KFD_SMI_EVENT_GPU_POST_RESET = 4,
> +	KFD_SMI_EVENT_MIGRATION = 5,
> +	KFD_SMI_EVENT_PAGE_FAULT = 6,
> +	KFD_SMI_EVENT_QUEUE_EVICTION = 7,
> +	KFD_SMI_EVENT_QUEUE_EVICTION_RESTORE = 8,
> +
> +	/*
> +	 * max event number, as a flag bit to get events from all processes,
> +	 * this requires super user permission, otherwise will not be able to
> +	 * receive events from any process. Without this flag to receive events
> +	 * from same process.
> +	 */
> +	KFD_SMI_EVENT_ALL_PROCESSES = 64
>  };
>  
>  #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1))
>  
> +enum KFD_MIGRATION_TRIGGER {
> +	MIGRATION_TRIGGER_PREFETCH = 1,
> +	MIGRATION_TRIGGER_PAGEFAULT,
> +	MIGRATION_TRIGGER_PAGEFAULT_CPU,
> +	MIGRATION_TRIGGER_TTM_EVICTION
> +};
> +
> +enum KFD_USER_QUEUE_EVICTION_TRIGGER {
> +	SVM_RANGE_EVICTION = 1,
> +	USERPTR_EVICTION,
> +	TTM_EVICTION,
> +	SUSPEND_EVICTION
> +};
> +
>  struct kfd_ioctl_smi_events_args {
>  	__u32 gpuid;	/* to KFD */
>  	__u32 anon_fd;	/* from KFD */



[Index of Archives]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux