Re: [PATCH bpf-next v6 3/7] bpf: Generalize bpf_sk_storage

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 25.07.20 03:13, Martin KaFai Lau wrote:
> On Thu, Jul 23, 2020 at 01:50:28PM +0200, KP Singh wrote:
>> From: KP Singh <kpsingh@xxxxxxxxxx>
>>
>> Refactor the functionality in bpf_sk_storage.c so that the concept of
>> storage linked to kernel objects can be extended to other objects like
>> inode, task_struct etc.
>>
>> Each new local storage will still be a separate map and provide its own
>> set of helpers. This allows for future object-specific extensions while
>> still sharing a lot of the underlying implementation.
>>
> 
> [ ... ]
> 
>> @@ -386,54 +407,28 @@ static int sk_storage_alloc(struct sock *sk,
>>   * Otherwise, it will become a leak (and other memory issues
>>   * during map destruction).
>>   */
>> -static struct bpf_local_storage_data *
>> -bpf_local_storage_update(struct sock *sk, struct bpf_map *map, void *value,
>> +struct bpf_local_storage_data *
>> +bpf_local_storage_update(void *owner, struct bpf_map *map,
>> +			 struct bpf_local_storage *local_storage, void *value,
>>  			 u64 map_flags)
>>  {
>>  	struct bpf_local_storage_data *old_sdata = NULL;
>>  	struct bpf_local_storage_elem *selem;
>> -	struct bpf_local_storage *local_storage;
>>  	struct bpf_local_storage_map *smap;
>>  	int err;
>>  
>> -	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
>> -	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
>> -	    /* BPF_F_LOCK can only be used in a value with spin_lock */
>> -	    unlikely((map_flags & BPF_F_LOCK) && !map_value_has_spin_lock(map)))
>> -		return ERR_PTR(-EINVAL);
>> -
>>  	smap = (struct bpf_local_storage_map *)map;
>> -	local_storage = rcu_dereference(sk->sk_bpf_storage);
>> -	if (!local_storage || hlist_empty(&local_storage->list)) {
>> -		/* Very first elem for this object */
>> -		err = check_flags(NULL, map_flags);
> This check_flags here is missing in the later sk_storage_update().
> 
>> -		if (err)
>> -			return ERR_PTR(err);
>> -
>> -		selem = bpf_selem_alloc(smap, sk, value, true);
>> -		if (!selem)
>> -			return ERR_PTR(-ENOMEM);
>> -
>> -		err = sk_storage_alloc(sk, smap, selem);
>> -		if (err) {
>> -			kfree(selem);
>> -			atomic_sub(smap->elem_size, &sk->sk_omem_alloc);
>> -			return ERR_PTR(err);
>> -		}
>> -
>> -		return SDATA(selem);
>> -	}
>>  
>>  	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
>>  		/* Hoping to find an old_sdata to do inline update
>>  		 * such that it can avoid taking the local_storage->lock
>>  		 * and changing the lists.
>>  		 */
>> -		old_sdata =
>> -			bpf_local_storage_lookup(local_storage, smap, false);
>> +		old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
>>  		err = check_flags(old_sdata, map_flags);
>>  		if (err)
>>  			return ERR_PTR(err);
>> +
>>  		if (old_sdata && selem_linked_to_storage(SELEM(old_sdata))) {
>>  			copy_map_value_locked(map, old_sdata->data,
>>  					      value, false);
> 
> [ ... ]
> 
>> +static struct bpf_local_storage_data *
>> +sk_storage_update(void *owner, struct bpf_map *map, void *value, u64 map_flags)
>> +{
>> +	struct bpf_local_storage_data *old_sdata = NULL;
>> +	struct bpf_local_storage_elem *selem;
>> +	struct bpf_local_storage *local_storage;
>> +	struct bpf_local_storage_map *smap;
>> +	struct sock *sk;
>> +	int err;
>> +
>> +	err = bpf_local_storage_check_update_flags(map, map_flags);
>> +	if (err)
>> +		return ERR_PTR(err);
>> +
>> +	sk = owner;
>> +	local_storage = rcu_dereference(sk->sk_bpf_storage);
>> +	smap = (struct bpf_local_storage_map *)map;
>> +
>> +	if (!local_storage || hlist_empty(&local_storage->list)) {
> 
> "check_flags(NULL, map_flags);" is gone in this refactoring.
> 
> This part of the code is copied into inode_storage_update()
> in a later patch, which then has the same issue.
> 
>> +		/* Very first elem */
>> +		selem = map->ops->map_selem_alloc(smap, owner, value, !old_sdata);
>> +		if (!selem)
>> +			return ERR_PTR(-ENOMEM);
> 
>>  static int sk_storage_map_btf_id;
>>  const struct bpf_map_ops sk_storage_map_ops = {
>> -	.map_alloc_check = bpf_sk_storage_map_alloc_check,
>> -	.map_alloc = bpf_local_storage_map_alloc,
>> -	.map_free = bpf_local_storage_map_free,
>> +	.map_alloc_check = bpf_local_storage_map_alloc_check,
>> +	.map_alloc = sk_storage_map_alloc,
>> +	.map_free = sk_storage_map_free,
>>  	.map_get_next_key = notsupp_get_next_key,
>>  	.map_lookup_elem = bpf_fd_sk_storage_lookup_elem,
>>  	.map_update_elem = bpf_fd_sk_storage_update_elem,
>>  	.map_delete_elem = bpf_fd_sk_storage_delete_elem,
>> -	.map_check_btf = bpf_sk_storage_map_check_btf,
>> +	.map_check_btf = bpf_local_storage_map_check_btf,
>>  	.map_btf_name = "bpf_local_storage_map",
>>  	.map_btf_id = &sk_storage_map_btf_id,
>> +	.map_selem_alloc = sk_selem_alloc,
>> +	.map_local_storage_update = sk_storage_update,
>> +	.map_local_storage_unlink = unlink_sk_storage,
> I think refactoring the code into map_selem_alloc, map_local_storage_update,
> and map_local_storage_unlink is not the best option.  The sk and inode
> versions of the above map_ops are mostly the same.  Fixing an
> issue like the one mentioned above then requires fixing the sk, the inode,
> and any future kernel-object code.
> 
> The only difference is that sk charges omem and inode does not.
> I have played around a little.  I think adding the following three ops (pasted at
> the end) is better and should be enough for both sk and inode.  The inode
> does not even have to implement the (un)charge ops at all.
> 
> That should remove the duplication for the following:
> - (sk|inode)_selem_alloc
> - (sk|inode)_storage_update
> - unlink_(sk|inode)_storage
> - (sk|inode)_storage_alloc
> 
> Another bonus is that the new bpf_local_storage_check_update_flags() and
> bpf_local_storage_publish() will no longer be needed either.

I really like this approach. Thank you so much!

> 
> I have hacked up this patch 3 change to compile-test this idea.
> I will post it in another email.  Let me know what you think.
> 
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -33,6 +33,9 @@ struct btf;
>  struct btf_type;
>  struct exception_table_entry;
>  struct seq_operations;
> +struct bpf_local_storage;
> +struct bpf_local_storage_map;
> +struct bpf_local_storage_elem;
>  
>  extern struct idr btf_idr;
>  extern spinlock_t btf_idr_lock;
> @@ -93,6 +96,13 @@ struct bpf_map_ops {
>  	__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
>  			     struct poll_table_struct *pts);
>  
> +	/* Functions called by bpf_local_storage maps */
> +	int (*map_local_storage_charge)(struct bpf_local_storage_map *smap,
> +					void *owner, u32 size);
> +	void (*map_local_storage_uncharge)(struct bpf_local_storage_map *smap,
> +					   void *owner, u32 size);
> +	struct bpf_local_storage __rcu ** (*map_owner_storage_ptr)(struct bpf_local_storage_map *smap,
> +								   void *owner);
>  	/* BTF name and id of struct allocated by map_alloc */
>  	const char * const map_btf_name;
>  	int *map_btf_id;
> 



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux