Re: [PATCH net-next v3 3/3] net: add netmem_ref to skb_frag_t

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Mina Almasry wrote:
> Use netmem_ref instead of page in skb_frag_t. Currently netmem_ref
> is always a struct page underneath, but the abstraction allows efforts
> to add support for skb frags not backed by pages.
> 
> There is unfortunately 1 instance where the skb_frag_t is assumed to be
> a bio_vec in kcm. For this case, add a debug assert that the skb frag is
> indeed backed by a page, and do a cast.
> 
> Add skb[_frag]_fill_netmem_*() and skb_add_rx_frag_netmem() helpers so
> that the API can be used to create netmem skbs.
> 
> Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx>
> 
> ---
> 
> v3;
> - Renamed the fields in skb_frag_t.
> 
> v2:
> - Add skb frag filling helpers.
> 
> ---
>  include/linux/skbuff.h | 92 +++++++++++++++++++++++++++++-------------
>  net/core/skbuff.c      | 22 +++++++---
>  net/kcm/kcmsock.c      | 10 ++++-
>  3 files changed, 89 insertions(+), 35 deletions(-)
> 
> diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
> index 7ce38874dbd1..729c95e97be1 100644
> --- a/include/linux/skbuff.h
> +++ b/include/linux/skbuff.h
> @@ -37,6 +37,7 @@
>  #endif
>  #include <net/net_debug.h>
>  #include <net/dropreason-core.h>
> +#include <net/netmem.h>
>  
>  /**
>   * DOC: skb checksums
> @@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags;
>   */
>  #define GSO_BY_FRAGS	0xFFFF
>  
> -typedef struct bio_vec skb_frag_t;
> +typedef struct skb_frag {
> +	netmem_ref netmem;
> +	unsigned int len;
> +	unsigned int offset;
> +} skb_frag_t;
>  
>  /**
>   * skb_frag_size() - Returns the size of a skb fragment
> @@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t;
>   */
>  static inline unsigned int skb_frag_size(const skb_frag_t *frag)
>  {
> -	return frag->bv_len;
> +	return frag->len;
>  }
>  
>  /**
> @@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag)
>   */
>  static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
>  {
> -	frag->bv_len = size;
> +	frag->len = size;
>  }
>  
>  /**
> @@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size)
>   */
>  static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
>  {
> -	frag->bv_len += delta;
> +	frag->len += delta;
>  }
>  
>  /**
> @@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta)
>   */
>  static inline void skb_frag_size_sub(skb_frag_t *frag, int delta)
>  {
> -	frag->bv_len -= delta;
> +	frag->len -= delta;
>  }
>  
>  /**
> @@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p)
>   *	skb_frag_foreach_page - loop over pages in a fragment
>   *
>   *	@f:		skb frag to operate on
> - *	@f_off:		offset from start of f->bv_page
> + *	@f_off:		offset from start of f->netmem
>   *	@f_len:		length from f_off to loop over
>   *	@p:		(temp var) current page
>   *	@p_off:		(temp var) offset from start of current page,
> @@ -2431,22 +2436,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb)
>  	return skb_headlen(skb) + __skb_pagelen(skb);
>  }
>  
> +static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag,
> +					     netmem_ref netmem, int off,
> +					     int size)
> +{
> +	frag->netmem = netmem;
> +	frag->offset = off;
> +	skb_frag_size_set(frag, size);
> +}
> +
>  static inline void skb_frag_fill_page_desc(skb_frag_t *frag,
>  					   struct page *page,
>  					   int off, int size)
>  {
> -	frag->bv_page = page;
> -	frag->bv_offset = off;
> -	skb_frag_size_set(frag, size);
> +	skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size);
> +}
> +
> +static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo,
> +						int i, netmem_ref netmem,
> +						int off, int size)
> +{
> +	skb_frag_t *frag = &shinfo->frags[i];
> +
> +	skb_frag_fill_netmem_desc(frag, netmem, off, size);
>  }
>  
>  static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo,
>  					      int i, struct page *page,
>  					      int off, int size)
>  {
> -	skb_frag_t *frag = &shinfo->frags[i];
> -
> -	skb_frag_fill_page_desc(frag, page, off, size);
> +	__skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off,
> +				     size);
>  }
>  
>  /**
> @@ -2462,10 +2482,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
>  }
>  
>  /**
> - * __skb_fill_page_desc - initialise a paged fragment in an skb
> + * __skb_fill_netmem_desc - initialise a fragment in an skb
>   * @skb: buffer containing fragment to be initialised
> - * @i: paged fragment index to initialise
> - * @page: the page to use for this fragment
> + * @i: fragment index to initialise
> + * @netmem: the netmem to use for this fragment
>   * @off: the offset to the data with @page
>   * @size: the length of the data
>   *
> @@ -2474,10 +2494,13 @@ static inline void skb_len_add(struct sk_buff *skb, int delta)
>   *
>   * Does not take any additional reference on the fragment.
>   */
> -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
> -					struct page *page, int off, int size)
> +static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i,
> +					  netmem_ref netmem, int off,
> +					  int size)
>  {
> -	__skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size);
> +	struct page *page = netmem_to_page(netmem);
> +
> +	__skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size);
>  
>  	/* Propagate page pfmemalloc to the skb if we can. The problem is
>  	 * that not all callers have unique ownership of the page but rely
> @@ -2485,7 +2508,21 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
>  	 */
>  	page = compound_head(page);
>  	if (page_is_pfmemalloc(page))
> -		skb->pfmemalloc	= true;
> +		skb->pfmemalloc = true;
> +}
> +
> +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
> +					struct page *page, int off, int size)
> +{
> +	__skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
> +}
> +
> +static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i,
> +					netmem_ref netmem, int off,
> +					int size)
> +{
> +	__skb_fill_netmem_desc(skb, i, netmem, off, size);
> +	skb_shinfo(skb)->nr_frags = i + 1;
>  }
>  
>  /**
> @@ -2505,8 +2542,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,
>  static inline void skb_fill_page_desc(struct sk_buff *skb, int i,
>  				      struct page *page, int off, int size)
>  {
> -	__skb_fill_page_desc(skb, i, page, off, size);
> -	skb_shinfo(skb)->nr_frags = i + 1;
> +	skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size);
>  }
>  
>  /**
> @@ -2532,6 +2568,8 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i,
>  
>  void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
>  		     int size, unsigned int truesize);
> +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
> +			    int off, int size, unsigned int truesize);
>  
>  void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
>  			  unsigned int truesize);
> @@ -3380,7 +3418,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page,
>   */
>  static inline unsigned int skb_frag_off(const skb_frag_t *frag)
>  {
> -	return frag->bv_offset;
> +	return frag->offset;
>  }
>  
>  /**
> @@ -3390,7 +3428,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag)
>   */
>  static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
>  {
> -	frag->bv_offset += delta;
> +	frag->offset += delta;
>  }
>  
>  /**
> @@ -3400,7 +3438,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta)
>   */
>  static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
>  {
> -	frag->bv_offset = offset;
> +	frag->offset = offset;
>  }
>  
>  /**
> @@ -3411,7 +3449,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset)
>  static inline void skb_frag_off_copy(skb_frag_t *fragto,
>  				     const skb_frag_t *fragfrom)
>  {
> -	fragto->bv_offset = fragfrom->bv_offset;
> +	fragto->offset = fragfrom->offset;
>  }
>  
>  /**
> @@ -3422,7 +3460,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto,
>   */
>  static inline struct page *skb_frag_page(const skb_frag_t *frag)
>  {
> -	return frag->bv_page;
> +	return netmem_to_page(frag->netmem);
>  }
>  
>  /**
> @@ -3526,7 +3564,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag)
>  static inline void skb_frag_page_copy(skb_frag_t *fragto,
>  				      const skb_frag_t *fragfrom)
>  {
> -	fragto->bv_page = fragfrom->bv_page;
> +	fragto->netmem = fragfrom->netmem;
>  }
>  
>  bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio);
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index 4d4b11b0a83d..8b55e927bbe9 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -845,16 +845,24 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
>  }
>  EXPORT_SYMBOL(__napi_alloc_skb);
>  
> -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
> -		     int size, unsigned int truesize)
> +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem,
> +			    int off, int size, unsigned int truesize)
>  {
>  	DEBUG_NET_WARN_ON_ONCE(size > truesize);
>  
> -	skb_fill_page_desc(skb, i, page, off, size);
> +	skb_fill_netmem_desc(skb, i, netmem, off, size);
>  	skb->len += size;
>  	skb->data_len += size;
>  	skb->truesize += truesize;
>  }
> +EXPORT_SYMBOL(skb_add_rx_frag_netmem);
> +
> +void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off,
> +		     int size, unsigned int truesize)
> +{
> +	skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size,
> +			       truesize);
> +}
>  EXPORT_SYMBOL(skb_add_rx_frag);
>  
>  void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size,
> @@ -1904,10 +1912,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask)
>  
>  	/* skb frags point to kernel buffers */
>  	for (i = 0; i < new_frags - 1; i++) {
> -		__skb_fill_page_desc(skb, i, head, 0, psize);
> +		__skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize);
>  		head = (struct page *)page_private(head);
>  	}
> -	__skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off);
> +	__skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0,
> +			       d_off);
>  	skb_shinfo(skb)->nr_frags = new_frags;
>  
>  release:
> @@ -3645,7 +3654,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen)
>  		if (plen) {
>  			page = virt_to_head_page(from->head);
>  			offset = from->data - (unsigned char *)page_address(page);
> -			__skb_fill_page_desc(to, 0, page, offset, plen);
> +			__skb_fill_netmem_desc(to, 0, page_to_netmem(page),
> +					       offset, plen);
>  			get_page(page);
>  			j = 1;
>  			len -= plen;
> diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
> index 65d1f6755f98..3180a54b2c68 100644
> --- a/net/kcm/kcmsock.c
> +++ b/net/kcm/kcmsock.c
> @@ -636,9 +636,15 @@ static int kcm_write_msgs(struct kcm_sock *kcm)
>  		for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
>  			msize += skb_shinfo(skb)->frags[i].bv_len;
>  
> +		/* The cast to struct bio_vec* here assumes the frags are
> +		 * struct page based. WARN if there is no page in this skb.
> +		 */
> +		DEBUG_NET_WARN_ON_ONCE(
> +			!skb_frag_page(&skb_shinfo(skb)->frags[0]));
> +

It would be unsafe to continue the operation in this case. Even though
we should never get here, test and exit in all codepaths, similar to
other test above?

                if (WARN_ON(!skb_shinfo(skb)->nr_frags)) {
                        ret = -EINVAL;
                        goto out;
                }

>  		iov_iter_bvec(&msg.msg_iter, ITER_SOURCE,
> -			      skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags,
> -			      msize);
> +			      (const struct bio_vec *)skb_shinfo(skb)->frags,
> +			      skb_shinfo(skb)->nr_frags, msize);
>  		iov_iter_advance(&msg.msg_iter, txm->frag_offset);
>  
>  		do {
> -- 
> 2.43.0.472.g3155946c3a-goog
> 






[Index of Archives]     [KVM ARM]     [KVM ia64]     [KVM ppc]     [Virtualization Tools]     [Spice Development]     [Libvirt]     [Libvirt Users]     [Linux USB Devel]     [Linux Audio Users]     [Yosemite Questions]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux