Mina Almasry wrote: > Use netmem_ref instead of page in skb_frag_t. Currently netmem_ref > is always a struct page underneath, but the abstraction allows efforts > to add support for skb frags not backed by pages. > > There is unfortunately 1 instance where the skb_frag_t is assumed to be > a bio_vec in kcm. For this case, add a debug assert that the skb frag is > indeed backed by a page, and do a cast. > > Add skb[_frag]_fill_netmem_*() and skb_add_rx_frag_netmem() helpers so > that the API can be used to create netmem skbs. > > Signed-off-by: Mina Almasry <almasrymina@xxxxxxxxxx> > > --- > > v3; > - Renamed the fields in skb_frag_t. > > v2: > - Add skb frag filling helpers. > > --- > include/linux/skbuff.h | 92 +++++++++++++++++++++++++++++------------- > net/core/skbuff.c | 22 +++++++--- > net/kcm/kcmsock.c | 10 ++++- > 3 files changed, 89 insertions(+), 35 deletions(-) > > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > index 7ce38874dbd1..729c95e97be1 100644 > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -37,6 +37,7 @@ > #endif > #include <net/net_debug.h> > #include <net/dropreason-core.h> > +#include <net/netmem.h> > > /** > * DOC: skb checksums > @@ -359,7 +360,11 @@ extern int sysctl_max_skb_frags; > */ > #define GSO_BY_FRAGS 0xFFFF > > -typedef struct bio_vec skb_frag_t; > +typedef struct skb_frag { > + netmem_ref netmem; > + unsigned int len; > + unsigned int offset; > +} skb_frag_t; > > /** > * skb_frag_size() - Returns the size of a skb fragment > @@ -367,7 +372,7 @@ typedef struct bio_vec skb_frag_t; > */ > static inline unsigned int skb_frag_size(const skb_frag_t *frag) > { > - return frag->bv_len; > + return frag->len; > } > > /** > @@ -377,7 +382,7 @@ static inline unsigned int skb_frag_size(const skb_frag_t *frag) > */ > static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) > { > - frag->bv_len = size; > + frag->len = size; > } > > /** > @@ -387,7 +392,7 @@ static inline void skb_frag_size_set(skb_frag_t *frag, unsigned int size) > */ > static inline void skb_frag_size_add(skb_frag_t *frag, int delta) > { > - frag->bv_len += delta; > + frag->len += delta; > } > > /** > @@ -397,7 +402,7 @@ static inline void skb_frag_size_add(skb_frag_t *frag, int delta) > */ > static inline void skb_frag_size_sub(skb_frag_t *frag, int delta) > { > - frag->bv_len -= delta; > + frag->len -= delta; > } > > /** > @@ -417,7 +422,7 @@ static inline bool skb_frag_must_loop(struct page *p) > * skb_frag_foreach_page - loop over pages in a fragment > * > * @f: skb frag to operate on > - * @f_off: offset from start of f->bv_page > + * @f_off: offset from start of f->netmem > * @f_len: length from f_off to loop over > * @p: (temp var) current page > * @p_off: (temp var) offset from start of current page, > @@ -2431,22 +2436,37 @@ static inline unsigned int skb_pagelen(const struct sk_buff *skb) > return skb_headlen(skb) + __skb_pagelen(skb); > } > > +static inline void skb_frag_fill_netmem_desc(skb_frag_t *frag, > + netmem_ref netmem, int off, > + int size) > +{ > + frag->netmem = netmem; > + frag->offset = off; > + skb_frag_size_set(frag, size); > +} > + > static inline void skb_frag_fill_page_desc(skb_frag_t *frag, > struct page *page, > int off, int size) > { > - frag->bv_page = page; > - frag->bv_offset = off; > - skb_frag_size_set(frag, size); > + skb_frag_fill_netmem_desc(frag, page_to_netmem(page), off, size); > +} > + > +static inline void __skb_fill_netmem_desc_noacc(struct skb_shared_info *shinfo, > + int i, netmem_ref netmem, > + int off, int size) > +{ > + skb_frag_t *frag = &shinfo->frags[i]; > + > + skb_frag_fill_netmem_desc(frag, netmem, off, size); > } > > static inline void __skb_fill_page_desc_noacc(struct skb_shared_info *shinfo, > int i, struct page *page, > int off, int size) > { > - skb_frag_t *frag = &shinfo->frags[i]; > - > - skb_frag_fill_page_desc(frag, page, off, size); > + __skb_fill_netmem_desc_noacc(shinfo, i, page_to_netmem(page), off, > + size); > } > > /** > @@ -2462,10 +2482,10 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) > } > > /** > - * __skb_fill_page_desc - initialise a paged fragment in an skb > + * __skb_fill_netmem_desc - initialise a fragment in an skb > * @skb: buffer containing fragment to be initialised > - * @i: paged fragment index to initialise > - * @page: the page to use for this fragment > + * @i: fragment index to initialise > + * @netmem: the netmem to use for this fragment > * @off: the offset to the data with @page > * @size: the length of the data > * > @@ -2474,10 +2494,13 @@ static inline void skb_len_add(struct sk_buff *skb, int delta) > * > * Does not take any additional reference on the fragment. > */ > -static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, > - struct page *page, int off, int size) > +static inline void __skb_fill_netmem_desc(struct sk_buff *skb, int i, > + netmem_ref netmem, int off, > + int size) > { > - __skb_fill_page_desc_noacc(skb_shinfo(skb), i, page, off, size); > + struct page *page = netmem_to_page(netmem); > + > + __skb_fill_netmem_desc_noacc(skb_shinfo(skb), i, netmem, off, size); > > /* Propagate page pfmemalloc to the skb if we can. The problem is > * that not all callers have unique ownership of the page but rely > @@ -2485,7 +2508,21 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, > */ > page = compound_head(page); > if (page_is_pfmemalloc(page)) > - skb->pfmemalloc = true; > + skb->pfmemalloc = true; > +} > + > +static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, > + struct page *page, int off, int size) > +{ > + __skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); > +} > + > +static inline void skb_fill_netmem_desc(struct sk_buff *skb, int i, > + netmem_ref netmem, int off, > + int size) > +{ > + __skb_fill_netmem_desc(skb, i, netmem, off, size); > + skb_shinfo(skb)->nr_frags = i + 1; > } > > /** > @@ -2505,8 +2542,7 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i, > static inline void skb_fill_page_desc(struct sk_buff *skb, int i, > struct page *page, int off, int size) > { > - __skb_fill_page_desc(skb, i, page, off, size); > - skb_shinfo(skb)->nr_frags = i + 1; > + skb_fill_netmem_desc(skb, i, page_to_netmem(page), off, size); > } > > /** > @@ -2532,6 +2568,8 @@ static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, > > void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, > int size, unsigned int truesize); > +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, > + int off, int size, unsigned int truesize); > > void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, > unsigned int truesize); > @@ -3380,7 +3418,7 @@ static inline void skb_propagate_pfmemalloc(const struct page *page, > */ > static inline unsigned int skb_frag_off(const skb_frag_t *frag) > { > - return frag->bv_offset; > + return frag->offset; > } > > /** > @@ -3390,7 +3428,7 @@ static inline unsigned int skb_frag_off(const skb_frag_t *frag) > */ > static inline void skb_frag_off_add(skb_frag_t *frag, int delta) > { > - frag->bv_offset += delta; > + frag->offset += delta; > } > > /** > @@ -3400,7 +3438,7 @@ static inline void skb_frag_off_add(skb_frag_t *frag, int delta) > */ > static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) > { > - frag->bv_offset = offset; > + frag->offset = offset; > } > > /** > @@ -3411,7 +3449,7 @@ static inline void skb_frag_off_set(skb_frag_t *frag, unsigned int offset) > static inline void skb_frag_off_copy(skb_frag_t *fragto, > const skb_frag_t *fragfrom) > { > - fragto->bv_offset = fragfrom->bv_offset; > + fragto->offset = fragfrom->offset; > } > > /** > @@ -3422,7 +3460,7 @@ static inline void skb_frag_off_copy(skb_frag_t *fragto, > */ > static inline struct page *skb_frag_page(const skb_frag_t *frag) > { > - return frag->bv_page; > + return netmem_to_page(frag->netmem); > } > > /** > @@ -3526,7 +3564,7 @@ static inline void *skb_frag_address_safe(const skb_frag_t *frag) > static inline void skb_frag_page_copy(skb_frag_t *fragto, > const skb_frag_t *fragfrom) > { > - fragto->bv_page = fragfrom->bv_page; > + fragto->netmem = fragfrom->netmem; > } > > bool skb_page_frag_refill(unsigned int sz, struct page_frag *pfrag, gfp_t prio); > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > index 4d4b11b0a83d..8b55e927bbe9 100644 > --- a/net/core/skbuff.c > +++ b/net/core/skbuff.c > @@ -845,16 +845,24 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, > } > EXPORT_SYMBOL(__napi_alloc_skb); > > -void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, > - int size, unsigned int truesize) > +void skb_add_rx_frag_netmem(struct sk_buff *skb, int i, netmem_ref netmem, > + int off, int size, unsigned int truesize) > { > DEBUG_NET_WARN_ON_ONCE(size > truesize); > > - skb_fill_page_desc(skb, i, page, off, size); > + skb_fill_netmem_desc(skb, i, netmem, off, size); > skb->len += size; > skb->data_len += size; > skb->truesize += truesize; > } > +EXPORT_SYMBOL(skb_add_rx_frag_netmem); > + > +void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, > + int size, unsigned int truesize) > +{ > + skb_add_rx_frag_netmem(skb, i, page_to_netmem(page), off, size, > + truesize); > +} > EXPORT_SYMBOL(skb_add_rx_frag); > > void skb_coalesce_rx_frag(struct sk_buff *skb, int i, int size, > @@ -1904,10 +1912,11 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) > > /* skb frags point to kernel buffers */ > for (i = 0; i < new_frags - 1; i++) { > - __skb_fill_page_desc(skb, i, head, 0, psize); > + __skb_fill_netmem_desc(skb, i, page_to_netmem(head), 0, psize); > head = (struct page *)page_private(head); > } > - __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); > + __skb_fill_netmem_desc(skb, new_frags - 1, page_to_netmem(head), 0, > + d_off); > skb_shinfo(skb)->nr_frags = new_frags; > > release: > @@ -3645,7 +3654,8 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) > if (plen) { > page = virt_to_head_page(from->head); > offset = from->data - (unsigned char *)page_address(page); > - __skb_fill_page_desc(to, 0, page, offset, plen); > + __skb_fill_netmem_desc(to, 0, page_to_netmem(page), > + offset, plen); > get_page(page); > j = 1; > len -= plen; > diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c > index 65d1f6755f98..3180a54b2c68 100644 > --- a/net/kcm/kcmsock.c > +++ b/net/kcm/kcmsock.c > @@ -636,9 +636,15 @@ static int kcm_write_msgs(struct kcm_sock *kcm) > for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) > msize += skb_shinfo(skb)->frags[i].bv_len; > > + /* The cast to struct bio_vec* here assumes the frags are > + * struct page based. WARN if there is no page in this skb. > + */ > + DEBUG_NET_WARN_ON_ONCE( > + !skb_frag_page(&skb_shinfo(skb)->frags[0])); > + It would be unsafe to continue the operation in this case. Even though we should never get here, test and exit in all codepaths, similar to other test above? if (WARN_ON(!skb_shinfo(skb)->nr_frags)) { ret = -EINVAL; goto out; } > iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, > - skb_shinfo(skb)->frags, skb_shinfo(skb)->nr_frags, > - msize); > + (const struct bio_vec *)skb_shinfo(skb)->frags, > + skb_shinfo(skb)->nr_frags, msize); > iov_iter_advance(&msg.msg_iter, txm->frag_offset); > > do { > -- > 2.43.0.472.g3155946c3a-goog >