Yan Zhai wrote: > Software GRO is currently controlled by a single switch, i.e. > > ethtool -K dev gro on|off > > However, this is not always desired. When GRO is enabled, even if the > kernel cannot GRO certain traffic, it has to run through the GRO receive > handlers with no benefit. > > There are also scenarios that turning off GRO is a requirement. For > example, our production environment has a scenario that a TC egress hook > may add multiple encapsulation headers to forwarded skbs for load > balancing and isolation purpose. The encapsulation is implemented via > BPF. But the problem arises then: there is no way to properly offload a > double-encapsulated packet, since skb only has network_header and > inner_network_header to track one layer of encapsulation, but not two. > On the other hand, not all the traffic through this device needs double > encapsulation. But we have to turn off GRO completely for any ingress > device as a result. > > Introduce a bit on skb so that GRO engine can be notified to skip GRO on > this skb, rather than having to be 0-or-1 for all traffic. 
> > Signed-off-by: Yan Zhai <yan@xxxxxxxxxxxxxx> > --- > include/linux/netdevice.h | 9 +++++++-- > include/linux/skbuff.h | 10 ++++++++++ > net/Kconfig | 10 ++++++++++ > net/core/gro.c | 2 +- > net/core/gro_cells.c | 2 +- > net/core/skbuff.c | 4 ++++ > 6 files changed, 33 insertions(+), 4 deletions(-) > > diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h > index c83b390191d4..2ca0870b1221 100644 > --- a/include/linux/netdevice.h > +++ b/include/linux/netdevice.h > @@ -2415,11 +2415,16 @@ struct net_device { > ((dev)->devlink_port = (port)); \ > }) > > -static inline bool netif_elide_gro(const struct net_device *dev) > +static inline bool netif_elide_gro(const struct sk_buff *skb) > { > - if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) > + if (!(skb->dev->features & NETIF_F_GRO) || skb->dev->xdp_prog) > return true; > + > +#ifdef CONFIG_SKB_GRO_CONTROL > + return skb->gro_disabled; > +#else > return false; > +#endif Yet more branches in the hot path. Compile-time configurability does not help, as that will be enabled by distros. For a fairly niche use case. Where functionality of GRO already works. So just a performance gain for a very rare case at the cost of a regression in the common case. A small regression perhaps, but death by a thousand cuts. 
> } > > #define NETDEV_ALIGN 32 > diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h > index f4cda3fbdb75..48b10ece95b5 100644 > --- a/include/linux/skbuff.h > +++ b/include/linux/skbuff.h > @@ -1008,6 +1008,9 @@ struct sk_buff { > #if IS_ENABLED(CONFIG_IP_SCTP) > __u8 csum_not_inet:1; > #endif > +#ifdef CONFIG_SKB_GRO_CONTROL > + __u8 gro_disabled:1; > +#endif > > #if defined(CONFIG_NET_SCHED) || defined(CONFIG_NET_XGRESS) > __u16 tc_index; /* traffic control index */ > @@ -1215,6 +1218,13 @@ static inline bool skb_wifi_acked_valid(const struct sk_buff *skb) > #endif > } > > +static inline void skb_disable_gro(struct sk_buff *skb) > +{ > +#ifdef CONFIG_SKB_GRO_CONTROL > + skb->gro_disabled = 1; > +#endif > +} > + > /** > * skb_unref - decrement the skb's reference count > * @skb: buffer > diff --git a/net/Kconfig b/net/Kconfig > index 9fe65fa26e48..47d1ee92df15 100644 > --- a/net/Kconfig > +++ b/net/Kconfig > @@ -289,6 +289,16 @@ config MAX_SKB_FRAGS > and in drivers using build_skb(). > If unsure, say 17. > > +config SKB_GRO_CONTROL > + bool "allow disable GRO on per-packet basis" > + default y > + help > + By default GRO can only be enabled or disabled per network device. > + This can be cumbersome for certain scenarios. > + Toggling this option will allow disabling GRO for selected packets, > + e.g. by XDP programs, so that it is more flexibile. > + Extra overhead should be minimal. 
> + > config RPS > bool "Receive packet steering" > depends on SMP && SYSFS > diff --git a/net/core/gro.c b/net/core/gro.c > index b3b43de1a650..46232a0d1983 100644 > --- a/net/core/gro.c > +++ b/net/core/gro.c > @@ -476,7 +476,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff > enum gro_result ret; > int same_flow; > > - if (netif_elide_gro(skb->dev)) > + if (netif_elide_gro(skb)) > goto normal; > > gro_list_prepare(&gro_list->list, skb); > diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c > index ff8e5b64bf6b..1bf15783300f 100644 > --- a/net/core/gro_cells.c > +++ b/net/core/gro_cells.c > @@ -20,7 +20,7 @@ int gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) > if (unlikely(!(dev->flags & IFF_UP))) > goto drop; > > - if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(dev)) { > + if (!gcells->cells || skb_cloned(skb) || netif_elide_gro(skb)) { > res = netif_rx(skb); > goto unlock; > } > diff --git a/net/core/skbuff.c b/net/core/skbuff.c > index 2315c088e91d..82bd297921c1 100644 > --- a/net/core/skbuff.c > +++ b/net/core/skbuff.c > @@ -6030,6 +6030,10 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) > ipvs_reset(skb); > skb->mark = 0; > skb_clear_tstamp(skb); > +#ifdef CONFIG_SKB_GRO_CONTROL > + /* hand back GRO control to next netns */ > + skb->gro_disabled = 0; > +#endif > } > EXPORT_SYMBOL_GPL(skb_scrub_packet); > > -- > 2.30.2 > >