(Cc. BPF list and other XDP maintainers)
On 14/03/2022 11.22, Felix Fietkau wrote:
Most ethernet drivers allocate a packet headroom of NET_SKB_PAD. Since it is
rounded up to L1 cache size, it ends up being at least 64 bytes on the most
common platforms.
On most ethernet drivers, having a guaranteed headroom of 256 bytes for XDP
adds an extra forced pskb_expand_head call when enabling SKB XDP, which can
be quite expensive.
Many XDP programs need only very little headroom, so it can be beneficial
to have a way to opt-out of the 256 bytes headroom requirement.
IMHO 64 bytes is too small.
We are using this area for struct xdp_frame and also for metadata
(XDP-hints). This will limit us from growing this structures for
the sake of generic-XDP.
I'm fine with reducting this to 192 bytes, as most Intel drivers
have this headroom, and have defacto established that this is
a valid XDP headroom, even for native-XDP.
We could go a small as two cachelines 128 bytes, as if xdp_frame
and metadata grows above a cache-line (64 bytes) each, then we have
done something wrong (performance wise).
Add an extra flag XDP_FLAGS_SMALL_HEADROOM that can be set when attaching
the XDP program, which reduces the minimum headroom to 64 bytes.
I don't like a flags approach.
Multiple disadvantages.
(a) Harder to use
(b) Now reading a new cache-line in net_device
Signed-off-by: Felix Fietkau <nbd@xxxxxxxx>
---
include/linux/netdevice.h | 1 +
include/uapi/linux/bpf.h | 1 +
include/uapi/linux/if_link.h | 4 +++-
net/core/dev.c | 9 ++++++++-
tools/include/uapi/linux/if_link.h | 4 +++-
5 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0d994710b335..f6f270a5e301 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -2274,6 +2274,7 @@ struct net_device {
bool proto_down;
unsigned wol_enabled:1;
unsigned threaded:1;
+ unsigned xdp_small_headroom:1;
Looks like we need to read this cache-line, in a XDP (generic) fastpath.
struct list_head net_notifier_list;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4eebea830613..7635dfb02313 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5688,6 +5688,7 @@ struct bpf_xdp_sock {
};
#define XDP_PACKET_HEADROOM 256
+#define XDP_PACKET_HEADROOM_SMALL 64
Define it 192 instead.
/* User return codes for XDP prog type.
* A valid XDP program must return one of these defined values. All other
diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index e003a0b9b4b2..acb996334910 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -1275,11 +1275,13 @@ enum {
#define XDP_FLAGS_DRV_MODE (1U << 2)
#define XDP_FLAGS_HW_MODE (1U << 3)
#define XDP_FLAGS_REPLACE (1U << 4)
+#define XDP_FLAGS_SMALL_HEADROOM (1U << 5)
#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \
XDP_FLAGS_DRV_MODE | \
XDP_FLAGS_HW_MODE)
#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \
- XDP_FLAGS_MODES | XDP_FLAGS_REPLACE)
+ XDP_FLAGS_MODES | XDP_FLAGS_REPLACE | \
+ XDP_FLAGS_SMALL_HEADROOM)
/* These are stored into IFLA_XDP_ATTACHED on dump. */
enum {
diff --git a/net/core/dev.c b/net/core/dev.c
index 8d25ec5b3af7..cb12379b8f11 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4722,6 +4722,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
struct xdp_buff *xdp,
struct bpf_prog *xdp_prog)
{
+ int min_headroom = XDP_PACKET_HEADROOM;
u32 act = XDP_DROP;
/* Reinjected packets coming from act_mirred or similar should
@@ -4730,12 +4731,15 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
if (skb_is_redirected(skb))
return XDP_PASS;
+ if (skb->dev->xdp_small_headroom)
+ min_headroom = XDP_PACKET_HEADROOM_SMALL;
+
/* XDP packets must be linear and must have sufficient headroom
* of XDP_PACKET_HEADROOM bytes. This is the guarantee that also
* native XDP provides, thus we need to do it here as well.
*/
if (skb_cloned(skb) || skb_is_nonlinear(skb) ||
- skb_headroom(skb) < XDP_PACKET_HEADROOM) {
+ skb_headroom(skb) < min_headroom) {
Use define XDP_PACKET_HEADROOM_SMALL here directly.
int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb);
int troom = skb->tail + skb->data_len - skb->end;
@@ -9135,6 +9139,9 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
return err;
}
+ if (mode == XDP_MODE_SKB)
+ dev->xdp_small_headroom = !!(flags & XDP_FLAGS_SMALL_HEADROOM);
+
if (link)
dev_xdp_set_link(dev, mode, link);
else
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index e1ba2d51b717..0df737a6c489 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -1185,11 +1185,13 @@ enum {
#define XDP_FLAGS_DRV_MODE (1U << 2)
#define XDP_FLAGS_HW_MODE (1U << 3)
#define XDP_FLAGS_REPLACE (1U << 4)
+#define XDP_FLAGS_SMALL_HEADROOM (1U << 5)
#define XDP_FLAGS_MODES (XDP_FLAGS_SKB_MODE | \
XDP_FLAGS_DRV_MODE | \
XDP_FLAGS_HW_MODE)
#define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \
- XDP_FLAGS_MODES | XDP_FLAGS_REPLACE)
+ XDP_FLAGS_MODES | XDP_FLAGS_REPLACE | \
+ XDP_FLAGS_SMALL_HEADROOM)
/* These are stored into IFLA_XDP_ATTACHED on dump. */
enum {
--Jesper