WORK IN PROGRESS: * bpf program loading works! * txq steering via bpf program return code works! * bpf program unloading not working. * bpf program attached query not working. --- include/linux/netdevice.h | 3 +++ include/uapi/linux/if_link.h | 12 +++++++++ net/core/dev.c | 61 ++++++++++++++++++++++++++++++++++++------- net/core/rtnetlink.c | 62 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 129 insertions(+), 9 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9eda1c3..88e37d5 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1966,6 +1966,7 @@ struct net_device { #ifdef CONFIG_XPS struct xps_dev_maps __rcu *xps_cpus_map; struct xps_dev_maps __rcu *xps_rxqs_map; + struct bpf_prog __rcu *xps_prog; #endif #ifdef CONFIG_NET_CLS_ACT struct mini_Qdisc __rcu *miniq_egress; @@ -2147,6 +2148,8 @@ struct netdev_queue *netdev_core_pick_tx(struct net_device *dev, struct sk_buff *skb, struct net_device *sb_dev); +int dev_change_xps_fd(struct net_device *dev, int fd); + /* returns the headroom that the master device needs to take in account * when forwarding to this dev */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4a8c02c..a23d241 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -167,6 +167,7 @@ enum { IFLA_NEW_IFINDEX, IFLA_MIN_MTU, IFLA_MAX_MTU, + IFLA_XPS, __IFLA_MAX }; @@ -979,6 +980,17 @@ enum { #define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) +/* XPS section */ + +enum { + IFLA_XPS_UNSPEC, + IFLA_XPS_FD, + IFLA_XPS_ATTACHED, + __IFLA_XPS_MAX, +}; + +#define IFLA_XPS_MAX (__IFLA_XPS_MAX - 1) + enum { IFLA_EVENT_NONE, IFLA_EVENT_REBOOT, /* internal reset / reboot */ diff --git a/net/core/dev.c b/net/core/dev.c index 71b18e8..a46d42b 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3663,26 +3663,34 @@ static int get_xps_queue(struct net_device *dev, struct net_device *sb_dev, { #ifdef CONFIG_XPS struct xps_dev_maps *dev_maps; + struct bpf_prog *prog; struct sock *sk = skb->sk; + int bpf_ret = -1; int queue_index = -1; if (!static_key_false(&xps_needed)) return -1; rcu_read_lock(); - if (!static_key_false(&xps_rxqs_needed)) - goto get_cpus_map; - dev_maps = rcu_dereference(sb_dev->xps_rxqs_map); - if (dev_maps) { - int tci = sk_rx_queue_get(sk); + prog = rcu_dereference(dev->xps_prog); + if (prog) { + bpf_ret = bpf_prog_run_clear_cb(prog, skb); + if (bpf_ret >= 0) + queue_index = bpf_ret % dev->num_tx_queues; + } - if (tci >= 0 && tci < dev->num_rx_queues) - queue_index = __get_xps_queue_idx(dev, skb, dev_maps, - tci); + if (queue_index < 0 && static_key_false(&xps_rxqs_needed)) { + dev_maps = rcu_dereference(sb_dev->xps_rxqs_map); + if (dev_maps) { + int tci = sk_rx_queue_get(sk); + + if (tci >= 0 && tci < dev->num_rx_queues) + queue_index = __get_xps_queue_idx(dev, skb, + dev_maps, tci); + } } -get_cpus_map: if (queue_index < 0) { dev_maps = rcu_dereference(sb_dev->xps_cpus_map); if (dev_maps) { @@ -8170,6 +8178,41 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, return err; } +static void dev_xps_install(struct net_device *dev, struct bpf_prog *prog) +{ +#ifdef CONFIG_XPS + struct bpf_prog *old = rtnl_dereference(dev->xps_prog); + struct bpf_prog *new = prog; + + rcu_assign_pointer(dev->xps_prog, new); + if (old) + bpf_prog_put(old); +#endif +} + +/** + * dev_change_xps_fd - set or clear a bpf program for tx queue selection for a device + * @dev: device + * @fd: new program fd or negative value to clear + * + * Set or clear a bpf program for a device + */ +int dev_change_xps_fd(struct net_device *dev, int fd) +{ + struct bpf_prog *prog = NULL; + + ASSERT_RTNL(); + + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_SOCKET_FILTER); + + if (IS_ERR(prog)) + return PTR_ERR(prog); + + dev_xps_install(dev, prog); + + return 0; +} + /** * dev_new_index - allocate an ifindex * @net: the applicable net namespace diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 1ee6460..202b59a 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -980,6 +980,15 @@ static size_t rtnl_xdp_size(void) return xdp_size; } +static size_t rtnl_xps_size(void) +{ + size_t xps_size = nla_total_size(0) + /* nest IFLA_XPS */ + nla_total_size(1) + /* XPS_ATTACHED */ + nla_total_size(4); /* XPS_PROG_ID */ + + return xps_size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -1018,6 +1027,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size() /* IFLA_XDP */ + + rtnl_xps_size() /* IFLA_XPS */ + nla_total_size(4) /* IFLA_EVENT */ + nla_total_size(4) /* IFLA_NEW_NETNSID */ + nla_total_size(4) /* IFLA_NEW_IFINDEX */ @@ -1455,6 +1465,31 @@ static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) return err; } +static int rtnl_xps_fill(struct sk_buff *skb, struct net_device *dev) +{ + struct nlattr *xps; + struct bpf_prog *xps_prog; + int err; + + ASSERT_RTNL(); + + xps = nla_nest_start(skb, IFLA_XPS); + if (!xps) + return -EMSGSIZE; + + xps_prog = rtnl_dereference(dev->xps_prog); + if (xps_prog) { + err = nla_put_u8(skb, IFLA_XPS_ATTACHED, xps_prog->aux->id); + if (err) { + nla_nest_cancel(skb, xps); + return err; + } + } + + nla_nest_end(skb, xps); + return 0; +} + static u32 rtnl_get_event(unsigned long event) { u32 rtnl_event_type = IFLA_EVENT_NONE; @@ -1697,6 +1732,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, goto nla_put_failure_rcu; rcu_read_unlock(); + if (rtnl_xps_fill(skb, dev)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -1750,6 +1788,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, [IFLA_CARRIER_DOWN_COUNT] = { .type = NLA_U32 }, [IFLA_MIN_MTU] = { .type = NLA_U32 }, [IFLA_MAX_MTU] = { .type = NLA_U32 }, + [IFLA_XPS] = { .type = NLA_NESTED }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1801,6 +1840,11 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, [IFLA_XDP_PROG_ID] = { .type = NLA_U32 }, }; +static const struct nla_policy ifla_xps_policy[IFLA_XPS_MAX + 1] = { + [IFLA_XPS_FD] = { .type = NLA_S32 }, + [IFLA_XPS_ATTACHED] = { .type = NLA_U8 }, +}; + static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) { const struct rtnl_link_ops *ops = NULL; @@ -2709,6 +2753,24 @@ static int do_setlink(const struct sk_buff *skb, } } + if (tb[IFLA_XPS]) { + struct nlattr *xps[IFLA_XPS_MAX + 1]; + + err = nla_parse_nested_deprecated(xps, IFLA_XPS_MAX, + tb[IFLA_XPS], + ifla_xps_policy, NULL); + if (err < 0) + goto errout; + + if (xps[IFLA_XPS_FD]) { + err = dev_change_xps_fd(dev, + nla_get_s32(xps[IFLA_XPS_FD])); + if (err) + goto errout; + status |= DO_SETLINK_NOTIFY; + } + } + errout: if (status & DO_SETLINK_MODIFIED) { if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) -- 1.8.3.1