queue = rcu_dereference(tap->taps[rxq % numvtaps]);
goto out;
}
@@ -713,11 +748,12 @@ static ssize_t tap_put_user(struct tap_queue *q,
int total;
if (q->flags & IFF_VNET_HDR) {
- struct virtio_net_hdr_v1 vnet_hdr;
+ struct virtio_net_hdr_v1_hash vnet_hdr;
vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
- ret = tun_vnet_hdr_from_skb(q->flags, NULL, skb, &vnet_hdr);
+ ret = tun_vnet_hdr_from_skb(vnet_hdr_len, q->flags, NULL, skb,
+ tap_find_hash, &vnet_hdr);
if (ret < 0)
goto done;
@@ -1025,7 +1061,13 @@ static long tap_ioctl(struct file *file, unsigned int cmd,
return ret;
default:
- return tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags, cmd, sp);
+ rtnl_lock();
+ tap = rtnl_dereference(q->tap);
+ ret = tun_vnet_ioctl(&q->vnet_hdr_sz, &q->flags,
+ tap ? &tap->vnet_hash : NULL, -EINVAL,
+ true, cmd, sp);
+ rtnl_unlock();
+ return ret;
}
}
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index f211d0580887..efdbd2f65100 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -209,6 +209,7 @@ struct tun_struct {
struct bpf_prog __rcu *xdp_prog;
struct tun_prog __rcu *steering_prog;
struct tun_prog __rcu *filter_prog;
+ struct tun_vnet_hash_container __rcu *vnet_hash;
struct ethtool_link_ksettings link_ksettings;
/* init args */
struct file *file;
@@ -451,20 +452,37 @@ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
e->rps_rxhash = hash;
}
+static struct virtio_net_hash *tun_add_hash(struct sk_buff *skb)
+{
+ return skb_ext_add(skb, SKB_EXT_TUN_VNET_HASH);
+}
+
+static const struct virtio_net_hash *tun_find_hash(const struct sk_buff *skb)
+{
+ return skb_ext_find(skb, SKB_EXT_TUN_VNET_HASH);
+}
+
/* We try to identify a flow through its rxhash. The reason that
* we do not check rxq no. is because some cards(e.g 82599), chooses
* the rxq based on the txq where the last packet of the flow comes. As
* the userspace application move between processors, we may get a
* different rxq no. here.
*/
-static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
+static u16 tun_automq_select_queue(struct tun_struct *tun,
+ const struct tun_vnet_hash_container *vnet_hash,
+ struct sk_buff *skb)
{
+ struct flow_keys keys;
+ struct flow_keys_basic keys_basic;
struct tun_flow_entry *e;
u32 txq, numqueues;
numqueues = READ_ONCE(tun->numqueues);
- txq = __skb_get_hash_symmetric(skb);
+ memset(&keys, 0, sizeof(keys));
+ skb_flow_dissect(skb, &flow_keys_dissector_symmetric, &keys, 0);
+
+ txq = flow_hash_from_keys(&keys);
e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
if (e) {
tun_flow_save_rps_rxhash(e, txq);
@@ -473,6 +491,13 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
txq = reciprocal_scale(txq, numqueues);
}
+ keys_basic = (struct flow_keys_basic) {
+ .control = keys.control,
+ .basic = keys.basic
+ };
+ tun_vnet_hash_report(vnet_hash, skb, &keys_basic, skb->l4_hash ? skb->hash : txq,
+ tun_add_hash);
+
return txq;
}
@@ -500,10 +525,17 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
u16 ret;
rcu_read_lock();
- if (rcu_dereference(tun->steering_prog))
+ if (rcu_dereference(tun->steering_prog)) {
ret = tun_ebpf_select_queue(tun, skb);
- else
- ret = tun_automq_select_queue(tun, skb);
+ } else {
+ struct tun_vnet_hash_container *vnet_hash = rcu_dereference(tun->vnet_hash);
+
+ if (vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_RSS))
+ ret = tun_vnet_rss_select_queue(READ_ONCE(tun->numqueues), vnet_hash,
+ skb, tun_add_hash);
+ else
+ ret = tun_automq_select_queue(tun, vnet_hash, skb);
+ }
rcu_read_unlock();
return ret;
@@ -1991,8 +2023,8 @@ static ssize_t tun_put_user_xdp(struct tun_struct *tun,
size_t total;
if (tun->flags & IFF_VNET_HDR) {
- struct virtio_net_hdr_v1 gso = {
- .num_buffers = __virtio16_to_cpu(true, 1)
+ struct virtio_net_hdr_v1_hash gso = {
+ .hdr = { .num_buffers = __virtio16_to_cpu(true, 1) }
};
vnet_hdr_sz = READ_ONCE(tun->vnet_hdr_sz);
@@ -2021,7 +2053,6 @@ static ssize_t tun_put_user(struct tun_struct *tun,
int vlan_offset = 0;
int vlan_hlen = 0;
int vnet_hdr_sz = 0;
- int ret;
if (skb_vlan_tag_present(skb))
vlan_hlen = VLAN_HLEN;
@@ -2046,9 +2077,11 @@ static ssize_t tun_put_user(struct tun_struct *tun,
}
if (vnet_hdr_sz) {
- struct virtio_net_hdr_v1 gso;
+ struct virtio_net_hdr_v1_hash gso;
+ int ret;
- ret = tun_vnet_hdr_from_skb(tun->flags, tun->dev, skb, &gso);
+ ret = tun_vnet_hdr_from_skb(vnet_hdr_sz, tun->flags, tun->dev,
+ skb, tun_find_hash, &gso);
if (ret < 0)
goto done;
@@ -2229,6 +2262,9 @@ static void tun_free_netdev(struct net_device *dev)
security_tun_dev_free_security(tun->security);
__tun_set_ebpf(tun, &tun->steering_prog, NULL);
__tun_set_ebpf(tun, &tun->filter_prog, NULL);
+ rtnl_lock();
+ kfree_rcu_mightsleep(rtnl_dereference(tun->vnet_hash));
+ rtnl_unlock();
}
static void tun_setup(struct net_device *dev)
@@ -2927,13 +2963,9 @@ static int tun_set_queue(struct file *file, struct ifreq *ifr)
}
static int tun_set_ebpf(struct tun_struct *tun, struct tun_prog __rcu **prog_p,
- void __user *data)
+ int fd)
{
struct bpf_prog *prog;
- int fd;
-
- if (copy_from_user(&fd, data, sizeof(fd)))
- return -EFAULT;
if (fd == -1) {
prog = NULL;
@@ -3000,6 +3032,7 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
int sndbuf;
int ret;
bool do_notify = false;
+ struct tun_vnet_hash_container *vnet_hash;
if (cmd == TUNSETIFF || cmd == TUNSETQUEUE ||
(_IOC_TYPE(cmd) == SOCK_IOC_TYPE && cmd != SIOCGSKNS)) {
@@ -3058,9 +3091,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
goto unlock;
}
- ret = -EBADFD;
- if (!tun)
+ if (!tun) {
+ ret = tun_vnet_ioctl(NULL, NULL, NULL, -EBADFD, true, cmd, argp);
goto unlock;
+ }
netif_info(tun, drv, tun->dev, "tun_chr_ioctl cmd %u\n", cmd);
@@ -3236,11 +3270,27 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
break;
case TUNSETSTEERINGEBPF:
- ret = tun_set_ebpf(tun, &tun->steering_prog, argp);
+ if (get_user(ret, (int __user *)argp)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ vnet_hash = rtnl_dereference(tun->vnet_hash);
+ if (ret != -1 && vnet_hash && (vnet_hash->common.flags & TUN_VNET_HASH_RSS)) {
+ ret = -EBUSY;
+ break;
+ }
+
+ ret = tun_set_ebpf(tun, &tun->steering_prog, ret);
break;
case TUNSETFILTEREBPF:
- ret = tun_set_ebpf(tun, &tun->filter_prog, argp);
+ if (get_user(ret, (int __user *)argp)) {
+ ret = -EFAULT;
+ break;
+ }
+
+ ret = tun_set_ebpf(tun, &tun->filter_prog, ret);
break;
case TUNSETCARRIER:
@@ -3259,7 +3309,10 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd,
break;
default:
- ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags, cmd, argp);
+ ret = tun_vnet_ioctl(&tun->vnet_hdr_sz, &tun->flags,
+ &tun->vnet_hash, -EINVAL,
+ !rtnl_dereference(tun->steering_prog),
+ cmd, argp);
}
if (do_notify)
diff --git a/drivers/net/tun_vnet.c b/drivers/net/tun_vnet.c
index a7a7989fae56..d36ca3b23265 100644
--- a/drivers/net/tun_vnet.c
+++ b/drivers/net/tun_vnet.c
@@ -58,18 +58,33 @@ static __virtio16 cpu_to_tun_vnet16(unsigned int flags, u16 val)
}
long tun_vnet_ioctl(int *sz, unsigned int *flags,
- unsigned int cmd, int __user *sp)
+ struct tun_vnet_hash_container __rcu **hashp,
+ long fallback, bool can_rss,
+ unsigned int cmd, void __user *argp)
{
+ static const struct tun_vnet_hash cap = {
+ .flags = TUN_VNET_HASH_REPORT | TUN_VNET_HASH_RSS,
+ .types = VIRTIO_NET_SUPPORTED_HASH_TYPES
+ };
+ struct tun_vnet_hash hash_buf;
+ struct tun_vnet_hash_container *hash;
+ int __user *sp = argp;
int s;
switch (cmd) {
case TUNGETVNETHDRSZ:
+ if (!sz)
+ return -EBADFD;
+
s = *sz;
if (put_user(s, sp))
return -EFAULT;
return 0;
case TUNSETVNETHDRSZ:
+ if (!sz)
+ return -EBADFD;
+