Re: [PATCH net-next v9 1/6] virtio_net: Add functions for hashing

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2025/03/10 12:55, Jason Wang wrote:
On Fri, Mar 7, 2025 at 7:01 PM Akihiko Odaki <akihiko.odaki@xxxxxxxxxx> wrote:

They are useful to implement VIRTIO_NET_F_RSS and
VIRTIO_NET_F_HASH_REPORT.

Signed-off-by: Akihiko Odaki <akihiko.odaki@xxxxxxxxxx>
Tested-by: Lei Yang <leiyang@xxxxxxxxxx>
---
  include/linux/virtio_net.h | 188 +++++++++++++++++++++++++++++++++++++++++++++
  1 file changed, 188 insertions(+)

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 02a9f4dc594d02372a6c1850cd600eff9d000d8d..426f33b4b82440d61b2af9fdc4c0b0d4c571b2c5 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -9,6 +9,194 @@
  #include <uapi/linux/tcp.h>
  #include <uapi/linux/virtio_net.h>

+struct virtio_net_hash { /* result filled in by virtio_net_hash_rss() */
+       u32 value; /* Toeplitz hash; written only when report is not NONE */
+       u16 report; /* one of the VIRTIO_NET_HASH_REPORT_* values */
+};
+
+struct virtio_net_toeplitz_state { /* running state of virtio_net_toeplitz_calc() */
+       u32 hash; /* accumulated hash, XOR-updated for each set input bit */
+       const u32 *key; /* current key word; advanced one u32 per input word */
+};
+
+#define VIRTIO_NET_SUPPORTED_HASH_TYPES (VIRTIO_NET_RSS_HASH_TYPE_IPv4 | \
+                                        VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | \
+                                        VIRTIO_NET_RSS_HASH_TYPE_UDPv4 | \
+                                        VIRTIO_NET_RSS_HASH_TYPE_IPv6 | \
+                                        VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | \
+                                        VIRTIO_NET_RSS_HASH_TYPE_UDPv6) /* the IPv6 _EX hash types are not included */

Let's explain why

#define VIRTIO_NET_HASH_REPORT_IPv6_EX         7
#define VIRTIO_NET_HASH_REPORT_TCPv6_EX        8
#define VIRTIO_NET_HASH_REPORT_UDPv6_EX        9

are missing here.

Because they require parsing IPv6 options and I'm not sure how many we need to parse. QEMU's eBPF program has a hard-coded limit of 30 options; it has some explanation for this limit, but it does not seem definitive either:
https://gitlab.com/qemu-project/qemu/-/commit/f3fa412de28ae3cb31d38811d30a77e4e20456cc#6ec48fc8af2f802e92f5127425e845c4c213ff60_0_165

In this patch series, I add an ioctl to query the capability instead; it allows me to leave those hash types unimplemented, and it is crucial for ensuring extensibility for future additions of hash types anyway. Anyone who finds these hash types useful can implement them in the future.


And explain how we could maintain migration compatibility

1) Do those three work for the userspace datapath in QEMU? If yes,
migration will be broken.

They work for userspace datapath so my RFC patch series for QEMU uses TUNGETVNETHASHCAP to prevent breaking migration:
https://patchew.org/QEMU/20240915-hash-v3-0-79cb08d28647@xxxxxxxxxx/

This patch series first adds configuration options for users to choose hash types. QEMU then automatically picks one implementation from the following (the earlier one is the more preferred):
1) The hash capability of vhost hardware
2) The hash capability I'm proposing here
3) The eBPF program
4) The pure userspace implementation

This decision depends on the following:
- The required hash types; supported ones are queried for 1) and 2)
- Whether vhost is enabled or not and what vhost backend is used
- Whether hash reporting is enabled; 3) is incompatible with this

The network device will not be realized if no implementation satisfies the requirements.

2) What happens once we support those three in the future? For example,
is QEMU expected to probe this via TUNGETVNETHASHCAP on the destination
and fail the migration?

QEMU is expected to use TUNGETVNETHASHCAP, but it can selectively enable hash types with TUNSETVNETHASH to keep migration working.

In summary, this patch series provides a sufficient facility for the userspace to make extensibility and migration compatible; TUNGETVNETHASHCAP exposes all of the kernel capabilities and TUNSETVNETHASH allows the userspace to limit them.

Regards,
Akihiko Odaki


Thanks



+
+#define VIRTIO_NET_RSS_MAX_KEY_SIZE 40 /* presumably in bytes; not referenced in this hunk - TODO confirm */
+
+/*
+ * Convert a Toeplitz key in place from big-endian to CPU byte order.
+ *
+ * @input: key words to convert; each whole word is overwritten
+ * @len:   length of the key in bytes; trailing bytes that do not make up
+ *         a whole u32 are left untouched
+ */
+static inline void virtio_net_toeplitz_convert_key(u32 *input, size_t len)
+{
+       size_t nwords = len / sizeof(*input);
+
+       for (size_t i = 0; i < nwords; i++)
+               input[i] = be32_to_cpu((__force __be32)input[i]);
+}
+
+/*
+ * Fold @len bytes of big-endian @input into the running Toeplitz hash.
+ *
+ * For every set bit of each 32-bit input word, the 32-bit window of the
+ * key starting at that bit's offset (counted from the most significant
+ * bit) is XORed into state->hash. state->key advances by one word per
+ * input word consumed, so a following call continues where this one
+ * stopped. key[1] may be read for each word consumed, so the key has to
+ * extend one u32 beyond the data being hashed. Trailing bytes that do not
+ * fill a whole word are ignored.
+ */
+static inline void virtio_net_toeplitz_calc(struct virtio_net_toeplitz_state *state,
+                                           const __be32 *input, size_t len)
+{
+       const __be32 *end = input + len / sizeof(*input);
+
+       for (; input < end; input++, state->key++) {
+               u32 pending = be32_to_cpu(*input);
+
+               while (pending) {
+                       /* 1-based position of the lowest bit still pending */
+                       u32 lsb = ffs(pending);
+                       u32 hi = state->key[0] << (32 - lsb);
+                       u32 lo = (u32)((u64)state->key[1] >> lsb);
+
+                       state->hash ^= hi | lo;
+                       pending &= pending - 1; /* clear that bit */
+               }
+       }
+}
+
+/*
+ * Return the Toeplitz key length, in bytes, needed to hash the enabled
+ * hash types.
+ *
+ * @types: bitmask of VIRTIO_NET_RSS_HASH_TYPE_* flags
+ *
+ * The result is the largest hash input size among the enabled types plus
+ * one extra u32, because virtio_net_toeplitz_calc() reads key[1] while
+ * consuming the last input word.
+ */
+static inline u8 virtio_net_hash_key_length(u32 types)
+{
+       size_t len = 0;
+
+       /*
+        * Test the RSS_HASH_TYPE bit flags here, as virtio_net_hash_report()
+        * does. The HASH_REPORT constants are sequential identifiers rather
+        * than flags, so masking with them would check the wrong bits.
+        */
+       if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv4)
+               len = max(len,
+                         sizeof(struct flow_dissector_key_ipv4_addrs));
+
+       if (types &
+           (VIRTIO_NET_RSS_HASH_TYPE_TCPv4 | VIRTIO_NET_RSS_HASH_TYPE_UDPv4))
+               len = max(len,
+                         sizeof(struct flow_dissector_key_ipv4_addrs) +
+                         sizeof(struct flow_dissector_key_ports));
+
+       if (types & VIRTIO_NET_RSS_HASH_TYPE_IPv6)
+               len = max(len,
+                         sizeof(struct flow_dissector_key_ipv6_addrs));
+
+       if (types &
+           (VIRTIO_NET_RSS_HASH_TYPE_TCPv6 | VIRTIO_NET_RSS_HASH_TYPE_UDPv6))
+               len = max(len,
+                         sizeof(struct flow_dissector_key_ipv6_addrs) +
+                         sizeof(struct flow_dissector_key_ports));
+
+       /* One more key word: the Toeplitz window spans key[0] and key[1]. */
+       return len + sizeof(u32);
+}
+
+/*
+ * Pick the hash report type for a dissected flow.
+ *
+ * @types: bitmask of enabled VIRTIO_NET_RSS_HASH_TYPE_* flags
+ * @keys:  dissected control and basic flow keys
+ *
+ * For IPv4 and IPv6, an enabled TCP or UDP type is chosen when the packet
+ * is not a fragment and carries that protocol; otherwise the plain IP type
+ * is chosen if enabled. Returns VIRTIO_NET_HASH_REPORT_NONE when nothing
+ * matches or the L3 protocol is neither IPv4 nor IPv6.
+ */
+static inline u32 virtio_net_hash_report(u32 types,
+                                        const struct flow_keys_basic *keys)
+{
+       u32 ip_type, tcp_type, udp_type;
+       u32 ip_report, tcp_report, udp_report;
+
+       /* Select the constants of the address family first. */
+       switch (keys->basic.n_proto) {
+       case cpu_to_be16(ETH_P_IP):
+               ip_type = VIRTIO_NET_RSS_HASH_TYPE_IPv4;
+               tcp_type = VIRTIO_NET_RSS_HASH_TYPE_TCPv4;
+               udp_type = VIRTIO_NET_RSS_HASH_TYPE_UDPv4;
+               ip_report = VIRTIO_NET_HASH_REPORT_IPv4;
+               tcp_report = VIRTIO_NET_HASH_REPORT_TCPv4;
+               udp_report = VIRTIO_NET_HASH_REPORT_UDPv4;
+               break;
+
+       case cpu_to_be16(ETH_P_IPV6):
+               ip_type = VIRTIO_NET_RSS_HASH_TYPE_IPv6;
+               tcp_type = VIRTIO_NET_RSS_HASH_TYPE_TCPv6;
+               udp_type = VIRTIO_NET_RSS_HASH_TYPE_UDPv6;
+               ip_report = VIRTIO_NET_HASH_REPORT_IPv6;
+               tcp_report = VIRTIO_NET_HASH_REPORT_TCPv6;
+               udp_report = VIRTIO_NET_HASH_REPORT_UDPv6;
+               break;
+
+       default:
+               return VIRTIO_NET_HASH_REPORT_NONE;
+       }
+
+       /* L4 hash types are considered for non-fragmented packets only. */
+       if (!(keys->control.flags & FLOW_DIS_IS_FRAGMENT)) {
+               if (keys->basic.ip_proto == IPPROTO_TCP && (types & tcp_type))
+                       return tcp_report;
+
+               if (keys->basic.ip_proto == IPPROTO_UDP && (types & udp_type))
+                       return udp_report;
+       }
+
+       return (types & ip_type) ? ip_report : VIRTIO_NET_HASH_REPORT_NONE;
+}
+
+/*
+ * Compute the RSS hash of @skb for the enabled hash types.
+ *
+ * @skb:   packet to dissect
+ * @types: bitmask of enabled VIRTIO_NET_RSS_HASH_TYPE_* flags
+ * @key:   Toeplitz key as consumed by virtio_net_toeplitz_calc()
+ * @hash:  output; hash->report is always written, hash->value only when
+ *         the report is not VIRTIO_NET_HASH_REPORT_NONE
+ *
+ * The address pair is hashed first and, for the TCP and UDP report types,
+ * the port pair is hashed next using the continuing key position.
+ */
+static inline void virtio_net_hash_rss(const struct sk_buff *skb,
+                                      u32 types, const u32 *key,
+                                      struct virtio_net_hash *hash)
+{
+       struct virtio_net_toeplitz_state toeplitz_state = { .key = key };
+       struct flow_keys_basic flow_basic;
+       struct flow_keys flow;
+       u16 report = VIRTIO_NET_HASH_REPORT_NONE;
+
+       /* A failed dissection keeps the NONE report and exits below. */
+       if (skb_flow_dissect_flow_keys(skb, &flow, 0)) {
+               flow_basic = (struct flow_keys_basic) {
+                       .control = flow.control,
+                       .basic = flow.basic
+               };
+               report = virtio_net_hash_report(types, &flow_basic);
+       }
+
+       /* Stage 1: source and destination addresses. */
+       switch (report) {
+       case VIRTIO_NET_HASH_REPORT_IPv4:
+       case VIRTIO_NET_HASH_REPORT_TCPv4:
+       case VIRTIO_NET_HASH_REPORT_UDPv4:
+               virtio_net_toeplitz_calc(&toeplitz_state,
+                                        (__be32 *)&flow.addrs.v4addrs,
+                                        sizeof(flow.addrs.v4addrs));
+               break;
+
+       case VIRTIO_NET_HASH_REPORT_IPv6:
+       case VIRTIO_NET_HASH_REPORT_TCPv6:
+       case VIRTIO_NET_HASH_REPORT_UDPv6:
+               virtio_net_toeplitz_calc(&toeplitz_state,
+                                        (__be32 *)&flow.addrs.v6addrs,
+                                        sizeof(flow.addrs.v6addrs));
+               break;
+
+       default:
+               hash->report = VIRTIO_NET_HASH_REPORT_NONE;
+               return;
+       }
+
+       /* Stage 2: ports, for the L4 report types only. */
+       switch (report) {
+       case VIRTIO_NET_HASH_REPORT_TCPv4:
+       case VIRTIO_NET_HASH_REPORT_UDPv4:
+       case VIRTIO_NET_HASH_REPORT_TCPv6:
+       case VIRTIO_NET_HASH_REPORT_UDPv6:
+               virtio_net_toeplitz_calc(&toeplitz_state, &flow.ports.ports,
+                                        sizeof(flow.ports.ports));
+               break;
+
+       default:
+               break;
+       }
+
+       hash->report = report;
+       hash->value = toeplitz_state.hash;
+}
+
  static inline bool virtio_net_hdr_match_proto(__be16 protocol, __u8 gso_type)
  {
         switch (gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {

--
2.48.1







[Index of Archives]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Share Photos]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Device Mapper]

  Powered by Linux