On 13/03/2023 16:53, Ido Schimmel wrote:
> Integrate MDB support into the Tx path of the VXLAN driver, allowing it
> to selectively forward IP multicast traffic according to the matched MDB
> entry.
>
> If MDB entries are configured (i.e., 'VXLAN_F_MDB' is set) and the
> packet is an IP multicast packet, perform up to three different lookups
> according to the following priority:
>
> 1. For an (S, G) entry, using {Source VNI, Source IP, Destination IP}.
> 2. For a (*, G) entry, using {Source VNI, Destination IP}.
> 3. For the catchall MDB entry (0.0.0.0 or ::), using the source VNI.
>
> The catchall MDB entry is similar to the catchall FDB entry
> (00:00:00:00:00:00) that is currently used to transmit BUM (broadcast,
> unknown unicast and multicast) traffic. However, unlike the catchall FDB
> entry, this entry is only used to transmit unregistered IP multicast
> traffic that is not link-local. Therefore, when configured, the catchall
> FDB entry will only transmit BULL (broadcast, unknown unicast,
> link-local multicast) traffic.
>
> The catchall MDB entry is useful in deployments where inter-subnet
> multicast forwarding is used and not all the VTEPs in a tenant domain
> are members in all the broadcast domains. In such deployments it is
> advantageous to transmit BULL (broadcast, unknown unicast and link-local
> multicast) and unregistered IP multicast traffic on different tunnels.
> If the same tunnel was used, a VTEP only interested in IP multicast
> traffic would also pull all the BULL traffic and drop it as it is not a
> member in the originating broadcast domain [1].
>
> If the packet did not match an MDB entry (or if the packet is not an IP
> multicast packet), return it to the Tx path, allowing it to be forwarded
> according to the FDB.
>
> If the packet did match an MDB entry, forward it to the associated
> remote VTEPs.
> However, if the entry is a (*, G) entry and the associated
> remote is in INCLUDE mode, then skip over it as the source IP is not in
> its source list (otherwise the packet would have matched on an (S, G)
> entry). Similarly, if the associated remote is marked as BLOCKED (can
> only be set on (S, G) entries), then skip over it as well as the remote
> is in EXCLUDE mode and the source IP is in its source list.
>
> [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast#section-2.6
>
> Signed-off-by: Ido Schimmel <idosch@xxxxxxxxxx>
> ---
>  drivers/net/vxlan/vxlan_core.c    |  15 ++++
>  drivers/net/vxlan/vxlan_mdb.c     | 114 ++++++++++++++++++++++++++++++
>  drivers/net/vxlan/vxlan_private.h |   6 ++
>  3 files changed, 135 insertions(+)
>

[snip]

> diff --git a/drivers/net/vxlan/vxlan_mdb.c b/drivers/net/vxlan/vxlan_mdb.c
> index b32b1fb4a74a..ea63c5178718 100644
> --- a/drivers/net/vxlan/vxlan_mdb.c
> +++ b/drivers/net/vxlan/vxlan_mdb.c
> @@ -1298,6 +1298,120 @@ int vxlan_mdb_del(struct net_device *dev, struct nlattr *tb[],
>  	return err;
>  }
>
> +struct vxlan_mdb_entry *vxlan_mdb_entry_skb_get(struct vxlan_dev *vxlan,
> +						struct sk_buff *skb,
> +						__be32 src_vni)
> +{
> +	struct vxlan_mdb_entry *mdb_entry;
> +	struct vxlan_mdb_entry_key group;
> +
> +	if (!is_multicast_ether_addr(eth_hdr(skb)->h_dest) ||
> +	    is_broadcast_ether_addr(eth_hdr(skb)->h_dest))
> +		return NULL;
> +
> +	/* When not in collect metadata mode, 'src_vni' is zero, but MDB
> +	 * entries are stored with the VNI of the VXLAN device.
> +	 */
> +	if (!(vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA))
> +		src_vni = vxlan->default_dst.remote_vni;
> +
> +	memset(&group, 0, sizeof(group));
> +	group.vni = src_vni;
> +
> +	switch (ntohs(skb->protocol)) {

Drop the ntohs() here and..
> +	case ETH_P_IP:

..use htons(ETH_P_IP) in the case label instead.

> +		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
> +			return NULL;
> +		group.dst.sa.sa_family = AF_INET;
> +		group.dst.sin.sin_addr.s_addr = ip_hdr(skb)->daddr;
> +		group.src.sa.sa_family = AF_INET;
> +		group.src.sin.sin_addr.s_addr = ip_hdr(skb)->saddr;
> +		break;
> +#if IS_ENABLED(CONFIG_IPV6)
> +	case ETH_P_IPV6:

htons(ETH_P_IPV6)

> +		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
> +			return NULL;
> +		group.dst.sa.sa_family = AF_INET6;
> +		group.dst.sin6.sin6_addr = ipv6_hdr(skb)->daddr;
> +		group.src.sa.sa_family = AF_INET6;
> +		group.src.sin6.sin6_addr = ipv6_hdr(skb)->saddr;
> +		break;
> +#endif
> +	default:
> +		return NULL;
> +	}
> +
> +	mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group);
> +	if (mdb_entry)
> +		return mdb_entry;
> +
> +	memset(&group.src, 0, sizeof(group.src));
> +	mdb_entry = vxlan_mdb_entry_lookup(vxlan, &group);
> +	if (mdb_entry)
> +		return mdb_entry;
> +
> +	/* No (S, G) or (*, G) found. Look up the all-zeros entry, but only if
> +	 * the destination IP address is not link-local multicast since we want
> +	 * to transmit such traffic together with broadcast and unknown unicast
> +	 * traffic.
> +	 */
> +	switch (ntohs(skb->protocol)) {
> +	case ETH_P_IP:

ditto: htons(ETH_P_IP)

> +		if (ipv4_is_local_multicast(group.dst.sin.sin_addr.s_addr))
> +			return NULL;
> +		group.dst.sin.sin_addr.s_addr = 0;
> +		break;
> +#if IS_ENABLED(CONFIG_IPV6)
> +	case ETH_P_IPV6:

ditto: htons(ETH_P_IPV6)

> +		if (ipv6_addr_type(&group.dst.sin6.sin6_addr) &
> +		    IPV6_ADDR_LINKLOCAL)
> +			return NULL;
> +		memset(&group.dst.sin6.sin6_addr, 0,
> +		       sizeof(group.dst.sin6.sin6_addr));
> +		break;
> +#endif
> +	default:
> +		return NULL;
> +	}
> +
> +	return vxlan_mdb_entry_lookup(vxlan, &group);
> +}
> +

[snip]