On 01/02/2023 19:28, Petr Machata wrote:
> The MDB maintained by the bridge is limited. When the bridge is configured
> for IGMP / MLD snooping, a buggy or malicious client can easily exhaust its
> capacity. In SW datapath, the capacity is configurable through the
> IFLA_BR_MCAST_HASH_MAX parameter, but ultimately is finite. Obviously a
> similar limit exists in the HW datapath for purposes of offloading.
> 
> In order to prevent the issue of unilateral exhaustion of MDB resources,
> introduce two parameters in each of two contexts:
> 
> - Per-port and per-port-VLAN number of MDB entries that the port
>   is member in.
> 
> - Per-port and (when BROPT_MCAST_VLAN_SNOOPING_ENABLED is enabled)
>   per-port-VLAN maximum permitted number of MDB entries, or 0 for
>   no limit.
> 
> The per-port multicast context is used for tracking of MDB entries for the
> port as a whole. This is available for all bridges.
> 
> The per-port-VLAN multicast context is then only available on
> VLAN-filtering bridges on VLANs that have multicast snooping on.
> 
> With these changes in place, it will be possible to configure MDB limit for
> bridge as a whole, or any one port as a whole, or any single port-VLAN.
> 
> Note that unlike the global limit, exhaustion of the per-port and
> per-port-VLAN maximums does not cause disablement of multicast snooping.
> It is also permitted to configure the local limit larger than hash_max,
> even though that is not useful.
> 
> In this patch, introduce only the accounting for number of entries, and the
> max field itself, but not the means to toggle the max. The next patch
> introduces the netlink APIs to toggle and read the values.
> 
> Signed-off-by: Petr Machata <petrm@xxxxxxxxxx>
> ---
> 
> Notes:
>     v2:
>     - In br_multicast_port_ngroups_inc_one(), bounce
>       if n>=max, not if n==max
>     - Adjust extack messages to mention ngroups, now that
>       the bounces appear when n>=max, not n==max
>     - In __br_multicast_enable_port_ctx(), do not reset
>       max to 0. Also do not count number of entries by
>       going through _inc, as that would end up incorrectly
>       bouncing the entries.
> 
>  net/bridge/br_multicast.c | 132 +++++++++++++++++++++++++++++++++++++-
>  net/bridge/br_private.h   |   2 +
>  2 files changed, 133 insertions(+), 1 deletion(-)
> 
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index 51b622afdb67..e7ae339a8757 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -31,6 +31,7 @@
>  #include <net/ip6_checksum.h>
>  #include <net/addrconf.h>
>  #endif
> +#include <trace/events/bridge.h>
> 
>  #include "br_private.h"
>  #include "br_private_mcast_eht.h"
> @@ -234,6 +235,29 @@ br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg)
>  	return pmctx;
>  }
> 
> +static struct net_bridge_mcast_port *
> +br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
> +{
> +	struct net_bridge_mcast_port *pmctx = NULL;
> +	struct net_bridge_vlan *vlan;
> +
> +	lockdep_assert_held_once(&port->br->multicast_lock);
> +
> +	if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
> +		return NULL;
> +
> +	/* Take RCU to access the vlan. */
> +	rcu_read_lock();
> +
> +	vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
> +	if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
> +		pmctx = &vlan->port_mcast_ctx;
> +
> +	rcu_read_unlock();
> +
> +	return pmctx;
> +}
> +
>  /* when snooping we need to check if the contexts should be used
>   * in the following order:
>   * - if pmctx is non-NULL (port), check if it should be used
> @@ -668,6 +692,82 @@ void br_multicast_del_group_src(struct net_bridge_group_src *src,
>  	__br_multicast_del_group_src(src);
>  }
> 
> +static int
> +br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
> +				  struct netlink_ext_ack *extack)
> +{
> +	if (pmctx->mdb_max_entries &&
> +	    pmctx->mdb_n_entries >= pmctx->mdb_max_entries)

These should be using *_ONCE() because of the next patch.
KCSAN might be sad otherwise. :)

> +		return -E2BIG;
> +
> +	pmctx->mdb_n_entries++;

WRITE_ONCE()

> +	return 0;
> +}
> +
> +static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port *pmctx)
> +{
> +	WARN_ON_ONCE(pmctx->mdb_n_entries-- == 0);

READ_ONCE()

> +}
> +
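To make the *_ONCE() suggestion concrete, the two helpers could end up
looking roughly like this. This is an untested sketch that keeps the
existing fields and semantics and is only meant to show where the
READ_ONCE()/WRITE_ONCE() annotations would go once the next patch adds
lockless readers of mdb_n_entries/mdb_max_entries:

static int
br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
				  struct netlink_ext_ack *extack)
{
	u32 max = READ_ONCE(pmctx->mdb_max_entries);
	u32 n = READ_ONCE(pmctx->mdb_n_entries);

	if (max && n >= max)
		return -E2BIG;

	/* Writes are still serialized by br->multicast_lock; WRITE_ONCE()
	 * pairs with the lockless READ_ONCE() readers.
	 */
	WRITE_ONCE(pmctx->mdb_n_entries, n + 1);
	return 0;
}

static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port *pmctx)
{
	u32 n = READ_ONCE(pmctx->mdb_n_entries);

	WARN_ON_ONCE(n == 0);
	WRITE_ONCE(pmctx->mdb_n_entries, n - 1);
}

The same READ_ONCE() treatment would then apply to the counter and max
values used in the extack messages further down.
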
> +static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
> +					  const struct br_ip *group,
> +					  struct netlink_ext_ack *extack)
> +{
> +	struct net_bridge_mcast_port *pmctx;
> +	int err;
> +
> +	lockdep_assert_held_once(&port->br->multicast_lock);
> +
> +	/* Always count on the port context. */
> +	err = br_multicast_port_ngroups_inc_one(&port->multicast_ctx, extack);
> +	if (err) {
> +		NL_SET_ERR_MSG_FMT_MOD(extack, "Port is already in %u groups, and mcast_max_groups=%u",
> +				       port->multicast_ctx.mdb_n_entries,
> +				       port->multicast_ctx.mdb_max_entries);

READ_ONCE()

> +		trace_br_mdb_full(port->dev, group);
> +		return err;
> +	}
> +
> +	/* Only count on the VLAN context if VID is given, and if snooping on
> +	 * that VLAN is enabled.
> +	 */
> +	if (!group->vid)
> +		return 0;
> +
> +	pmctx = br_multicast_port_vid_to_port_ctx(port, group->vid);
> +	if (!pmctx)
> +		return 0;
> +
> +	err = br_multicast_port_ngroups_inc_one(pmctx, extack);
> +	if (err) {
> +		NL_SET_ERR_MSG_FMT_MOD(extack, "Port-VLAN is already in %u groups, and mcast_max_groups=%u",
> +				       pmctx->mdb_n_entries,
> +				       pmctx->mdb_max_entries);

READ_ONCE()

> +		trace_br_mdb_full(port->dev, group);
> +		goto dec_one_out;
> +	}
> +
> +	return 0;
> +
> +dec_one_out:
> +	br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
> +	return err;
> +}
> +
> +static void br_multicast_port_ngroups_dec(struct net_bridge_port *port, u16 vid)
> +{
> +	struct net_bridge_mcast_port *pmctx;
> +
> +	lockdep_assert_held_once(&port->br->multicast_lock);
> +
> +	if (vid) {
> +		pmctx = br_multicast_port_vid_to_port_ctx(port, vid);
> +		if (pmctx)
> +			br_multicast_port_ngroups_dec_one(pmctx);
> +	}
> +	br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
> +}
> +
>  static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
>  {
>  	struct net_bridge_port_group *pg;
> @@ -702,6 +802,7 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
>  	} else {
>  		br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
>  	}
> +	br_multicast_port_ngroups_dec(pg->key.port, pg->key.addr.vid);
>  	hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
>  	queue_work(system_long_wq, &br->mcast_gc_work);
> 
> @@ -1165,6 +1266,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
>  		return mp;
> 
>  	if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
> +		trace_br_mdb_full(br->dev, group);
>  		br_mc_disabled_update(br->dev, false, NULL);
>  		br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
>  		return ERR_PTR(-E2BIG);
> @@ -1288,11 +1390,16 @@ struct net_bridge_port_group *br_multicast_new_port_group(
>  	struct netlink_ext_ack *extack)
>  {
>  	struct net_bridge_port_group *p;
> +	int err;
> +
> +	err = br_multicast_port_ngroups_inc(port, group, extack);
> +	if (err)
> +		return NULL;
> 
>  	p = kzalloc(sizeof(*p), GFP_ATOMIC);
>  	if (unlikely(!p)) {
>  		NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
> -		return NULL;
> +		goto dec_out;
>  	}
> 
>  	p->key.addr = *group;
> @@ -1326,18 +1433,22 @@ struct net_bridge_port_group *br_multicast_new_port_group(
> 
>  free_out:
>  	kfree(p);
> +dec_out:
> +	br_multicast_port_ngroups_dec(port, group->vid);
>  	return NULL;
>  }
> 
>  void br_multicast_del_port_group(struct net_bridge_port_group *p)
>  {
>  	struct net_bridge_port *port = p->key.port;
> +	__u16 vid = p->key.addr.vid;
> 
>  	hlist_del_init(&p->mglist);
>  	if (!br_multicast_is_star_g(&p->key.addr))
>  		rhashtable_remove_fast(&port->br->sg_port_tbl, &p->rhnode,
>  				       br_sg_port_rht_params);
>  	kfree(p);
> +	br_multicast_port_ngroups_dec(port, vid);
>  }
> 
>  void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
> @@ -1951,6 +2062,25 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
>  		br_ip4_multicast_add_router(brmctx, pmctx);
>  		br_ip6_multicast_add_router(brmctx, pmctx);
>  	}
> +
> +	if (br_multicast_port_ctx_is_vlan(pmctx)) {
> +		struct net_bridge_port_group *pg;
> +		u32 n = 0;
> +
> +		/* The mcast_n_groups counter might be wrong. First,
> +		 * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries
> +		 * are flushed, thus mcast_n_groups after the toggle does not
> +		 * reflect the true values. And second, permanent entries added
> +		 * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected
> +		 * either. Thus we have to refresh the counter.
> +		 */
> +
> +		hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
> +			if (pg->key.addr.vid == pmctx->vlan->vid)
> +				n++;
> +		}
> +		pmctx->mdb_n_entries = n;

WRITE_ONCE()

> +	}
>  }
> 
>  void br_multicast_enable_port(struct net_bridge_port *port)
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index e4069e27b5c6..49f411a0a1f1 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -126,6 +126,8 @@ struct net_bridge_mcast_port {
>  	struct hlist_node		ip6_rlist;
>  #endif /* IS_ENABLED(CONFIG_IPV6) */
>  	unsigned char			multicast_router;
> +	u32				mdb_n_entries;
> +	u32				mdb_max_entries;
>  #endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
>  };
> 
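Same for the counter refresh above - assuming the next patch reads
mdb_n_entries without holding the multicast lock, the store there would
presumably become something like (again only a sketch):

		hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
			if (pg->key.addr.vid == pmctx->vlan->vid)
				n++;
		}
		WRITE_ONCE(pmctx->mdb_n_entries, n);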