Re: [PATCH net-next mlxsw v2 07/16] net: bridge: Maintain number of MDB entries in net_bridge_mcast_port

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 01/02/2023 19:28, Petr Machata wrote:
> The MDB maintained by the bridge is limited. When the bridge is configured
> for IGMP / MLD snooping, a buggy or malicious client can easily exhaust its
> capacity. In SW datapath, the capacity is configurable through the
> IFLA_BR_MCAST_HASH_MAX parameter, but ultimately is finite. Obviously a
> similar limit exists in the HW datapath for purposes of offloading.
> 
> In order to prevent the issue of unilateral exhaustion of MDB resources,
> introduce two parameters in each of two contexts:
> 
> - Per-port and per-port-VLAN number of MDB entries that the port
>   is member in.
> 
> - Per-port and (when BROPT_MCAST_VLAN_SNOOPING_ENABLED is enabled)
>   per-port-VLAN maximum permitted number of MDB entries, or 0 for
>   no limit.
> 
> The per-port multicast context is used for tracking of MDB entries for the
> port as a whole. This is available for all bridges.
> 
> The per-port-VLAN multicast context is then only available on
> VLAN-filtering bridges on VLANs that have multicast snooping on.
> 
> With these changes in place, it will be possible to configure MDB limit for
> bridge as a whole, or any one port as a whole, or any single port-VLAN.
> 
> Note that unlike the global limit, exhaustion of the per-port and
> per-port-VLAN maximums does not cause disablement of multicast snooping.
> It is also permitted to configure the local limit larger than hash_max,
> even though that is not useful.
> 
> In this patch, introduce only the accounting for number of entries, and the
> max field itself, but not the means to toggle the max. The next patch
> introduces the netlink APIs to toggle and read the values.
> 
> Signed-off-by: Petr Machata <petrm@xxxxxxxxxx>
> ---
> 
> Notes:
>     v2:
>     - In br_multicast_port_ngroups_inc_one(), bounce
>       if n>=max, not if n==max
>     - Adjust extack messages to mention ngroups, now that
>       the bounces appear when n>=max, not n==max
>     - In __br_multicast_enable_port_ctx(), do not reset
>       max to 0. Also do not count number of entries by
>       going through _inc, as that would end up incorrectly
>       bouncing the entries.
> 
>  net/bridge/br_multicast.c | 132 +++++++++++++++++++++++++++++++++++++-
>  net/bridge/br_private.h   |   2 +
>  2 files changed, 133 insertions(+), 1 deletion(-)
> 
> diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
> index 51b622afdb67..e7ae339a8757 100644
> --- a/net/bridge/br_multicast.c
> +++ b/net/bridge/br_multicast.c
> @@ -31,6 +31,7 @@
>  #include <net/ip6_checksum.h>
>  #include <net/addrconf.h>
>  #endif
> +#include <trace/events/bridge.h>
>  
>  #include "br_private.h"
>  #include "br_private_mcast_eht.h"
> @@ -234,6 +235,29 @@ br_multicast_pg_to_port_ctx(const struct net_bridge_port_group *pg)
>  	return pmctx;
>  }
>  
> +static struct net_bridge_mcast_port *
> +br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
> +{
> +	struct net_bridge_mcast_port *pmctx = NULL;
> +	struct net_bridge_vlan *vlan;
> +
> +	lockdep_assert_held_once(&port->br->multicast_lock);
> +
> +	if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
> +		return NULL;
> +
> +	/* Take RCU to access the vlan. */
> +	rcu_read_lock();
> +
> +	vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
> +	if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
> +		pmctx = &vlan->port_mcast_ctx;
> +
> +	rcu_read_unlock();
> +
> +	return pmctx;
> +}
> +
>  /* when snooping we need to check if the contexts should be used
>   * in the following order:
>   * - if pmctx is non-NULL (port), check if it should be used
> @@ -668,6 +692,82 @@ void br_multicast_del_group_src(struct net_bridge_group_src *src,
>  	__br_multicast_del_group_src(src);
>  }
>  
> +static int
> +br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
> +				  struct netlink_ext_ack *extack)
> +{
> +	if (pmctx->mdb_max_entries &&
> +	    pmctx->mdb_n_entries >= pmctx->mdb_max_entries)

These should be using *_ONCE() because of the next patch.
KCSAN might be sad otherwise. :)

> +		return -E2BIG;
> +
> +	pmctx->mdb_n_entries++;

WRITE_ONCE()

> +	return 0;
> +}
> +
> +static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port *pmctx)
> +{
> +	WARN_ON_ONCE(pmctx->mdb_n_entries-- == 0);

READ_ONCE()

> +}
> +
> +static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
> +					 const struct br_ip *group,
> +					 struct netlink_ext_ack *extack)
> +{
> +	struct net_bridge_mcast_port *pmctx;
> +	int err;
> +
> +	lockdep_assert_held_once(&port->br->multicast_lock);
> +
> +	/* Always count on the port context. */
> +	err = br_multicast_port_ngroups_inc_one(&port->multicast_ctx, extack);
> +	if (err) {
> +		NL_SET_ERR_MSG_FMT_MOD(extack, "Port is already in %u groups, and mcast_max_groups=%u",
> +				       port->multicast_ctx.mdb_n_entries,
> +				       port->multicast_ctx.mdb_max_entries);

READ_ONCE()

> +		trace_br_mdb_full(port->dev, group);
> +		return err;
> +	}
> +
> +	/* Only count on the VLAN context if VID is given, and if snooping on
> +	 * that VLAN is enabled.
> +	 */
> +	if (!group->vid)
> +		return 0;
> +
> +	pmctx = br_multicast_port_vid_to_port_ctx(port, group->vid);
> +	if (!pmctx)
> +		return 0;
> +
> +	err = br_multicast_port_ngroups_inc_one(pmctx, extack);
> +	if (err) {
> +		NL_SET_ERR_MSG_FMT_MOD(extack, "Port-VLAN is already in %u groups, and mcast_max_groups=%u",
> +				       pmctx->mdb_n_entries,
> +				       pmctx->mdb_max_entries);

READ_ONCE()

> +		trace_br_mdb_full(port->dev, group);
> +		goto dec_one_out;
> +	}
> +
> +	return 0;
> +
> +dec_one_out:
> +	br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
> +	return err;
> +}
> +
> +static void br_multicast_port_ngroups_dec(struct net_bridge_port *port, u16 vid)
> +{
> +	struct net_bridge_mcast_port *pmctx;
> +
> +	lockdep_assert_held_once(&port->br->multicast_lock);
> +
> +	if (vid) {
> +		pmctx = br_multicast_port_vid_to_port_ctx(port, vid);
> +		if (pmctx)
> +			br_multicast_port_ngroups_dec_one(pmctx);
> +	}
> +	br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
> +}
> +
>  static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
>  {
>  	struct net_bridge_port_group *pg;
> @@ -702,6 +802,7 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
>  	} else {
>  		br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
>  	}
> +	br_multicast_port_ngroups_dec(pg->key.port, pg->key.addr.vid);
>  	hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
>  	queue_work(system_long_wq, &br->mcast_gc_work);
>  
> @@ -1165,6 +1266,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
>  		return mp;
>  
>  	if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
> +		trace_br_mdb_full(br->dev, group);
>  		br_mc_disabled_update(br->dev, false, NULL);
>  		br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
>  		return ERR_PTR(-E2BIG);
> @@ -1288,11 +1390,16 @@ struct net_bridge_port_group *br_multicast_new_port_group(
>  			struct netlink_ext_ack *extack)
>  {
>  	struct net_bridge_port_group *p;
> +	int err;
> +
> +	err = br_multicast_port_ngroups_inc(port, group, extack);
> +	if (err)
> +		return NULL;
>  
>  	p = kzalloc(sizeof(*p), GFP_ATOMIC);
>  	if (unlikely(!p)) {
>  		NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
> -		return NULL;
> +		goto dec_out;
>  	}
>  
>  	p->key.addr = *group;
> @@ -1326,18 +1433,22 @@ struct net_bridge_port_group *br_multicast_new_port_group(
>  
>  free_out:
>  	kfree(p);
> +dec_out:
> +	br_multicast_port_ngroups_dec(port, group->vid);
>  	return NULL;
>  }
>  
>  void br_multicast_del_port_group(struct net_bridge_port_group *p)
>  {
>  	struct net_bridge_port *port = p->key.port;
> +	__u16 vid = p->key.addr.vid;
>  
>  	hlist_del_init(&p->mglist);
>  	if (!br_multicast_is_star_g(&p->key.addr))
>  		rhashtable_remove_fast(&port->br->sg_port_tbl, &p->rhnode,
>  				       br_sg_port_rht_params);
>  	kfree(p);
> +	br_multicast_port_ngroups_dec(port, vid);
>  }
>  
>  void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
> @@ -1951,6 +2062,25 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
>  		br_ip4_multicast_add_router(brmctx, pmctx);
>  		br_ip6_multicast_add_router(brmctx, pmctx);
>  	}
> +
> +	if (br_multicast_port_ctx_is_vlan(pmctx)) {
> +		struct net_bridge_port_group *pg;
> +		u32 n = 0;
> +
> +		/* The mcast_n_groups counter might be wrong. First,
> +		 * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries
> +		 * are flushed, thus mcast_n_groups after the toggle does not
> +		 * reflect the true values. And second, permanent entries added
> +		 * while BR_VLFLAG_MCAST_ENABLED was disabled, are not reflected
> +		 * either. Thus we have to refresh the counter.
> +		 */
> +
> +		hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
> +			if (pg->key.addr.vid == pmctx->vlan->vid)
> +				n++;
> +		}
> +		pmctx->mdb_n_entries = n;

WRITE_ONCE()

> +	}
>  }
>  
>  void br_multicast_enable_port(struct net_bridge_port *port)
> diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
> index e4069e27b5c6..49f411a0a1f1 100644
> --- a/net/bridge/br_private.h
> +++ b/net/bridge/br_private.h
> @@ -126,6 +126,8 @@ struct net_bridge_mcast_port {
>  	struct hlist_node		ip6_rlist;
>  #endif /* IS_ENABLED(CONFIG_IPV6) */
>  	unsigned char			multicast_router;
> +	u32				mdb_n_entries;
> +	u32				mdb_max_entries;
>  #endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
>  };
>  




[Index of Archives]     [Netdev]     [AoE Tools]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]     [Video 4 Linux]

  Powered by Linux