RE: [PATCH mlx5-next 05/15] net/mlx5: E-Switch, Tag packet with vport number in VF vports and uplink ingress ACLs

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




> -----Original Message-----
> From: netdev-owner@xxxxxxxxxxxxxxx <netdev-owner@xxxxxxxxxxxxxxx> On
> Behalf Of Saeed Mahameed
> Sent: Tuesday, June 18, 2019 12:53 AM
> To: Saeed Mahameed <saeedm@xxxxxxxxxxxx>; Leon Romanovsky
> <leonro@xxxxxxxxxxxx>
> Cc: netdev@xxxxxxxxxxxxxxx; linux-rdma@xxxxxxxxxxxxxxx; Jianbo Liu
> <jianbol@xxxxxxxxxxxx>; Eli Britstein <elibr@xxxxxxxxxxxx>; Roi Dayan
> <roid@xxxxxxxxxxxx>; Mark Bloch <markb@xxxxxxxxxxxx>
> Subject: [PATCH mlx5-next 05/15] net/mlx5: E-Switch, Tag packet with vport
> number in VF vports and uplink ingress ACLs
> 
> From: Jianbo Liu <jianbol@xxxxxxxxxxxx>
> 
> When a dual-port VHCA sends a RoCE packet on its non-native port, and the
> packet arrives to its affiliated vport FDB, a mismatch might occur on the rules
> that match the packet source vport as it is not represented by single VHCA only
> in this case. So we change to match on metadata instead of source vport.
> To do that, a rule is created in all vports and uplink ingress ACLs, to save the
> source vport number and vhca id in the packet's metadata in order to match on
> it later.
> The metadata register used is the first of the 32-bit type C registers. It can be
> used for matching and header modify operations. The higher 16 bits of this
> register are for vhca id, and the lower 16 ones is for vport number.
> This change is not for dual-port RoCE only. If HW and FW allow, the vport
> metadata matching is enabled by default.
> 
> Signed-off-by: Jianbo Liu <jianbol@xxxxxxxxxxxx>
> Reviewed-by: Eli Britstein <elibr@xxxxxxxxxxxx>
> Reviewed-by: Roi Dayan <roid@xxxxxxxxxxxx>
> Reviewed-by: Mark Bloch <markb@xxxxxxxxxxxx>
> Signed-off-by: Saeed Mahameed <saeedm@xxxxxxxxxxxx>
> ---
>  .../net/ethernet/mellanox/mlx5/core/eswitch.c |   2 +
>  .../net/ethernet/mellanox/mlx5/core/eswitch.h |   9 +
>  .../mellanox/mlx5/core/eswitch_offloads.c     | 183 ++++++++++++++----
>  include/linux/mlx5/eswitch.h                  |   3 +
>  4 files changed, 161 insertions(+), 36 deletions(-)
> 
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> index a42a23e505df..1235fd84ae3a 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> @@ -1168,6 +1168,8 @@ void esw_vport_cleanup_ingress_rules(struct
> mlx5_eswitch *esw,
> 
>  	vport->ingress.drop_rule = NULL;
>  	vport->ingress.allow_rule = NULL;
> +
> +	esw_vport_del_ingress_acl_modify_metadata(esw, vport);
>  }
> 
>  void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw, diff --git
> a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> index 8b9f2cf58e91..4417a195832e 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> @@ -68,6 +68,8 @@ struct vport_ingress {
>  	struct mlx5_flow_group *allow_spoofchk_only_grp;
>  	struct mlx5_flow_group *allow_untagged_only_grp;
>  	struct mlx5_flow_group *drop_grp;
> +	int                      modify_metadata_id;
No need for random alignment. Just have one space after int.

> +	struct mlx5_flow_handle  *modify_metadata_rule;
>  	struct mlx5_flow_handle  *allow_rule;
>  	struct mlx5_flow_handle  *drop_rule;
>  	struct mlx5_fc           *drop_counter;
> @@ -196,6 +198,10 @@ struct mlx5_esw_functions {
>  	u16			num_vfs;
>  };
> 
> +enum {
> +	MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0), };
> +
>  struct mlx5_eswitch {
>  	struct mlx5_core_dev    *dev;
>  	struct mlx5_nb          nb;
> @@ -203,6 +209,7 @@ struct mlx5_eswitch {
>  	struct hlist_head       mc_table[MLX5_L2_ADDR_HASH_SIZE];
>  	struct workqueue_struct *work_queue;
>  	struct mlx5_vport       *vports;
> +	u32                     flags;
Same as above, no need for extra alignment.

>  	int                     total_vports;
>  	int                     enabled_vports;
>  	/* Synchronize between vport change events @@ -240,6 +247,8 @@
> void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
>  				  struct mlx5_vport *vport);
>  void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
>  				   struct mlx5_vport *vport);
> +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
> +					       struct mlx5_vport *vport);
> 
>  /* E-Switch API */
>  int mlx5_eswitch_init(struct mlx5_core_dev *dev); diff --git
> a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> index 17abb98b48af..871ae44dc132 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> @@ -1555,32 +1555,16 @@ static void esw_offloads_devcom_cleanup(struct
> mlx5_eswitch *esw)  static int esw_vport_ingress_prio_tag_config(struct
> mlx5_eswitch *esw,
>  					     struct mlx5_vport *vport)
>  {
> -	struct mlx5_core_dev *dev = esw->dev;
>  	struct mlx5_flow_act flow_act = {0};
>  	struct mlx5_flow_spec *spec;
>  	int err = 0;
> 
>  	/* For prio tag mode, there is only 1 FTEs:
> -	 * 1) Untagged packets - push prio tag VLAN, allow
> +	 * 1) Untagged packets - push prio tag VLAN and modify metadata if
> +	 * required, allow
>  	 * Unmatched traffic is allowed by default
>  	 */
> 
> -	if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
> -		return -EOPNOTSUPP;
> -
> -	esw_vport_cleanup_ingress_rules(esw, vport);
> -
> -	err = esw_vport_enable_ingress_acl(esw, vport);
> -	if (err) {
> -		mlx5_core_warn(esw->dev,
> -			       "failed to enable prio tag ingress acl (%d) on
> vport[%d]\n",
> -			       err, vport->vport);
> -		return err;
> -	}
> -
> -	esw_debug(esw->dev,
> -		  "vport[%d] configure ingress rules\n", vport->vport);
> -
>  	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
>  	if (!spec) {
>  		err = -ENOMEM;
> @@ -1596,6 +1580,12 @@ static int esw_vport_ingress_prio_tag_config(struct
> mlx5_eswitch *esw,
>  	flow_act.vlan[0].ethtype = ETH_P_8021Q;
>  	flow_act.vlan[0].vid = 0;
>  	flow_act.vlan[0].prio = 0;
> +
> +	if (vport->ingress.modify_metadata_rule) {
> +		flow_act.action |=
> MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
> +		flow_act.modify_id = vport->ingress.modify_metadata_id;
> +	}
> +
>  	vport->ingress.allow_rule =
>  		mlx5_add_flow_rules(vport->ingress.acl, spec,
>  				    &flow_act, NULL, 0);
> @@ -1616,6 +1606,59 @@ static int esw_vport_ingress_prio_tag_config(struct
> mlx5_eswitch *esw,
>  	return err;
>  }
> 
> +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch
> *esw,
> +						     struct mlx5_vport *vport)
> +{
> +	u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] =
> {};
> +	struct mlx5_flow_act flow_act = {};
> +	struct mlx5_flow_spec spec = {};
> +	int err = 0;
> +
> +	MLX5_SET(set_action_in, action, action_type,
> MLX5_ACTION_TYPE_SET);
> +	MLX5_SET(set_action_in, action, field,
> MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
> +	MLX5_SET(set_action_in, action, data,
> +		 mlx5_eswitch_get_vport_metadata_for_match(esw, vport-
> >vport));
> +
> +	err = mlx5_modify_header_alloc(esw->dev,
> MLX5_FLOW_NAMESPACE_ESW_INGRESS,
> +				       1, action, &vport-
> >ingress.modify_metadata_id);
> +
> +	if (err) {
> +		esw_warn(esw->dev,
> +			 "failed to alloc modify header for vport %d ingress acl
> (%d)\n",
> +			 vport->vport, err);
> +		return err;
> +	}
> +
> +	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR |
> MLX5_FLOW_CONTEXT_ACTION_ALLOW;
> +	flow_act.modify_id = vport->ingress.modify_metadata_id;
> +	vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport-
> >ingress.acl,
> +								  &spec,
> &flow_act, NULL, 0);
> +	if (IS_ERR(vport->ingress.modify_metadata_rule)) {
> +		err = PTR_ERR(vport->ingress.modify_metadata_rule);
> +		esw_warn(esw->dev,
> +			 "failed to add setting metadata rule for vport %d
> ingress acl, err(%d)\n",
> +			 vport->vport, err);
> +		vport->ingress.modify_metadata_rule = NULL;
> +		goto out;
> +	}
> +
> +out:
> +	if (err)
> +		mlx5_modify_header_dealloc(esw->dev, vport-
> >ingress.modify_metadata_id);
> +	return err;
> +}
> +
> +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
> +					       struct mlx5_vport *vport)
> +{
> +	if (vport->ingress.modify_metadata_rule) {
> +		mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
> +		mlx5_modify_header_dealloc(esw->dev,
> +vport->ingress.modify_metadata_id);
> +
> +		vport->ingress.modify_metadata_rule = NULL;
> +	}
> +}
> +
>  static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
>  					    struct mlx5_vport *vport)
>  {
> @@ -1623,6 +1666,9 @@ static int esw_vport_egress_prio_tag_config(struct
> mlx5_eswitch *esw,
>  	struct mlx5_flow_spec *spec;
>  	int err = 0;
> 
> +	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
> +		return 0;
> +
>  	/* For prio tag mode, there is only 1 FTEs:
>  	 * 1) prio tag packets - pop the prio tag VLAN, allow
>  	 * Unmatched traffic is allowed by default @@ -1676,27 +1722,77 @@
> static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
>  	return err;
>  }
> 
> -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
> +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
> +					   struct mlx5_vport *vport)
>  {
> -	struct mlx5_vport *vport = NULL;
> -	int i, j;
>  	int err;
> 
> -	mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
> +	if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
> +	    !MLX5_CAP_GEN(esw->dev, prio_tag_required))
> +		return 0;
> +
> +	esw_vport_cleanup_ingress_rules(esw, vport);
> +
> +	err = esw_vport_enable_ingress_acl(esw, vport);
> +	if (err) {
> +		esw_warn(esw->dev,
> +			 "failed to enable ingress acl (%d) on vport[%d]\n",
> +			 err, vport->vport);
> +		return err;
> +	}
> +
> +	esw_debug(esw->dev,
> +		  "vport[%d] configure ingress rules\n", vport->vport);
> +
> +	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
> +		err = esw_vport_add_ingress_acl_modify_metadata(esw,
> vport);
> +		if (err)
> +			goto out;
> +	}
> +
> +	if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
> +	    (vport->vport >= MLX5_VPORT_FIRST_VF &&
> +	     vport->vport <= esw->dev->priv.sriov.num_vfs)) {
>  		err = esw_vport_ingress_prio_tag_config(esw, vport);
>  		if (err)
> -			goto err_ingress;
> -		err = esw_vport_egress_prio_tag_config(esw, vport);
> +			goto out;
> +	}
> +
> +out:
> +	if (err)
> +		esw_vport_disable_ingress_acl(esw, vport);
> +	return err;
> +}
> +
> +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw) {
> +	struct mlx5_vport *vport;
> +	int i, j;
> +	int err;
> +
> +	mlx5_esw_for_all_vports(esw, i, vport) {
> +		err = esw_vport_ingress_common_config(esw, vport);
>  		if (err)
> -			goto err_egress;
> +			goto err_ingress;
> +
> +		if (vport->vport >= MLX5_VPORT_FIRST_VF &&
> +		    vport->vport <= esw->dev->priv.sriov.num_vfs) {
Add a helper API mlx5_esw_is_vf_vport(const struct mlx5_eswitch *esw, const struct mlx5_vport *vport)
and use it in the two places in the ingress and egress config.

> +			err = esw_vport_egress_prio_tag_config(esw, vport);
> +			if (err)
> +				goto err_egress;
> +		}
>  	}
> 
> +	if (mlx5_eswitch_vport_match_metadata_enabled(esw))
> +		esw_info(esw->dev, "Use metadata reg_c as source vport to
> match\n");
> +
>  	return 0;
> 
>  err_egress:
>  	esw_vport_disable_ingress_acl(esw, vport);
>  err_ingress:
> -	mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
> +	for (j = MLX5_VPORT_PF; j < i; j++) {
Keep the reverse order as before.

> +		vport = &esw->vports[j];
>  		esw_vport_disable_egress_acl(esw, vport);
>  		esw_vport_disable_ingress_acl(esw, vport);
>  	}
> @@ -1704,15 +1800,17 @@ static int esw_prio_tag_acls_config(struct
> mlx5_eswitch *esw, int nvports)
>  	return err;
>  }
> 
> -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
> +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
>  {
>  	struct mlx5_vport *vport;
>  	int i;
> 
> -	mlx5_esw_for_each_vf_vport(esw, i, vport, esw->nvports) {
> +	mlx5_esw_for_all_vports(esw, i, vport) {
If you are changing this, please do it in reverse order to keep it an exact mirror of the create/enable sequence.

>  		esw_vport_disable_egress_acl(esw, vport);
>  		esw_vport_disable_ingress_acl(esw, vport);
>  	}
> +
> +	esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
>  }
> 
>  static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
> @@ -1722,15 +1820,13 @@ static int esw_offloads_steering_init(struct
> mlx5_eswitch *esw, int nvports)
>  	memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
>  	mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
> 
> -	if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
> -		err = esw_prio_tag_acls_config(esw, nvports);
> -		if (err)
> -			return err;
> -	}
> +	err = esw_create_offloads_acl_tables(esw);
> +	if (err)
> +		return err;
> 
>  	err = esw_create_offloads_fdb_tables(esw, nvports);
>  	if (err)
> -		return err;
> +		goto create_fdb_err;
> 
>  	err = esw_create_offloads_table(esw, nvports);
>  	if (err)
> @@ -1748,6 +1844,9 @@ static int esw_offloads_steering_init(struct
> mlx5_eswitch *esw, int nvports)
>  create_ft_err:
>  	esw_destroy_offloads_fdb_tables(esw);
> 
> +create_fdb_err:
> +	esw_destroy_offloads_acl_tables(esw);
> +
>  	return err;
>  }
> 
> @@ -1756,8 +1855,7 @@ static void esw_offloads_steering_cleanup(struct
> mlx5_eswitch *esw)
>  	esw_destroy_vport_rx_group(esw);
>  	esw_destroy_offloads_table(esw);
>  	esw_destroy_offloads_fdb_tables(esw);
> -	if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
> -		esw_prio_tag_acls_cleanup(esw);
> +	esw_destroy_offloads_acl_tables(esw);
>  }
> 
>  static void esw_functions_changed_event_handler(struct work_struct *work)
> @@ -2290,3 +2388,16 @@ struct mlx5_eswitch_rep
> *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
>  	return mlx5_eswitch_get_rep(esw, vport);  }
> EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
> +
> +u32 mlx5_eswitch_vport_match_metadata_enabled(struct mlx5_eswitch
> *esw)
> +{
> +	return esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA;
> +}
> +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
> +
Return type should be bool, and esw should be const (const struct mlx5_eswitch *esw).

> +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
> +					      u16 vport)
> +{
> +	return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport; }
> +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
This one too.

> diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h index
> 174eec0871d9..d729f5e4d70a 100644
> --- a/include/linux/mlx5/eswitch.h
> +++ b/include/linux/mlx5/eswitch.h
> @@ -64,6 +64,9 @@ struct mlx5_flow_handle *
> mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
>  				    int vport, u32 sqn);
> 
> +u32 mlx5_eswitch_vport_match_metadata_enabled(struct mlx5_eswitch
> +*esw);
> +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
> +u16 vport);
> +
>  #ifdef CONFIG_MLX5_ESWITCH
>  enum devlink_eswitch_encap_mode
>  mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
> --
> 2.21.0





[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux