> -----Original Message-----
> From: netdev-owner@xxxxxxxxxxxxxxx <netdev-owner@xxxxxxxxxxxxxxx> On Behalf Of Saeed Mahameed
> Sent: Tuesday, June 18, 2019 12:53 AM
> To: Saeed Mahameed <saeedm@xxxxxxxxxxxx>; Leon Romanovsky <leonro@xxxxxxxxxxxx>
> Cc: netdev@xxxxxxxxxxxxxxx; linux-rdma@xxxxxxxxxxxxxxx; Jianbo Liu <jianbol@xxxxxxxxxxxx>;
>     Eli Britstein <elibr@xxxxxxxxxxxx>; Roi Dayan <roid@xxxxxxxxxxxx>; Mark Bloch <markb@xxxxxxxxxxxx>
> Subject: [PATCH mlx5-next 05/15] net/mlx5: E-Switch, Tag packet with vport number in VF vports and uplink ingress ACLs
>
> From: Jianbo Liu <jianbol@xxxxxxxxxxxx>
>
> When a dual-port VHCA sends a RoCE packet on its non-native port, and the
> packet arrives at its affiliated vport FDB, a mismatch might occur on the
> rules that match the packet's source vport, because the source vport is not
> represented by a single VHCA in this case. So we change to match on metadata
> instead of on the source vport.
>
> To do that, a rule is created in the ingress ACLs of all VF vports and of the
> uplink, to save the source vport number and vhca id in the packet's metadata,
> so that they can be matched on later.
>
> The metadata register used is the first of the 32-bit type C registers. It
> can be used for matching and for header modify operations. The higher 16 bits
> of this register hold the vhca id, and the lower 16 bits hold the vport
> number.
>
> This change is not only for dual-port RoCE. If HW and FW allow it, vport
> metadata matching is enabled by default.
>
> Signed-off-by: Jianbo Liu <jianbol@xxxxxxxxxxxx>
> Reviewed-by: Eli Britstein <elibr@xxxxxxxxxxxx>
> Reviewed-by: Roi Dayan <roid@xxxxxxxxxxxx>
> Reviewed-by: Mark Bloch <markb@xxxxxxxxxxxx>
> Signed-off-by: Saeed Mahameed <saeedm@xxxxxxxxxxxx>
> ---
>  .../net/ethernet/mellanox/mlx5/core/eswitch.c |   2 +
>  .../net/ethernet/mellanox/mlx5/core/eswitch.h |   9 +
>  .../mellanox/mlx5/core/eswitch_offloads.c     | 183 ++++++++++++++----
>  include/linux/mlx5/eswitch.h                  |   3 +
>  4 files changed, 161 insertions(+), 36 deletions(-)
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> index a42a23e505df..1235fd84ae3a 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
> @@ -1168,6 +1168,8 @@ void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
>
>          vport->ingress.drop_rule = NULL;
>          vport->ingress.allow_rule = NULL;
> +
> +        esw_vport_del_ingress_acl_modify_metadata(esw, vport);
>  }
>
>  void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> index 8b9f2cf58e91..4417a195832e 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
> @@ -68,6 +68,8 @@ struct vport_ingress {
>          struct mlx5_flow_group *allow_spoofchk_only_grp;
>          struct mlx5_flow_group *allow_untagged_only_grp;
>          struct mlx5_flow_group *drop_grp;
> +        int                     modify_metadata_id;

No need for random alignment. Just have one space after int.
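As a side note on the changelog: the REG_C_0 layout described there (vhca id
in the upper 16 bits, vport number in the lower 16) boils down to roughly the
sketch below. The helper names are made up purely for illustration; the patch
itself only adds mlx5_eswitch_get_vport_metadata_for_match() for the packing
side.

/* Illustration only: packing/unpacking of the 32-bit REG_C_0 metadata as
 * described in the changelog. These are not the driver's actual helpers.
 */
static inline u32 reg_c0_metadata_pack(u16 vhca_id, u16 vport_num)
{
        return ((u32)vhca_id << 16) | vport_num;
}

static inline u16 reg_c0_metadata_to_vhca_id(u32 metadata)
{
        return metadata >> 16;
}

static inline u16 reg_c0_metadata_to_vport_num(u32 metadata)
{
        return metadata & 0xffff;
}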
> +        struct mlx5_flow_handle *modify_metadata_rule;
>          struct mlx5_flow_handle *allow_rule;
>          struct mlx5_flow_handle *drop_rule;
>          struct mlx5_fc *drop_counter;
> @@ -196,6 +198,10 @@ struct mlx5_esw_functions {
>          u16 num_vfs;
>  };
>
> +enum {
> +        MLX5_ESWITCH_VPORT_MATCH_METADATA = BIT(0),
> +};
> +
>  struct mlx5_eswitch {
>          struct mlx5_core_dev *dev;
>          struct mlx5_nb nb;
> @@ -203,6 +209,7 @@ struct mlx5_eswitch {
>          struct hlist_head mc_table[MLX5_L2_ADDR_HASH_SIZE];
>          struct workqueue_struct *work_queue;
>          struct mlx5_vport *vports;
> +        u32                     flags;

Same as above, no need for extra alignment.

>          int total_vports;
>          int enabled_vports;
>          /* Synchronize between vport change events
> @@ -240,6 +247,8 @@ void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
>                                    struct mlx5_vport *vport);
>  void esw_vport_disable_ingress_acl(struct mlx5_eswitch *esw,
>                                     struct mlx5_vport *vport);
> +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
> +                                               struct mlx5_vport *vport);
>
>  /* E-Switch API */
>  int mlx5_eswitch_init(struct mlx5_core_dev *dev);
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> index 17abb98b48af..871ae44dc132 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> @@ -1555,32 +1555,16 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
>  static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
>                                               struct mlx5_vport *vport)
>  {
> -        struct mlx5_core_dev *dev = esw->dev;
>          struct mlx5_flow_act flow_act = {0};
>          struct mlx5_flow_spec *spec;
>          int err = 0;
>
>          /* For prio tag mode, there is only 1 FTEs:
> -         * 1) Untagged packets - push prio tag VLAN, allow
> +         * 1) Untagged packets - push prio tag VLAN and modify metadata if
> +         *    required, allow
>           * Unmatched traffic is allowed by default
>           */
>
> -        if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
> -                return -EOPNOTSUPP;
> -
> -        esw_vport_cleanup_ingress_rules(esw, vport);
> -
> -        err = esw_vport_enable_ingress_acl(esw, vport);
> -        if (err) {
> -                mlx5_core_warn(esw->dev,
> -                               "failed to enable prio tag ingress acl (%d) on vport[%d]\n",
> -                               err, vport->vport);
> -                return err;
> -        }
> -
> -        esw_debug(esw->dev,
> -                  "vport[%d] configure ingress rules\n", vport->vport);
> -
>          spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
>          if (!spec) {
>                  err = -ENOMEM;
> @@ -1596,6 +1580,12 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
>          flow_act.vlan[0].ethtype = ETH_P_8021Q;
>          flow_act.vlan[0].vid = 0;
>          flow_act.vlan[0].prio = 0;
> +
> +        if (vport->ingress.modify_metadata_rule) {
> +                flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
> +                flow_act.modify_id = vport->ingress.modify_metadata_id;
> +        }
> +
>          vport->ingress.allow_rule =
>                  mlx5_add_flow_rules(vport->ingress.acl, spec,
>                                      &flow_act, NULL, 0);
> @@ -1616,6 +1606,59 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
>          return err;
>  }
>
> +static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
> +                                                     struct mlx5_vport *vport)
> +{
> +        u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
> +        struct mlx5_flow_act flow_act = {};
> +        struct mlx5_flow_spec spec = {};
> +        int err = 0;
> +
> +        MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
> +        MLX5_SET(set_action_in, action, field, MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
> +        MLX5_SET(set_action_in, action, data,
> +                 mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport));
> +
> +        err = mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
> +                                       1, action, &vport->ingress.modify_metadata_id);
> +
> +        if (err) {
> +                esw_warn(esw->dev,
> +                         "failed to alloc modify header for vport %d ingress acl (%d)\n",
> +                         vport->vport, err);
> +                return err;
> +        }
> +
> +        flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
> +        flow_act.modify_id = vport->ingress.modify_metadata_id;
> +        vport->ingress.modify_metadata_rule = mlx5_add_flow_rules(vport->ingress.acl,
> +                                                                  &spec, &flow_act, NULL, 0);
> +        if (IS_ERR(vport->ingress.modify_metadata_rule)) {
> +                err = PTR_ERR(vport->ingress.modify_metadata_rule);
> +                esw_warn(esw->dev,
> +                         "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
> +                         vport->vport, err);
> +                vport->ingress.modify_metadata_rule = NULL;
> +                goto out;
> +        }
> +
> +out:
> +        if (err)
> +                mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
> +        return err;
> +}
> +
> +void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
> +                                               struct mlx5_vport *vport)
> +{
> +        if (vport->ingress.modify_metadata_rule) {
> +                mlx5_del_flow_rules(vport->ingress.modify_metadata_rule);
> +                mlx5_modify_header_dealloc(esw->dev, vport->ingress.modify_metadata_id);
> +
> +                vport->ingress.modify_metadata_rule = NULL;
> +        }
> +}
> +
>  static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
>                                              struct mlx5_vport *vport)
>  {
> @@ -1623,6 +1666,9 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
>          struct mlx5_flow_spec *spec;
>          int err = 0;
>
> +        if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
> +                return 0;
> +
>          /* For prio tag mode, there is only 1 FTEs:
>           * 1) prio tag packets - pop the prio tag VLAN, allow
>           * Unmatched traffic is allowed by default
> @@ -1676,27 +1722,77 @@ static int esw_vport_egress_prio_tag_config(struct mlx5_eswitch *esw,
>          return err;
>  }
>
> -static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
> +static int esw_vport_ingress_common_config(struct mlx5_eswitch *esw,
> +                                           struct mlx5_vport *vport)
>  {
> -        struct mlx5_vport *vport = NULL;
> -        int i, j;
>          int err;
>
> -        mlx5_esw_for_each_vf_vport(esw, i, vport, nvports) {
> +        if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
> +            !MLX5_CAP_GEN(esw->dev, prio_tag_required))
> +                return 0;
> +
> +        esw_vport_cleanup_ingress_rules(esw, vport);
> +
> +        err = esw_vport_enable_ingress_acl(esw, vport);
> +        if (err) {
> +                esw_warn(esw->dev,
> +                         "failed to enable ingress acl (%d) on vport[%d]\n",
> +                         err, vport->vport);
> +                return err;
> +        }
> +
> +        esw_debug(esw->dev,
> +                  "vport[%d] configure ingress rules\n", vport->vport);
> +
> +        if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
> +                err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
> +                if (err)
> +                        goto out;
> +        }
> +
> +        if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
> +            (vport->vport >= MLX5_VPORT_FIRST_VF &&
> +             vport->vport <= esw->dev->priv.sriov.num_vfs)) {
>                  err = esw_vport_ingress_prio_tag_config(esw, vport);
>                  if (err)
> -                        goto err_ingress;
> -                err = esw_vport_egress_prio_tag_config(esw, vport);
> +                        goto out;
> +        }
> +
> +out:
> +        if (err)
> +                esw_vport_disable_ingress_acl(esw, vport);
> +        return err;
> +}
> +
> +static int esw_create_offloads_acl_tables(struct mlx5_eswitch *esw)
> +{
> +        struct mlx5_vport *vport;
> +        int i, j;
> +        int err;
> +
> +        mlx5_esw_for_all_vports(esw, i, vport) {
> +                err = esw_vport_ingress_common_config(esw, vport);
>                  if (err)
> -                        goto err_egress;
> +                        goto err_ingress;
> +
> +                if (vport->vport >= MLX5_VPORT_FIRST_VF &&
> +                    vport->vport <= esw->dev->priv.sriov.num_vfs) {

Add a helper API, e.g. mlx5_esw_is_vport(const struct mlx5_eswitch *esw,
const struct mlx5_vport *vport), and use it in the two places that open-code
this check in the ingress and egress config (see the sketch at the end of
this mail).

> +                        err = esw_vport_egress_prio_tag_config(esw, vport);
> +                        if (err)
> +                                goto err_egress;
> +                }
>          }
>
> +        if (mlx5_eswitch_vport_match_metadata_enabled(esw))
> +                esw_info(esw->dev,
> +                         "Use metadata reg_c as source vport to match\n");
> +
>          return 0;
>
>  err_egress:
>          esw_vport_disable_ingress_acl(esw, vport);
>  err_ingress:
> -        mlx5_esw_for_each_vf_vport_reverse(esw, j, vport, i - 1) {
> +        for (j = MLX5_VPORT_PF; j < i; j++) {

Keep the reverse order as before.

> +                vport = &esw->vports[j];
>                  esw_vport_disable_egress_acl(esw, vport);
>                  esw_vport_disable_ingress_acl(esw, vport);
>          }
> @@ -1704,15 +1800,17 @@ static int esw_prio_tag_acls_config(struct mlx5_eswitch *esw, int nvports)
>          return err;
>  }
>
> -static void esw_prio_tag_acls_cleanup(struct mlx5_eswitch *esw)
> +static void esw_destroy_offloads_acl_tables(struct mlx5_eswitch *esw)
>  {
>          struct mlx5_vport *vport;
>          int i;
>
> -        mlx5_esw_for_each_vf_vport(esw, i, vport, esw->nvports) {
> +        mlx5_esw_for_all_vports(esw, i, vport) {

If you are changing this, please iterate in reverse order, to keep it an
exact mirror of the create/enable sequence.

>                  esw_vport_disable_egress_acl(esw, vport);
>                  esw_vport_disable_ingress_acl(esw, vport);
>          }
> +
> +        esw->flags &= ~MLX5_ESWITCH_VPORT_MATCH_METADATA;
>  }
>
>  static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
> @@ -1722,15 +1820,13 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
>          memset(&esw->fdb_table.offloads, 0, sizeof(struct offloads_fdb));
>          mutex_init(&esw->fdb_table.offloads.fdb_prio_lock);
>
> -        if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
> -                err = esw_prio_tag_acls_config(esw, nvports);
> -                if (err)
> -                        return err;
> -        }
> +        err = esw_create_offloads_acl_tables(esw);
> +        if (err)
> +                return err;
>
>          err = esw_create_offloads_fdb_tables(esw, nvports);
>          if (err)
> -                return err;
> +                goto create_fdb_err;
>
>          err = esw_create_offloads_table(esw, nvports);
>          if (err)
> @@ -1748,6 +1844,9 @@ static int esw_offloads_steering_init(struct mlx5_eswitch *esw, int nvports)
>  create_ft_err:
>          esw_destroy_offloads_fdb_tables(esw);
>
> +create_fdb_err:
> +        esw_destroy_offloads_acl_tables(esw);
> +
>          return err;
>  }
>
> @@ -1756,8 +1855,7 @@ static void esw_offloads_steering_cleanup(struct mlx5_eswitch *esw)
>          esw_destroy_vport_rx_group(esw);
>          esw_destroy_offloads_table(esw);
>          esw_destroy_offloads_fdb_tables(esw);
> -        if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
> -                esw_prio_tag_acls_cleanup(esw);
> +        esw_destroy_offloads_acl_tables(esw);
>  }
>
>  static void esw_functions_changed_event_handler(struct work_struct *work)
> @@ -2290,3 +2388,16 @@ struct mlx5_eswitch_rep *mlx5_eswitch_vport_rep(struct mlx5_eswitch *esw,
>          return mlx5_eswitch_get_rep(esw, vport);
>  }
>  EXPORT_SYMBOL(mlx5_eswitch_vport_rep);
> +
> +u32 mlx5_eswitch_vport_match_metadata_enabled(struct mlx5_eswitch *esw)
> +{
> +        return esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA;
> +}
> +EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
> +

Return type should be bool, and it should take a const *esw.
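I.e. roughly the following (assuming nothing in the helper needs to modify
the eswitch, which appears to be the case here); the prototype in
include/linux/mlx5/eswitch.h would change the same way:

bool mlx5_eswitch_vport_match_metadata_enabled(const struct mlx5_eswitch *esw)
{
        /* only reads esw->flags, so the pointer can be const */
        return !!(esw->flags & MLX5_ESWITCH_VPORT_MATCH_METADATA);
}
EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);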
> +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
> +                                              u16 vport)
> +{
> +        return ((MLX5_CAP_GEN(esw->dev, vhca_id) & 0xffff) << 16) | vport;
> +}
> +EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);

This one should take a const *esw too.

> diff --git a/include/linux/mlx5/eswitch.h b/include/linux/mlx5/eswitch.h
> index 174eec0871d9..d729f5e4d70a 100644
> --- a/include/linux/mlx5/eswitch.h
> +++ b/include/linux/mlx5/eswitch.h
> @@ -64,6 +64,9 @@ struct mlx5_flow_handle *
>  mlx5_eswitch_add_send_to_vport_rule(struct mlx5_eswitch *esw,
>                                      int vport, u32 sqn);
>
> +u32 mlx5_eswitch_vport_match_metadata_enabled(struct mlx5_eswitch *esw);
> +u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
> +                                              u16 vport);
> +
>  #ifdef CONFIG_MLX5_ESWITCH
>  enum devlink_eswitch_encap_mode
>  mlx5_eswitch_get_encap_mode(const struct mlx5_core_dev *dev);
> --
> 2.21.0
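Regarding the helper suggested above for the VF vport range check, I had
something like this in mind (untested sketch; the name and exact placement
are just a suggestion, the range check itself is copied from the patch):

static bool mlx5_esw_is_vport(const struct mlx5_eswitch *esw,
                              const struct mlx5_vport *vport)
{
        /* true only for the VF vport range, not PF/uplink */
        return vport->vport >= MLX5_VPORT_FIRST_VF &&
               vport->vport <= esw->dev->priv.sriov.num_vfs;
}

Both callers then reduce to e.g.:

        if (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
            mlx5_esw_is_vport(esw, vport)) {
                err = esw_vport_ingress_prio_tag_config(esw, vport);
                if (err)
                        goto out;
        }

and similarly for the VF check before esw_vport_egress_prio_tag_config() in
esw_create_offloads_acl_tables().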