On Fri, Apr 26, 2019 at 09:58:23PM +0000, Saeed Mahameed wrote: > From: Maor Gottlieb <maorg@xxxxxxxxxxxx> > > When in switchdev mode, we would like to treat loopback RoCE > traffic (on eswitch manager) as RDMA and not as regular > Ethernet traffic > In order to enable it we add flow steering rule that forward RoCE > loopback traffic to the HW RoCE filter (by adding allow rule). > In addition we add RoCE address in GID index 0, which will be > set in the RoCE loopback packet. > > Signed-off-by: Maor Gottlieb <maorg@xxxxxxxxxxxx> > Reviewed-by: Mark Bloch <markb@xxxxxxxxxxxx> > Signed-off-by: Saeed Mahameed <saeedm@xxxxxxxxxxxx> > --- > .../net/ethernet/mellanox/mlx5/core/Makefile | 2 +- > .../mellanox/mlx5/core/eswitch_offloads.c | 9 + > .../net/ethernet/mellanox/mlx5/core/rdma.c | 181 ++++++++++++++++++ > .../net/ethernet/mellanox/mlx5/core/rdma.h | 20 ++ > include/linux/mlx5/driver.h | 7 + > 5 files changed, 218 insertions(+), 1 deletion(-) > create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.c > create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.h > > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile > index 1a16f6d73cbc..5f0be9b36a04 100644 > --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile > +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile > @@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH) += en_rep.o en_tc.o en/tc_tun.o lib/port_tu > # > # Core extra > # > -mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o > +mlx5_core-$(CONFIG_MLX5_ESWITCH) += eswitch.o eswitch_offloads.o ecpf.o rdma.o > mlx5_core-$(CONFIG_MLX5_MPFS) += lib/mpfs.o > mlx5_core-$(CONFIG_VXLAN) += lib/vxlan.o > mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c > index 6c8a17ca236e..4b48bb98981e 100644 > --- 
a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c > +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c > @@ -37,6 +37,7 @@ > #include <linux/mlx5/fs.h> > #include "mlx5_core.h" > #include "eswitch.h" > +#include "rdma.h" > #include "en.h" > #include "fs_core.h" > #include "lib/devcom.h" > @@ -1713,6 +1714,13 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports, > esw->host_info.num_vfs = vf_nvports; > } > > + err = mlx5_rdma_enable_roce(esw->dev); > + if (err) { > + esw_debug(esw->dev, "Failed to enable RoCE, err: %d\n", > + err); You are already printing errors in all flows of mlx5_rdma_enable_roce(), so there is no need for an extra debug print here. > + err = 0; If you are not interested in the return value, it is better to declare the function as void. > + } > + > return 0; > > err_reps: > @@ -1751,6 +1759,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw) > num_vfs = esw->dev->priv.sriov.num_vfs; > } > > + mlx5_rdma_disable_roce(esw->dev); > esw_offloads_devcom_cleanup(esw); > esw_offloads_unload_all_reps(esw, num_vfs); > esw_offloads_steering_cleanup(esw); > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c > new file mode 100644 > index 000000000000..f6c5e4f91aa8 > --- /dev/null > +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c > @@ -0,0 +1,181 @@ > +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB > +/* Copyright (c) 2019 Mellanox Technologies */ > + > +#include <linux/mlx5/vport.h> > +#include <rdma/ib_verbs.h> > +#include <net/addrconf.h> > + > +#include "lib/mlx5.h" > +#include "eswitch.h" > +#include "fs_core.h" > + > +void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev) > +{ > + struct mlx5_core_roce *roce = &dev->priv.roce; > + > + if (IS_ERR_OR_NULL(roce->ft)) roce->ft should never hold an error pointer — it should be either NULL or a proper pointer, so IS_ERR_OR_NULL is the wrong check; a plain NULL check is enough.
> + return; > + > + mlx5_del_flow_rules(roce->allow_rule); > + mlx5_destroy_flow_group(roce->fg); > + mlx5_destroy_flow_table(roce->ft); > +} > + > +int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev) > +{ > + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); > + struct mlx5_core_roce *roce = &dev->priv.roce; > + struct mlx5_flow_handle *flow_rule = NULL; > + struct mlx5_flow_table_attr ft_attr = {}; > + struct mlx5_flow_namespace *ns = NULL; > + struct mlx5_flow_act flow_act = {0}; {0} -> {} > + struct mlx5_flow_spec *spec; > + struct mlx5_flow_table *ft; > + struct mlx5_flow_group *fg; > + void *match_criteria; > + u32 *flow_group_in; > + void *misc; > + int err; > + > + if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) && > + MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain))) > + return -EOPNOTSUPP; > + > + flow_group_in = kvzalloc(inlen, GFP_KERNEL); > + if (!flow_group_in) > + return -ENOMEM; > + spec = kvzalloc(sizeof(*spec), GFP_KERNEL); > + if (!spec) { > + kvfree(flow_group_in); > + return -ENOMEM; > + } > + > + ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX); > + if (!ns) { > + mlx5_core_err(dev, "Failed to get RDMA RX namespace"); > + err = -EOPNOTSUPP; > + goto free; > + } > + > + ft_attr.max_fte = 1; > + ft = mlx5_create_flow_table(ns, &ft_attr); > + if (IS_ERR(ft)) { > + mlx5_core_err(dev, "Failed to create RDMA RX flow table"); > + err = PTR_ERR(ft); > + goto free; > + } > + > + MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, > + MLX5_MATCH_MISC_PARAMETERS); > + match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, > + match_criteria); > + MLX5_SET_TO_ONES(fte_match_param, match_criteria, > + misc_parameters.source_port); > + > + fg = mlx5_create_flow_group(ft, flow_group_in); > + if (IS_ERR(fg)) { > + err = PTR_ERR(fg); > + mlx5_core_err(dev, "Failed to create RDMA RX flow group err(%d)\n", err); > + goto destroy_flow_table; > + } > + > + spec->match_criteria_enable = 
MLX5_MATCH_MISC_PARAMETERS; > + misc = MLX5_ADDR_OF(fte_match_param, spec->match_value, > + misc_parameters); > + MLX5_SET(fte_match_set_misc, misc, source_port, > + dev->priv.eswitch->manager_vport); > + misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria, > + misc_parameters); > + MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port); > + > + flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW; > + flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0); > + if (IS_ERR(flow_rule)) { > + err = PTR_ERR(flow_rule); > + mlx5_core_err(dev, "Failed to add RoCE allow rule, err=%d\n", > + err); > + goto destroy_flow_group; > + } > + > + kvfree(spec); > + kvfree(flow_group_in); > + roce->ft = ft; > + roce->fg = fg; > + roce->allow_rule = flow_rule; > + > + return 0; > + > +destroy_flow_table: > + mlx5_destroy_flow_table(ft); > +destroy_flow_group: > + mlx5_destroy_flow_group(fg); > +free: > + kvfree(spec); > + kvfree(flow_group_in); > + return err; > +} > + > +static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev) > +{ > + mlx5_core_roce_gid_set(dev, 0, 0, 0, > + NULL, NULL, false, 0, 0); > +} > + > +static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid) > +{ > + u8 hw_id[ETH_ALEN]; > + > + mlx5_query_nic_vport_mac_address(dev, 0, hw_id); > + gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL); > + addrconf_addr_eui48(&gid->raw[8], hw_id); > +} > + > +static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev) > +{ > + union ib_gid gid; > + u8 mac[ETH_ALEN]; > + > + mlx5_rdma_make_default_gid(dev, &gid); > + return mlx5_core_roce_gid_set(dev, 0, > + MLX5_ROCE_VERSION_1, > + 0, gid.raw, mac, > + false, 0, 1); > +} > + > +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) > +{ > + mlx5_rdma_disable_roce_steering(dev); > + mlx5_rdma_del_roce_addr(dev); > + mlx5_nic_vport_disable_roce(dev); > +} > + > +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) > +{ > + int err; > + > + err = 
mlx5_nic_vport_enable_roce(dev); > + if (err) { > + mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err); > + return err; > + } > + > + err = mlx5_rdma_add_roce_addr(dev); > + if (err) { > + mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err); > + goto disable_roce; > + } > + > + err = mlx5_rdma_enable_roce_steering(dev); > + if (err) { > + mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err); > + goto del_roce_addr; > + } > + > + return 0; > + > +del_roce_addr: > + mlx5_rdma_del_roce_addr(dev); > +disable_roce: > + mlx5_nic_vport_disable_roce(dev); > + return err; > +} > diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h > new file mode 100644 > index 000000000000..3d9e76c3d42f > --- /dev/null > +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h > @@ -0,0 +1,20 @@ > +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ > +/* Copyright (c) 2019 Mellanox Technologies. */ > + > +#ifndef __MLX5_RDMA_H__ > +#define __MLX5_RDMA_H__ > + > +#include "mlx5_core.h" > + > +#ifdef CONFIG_MLX5_ESWITCH > + > +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev); > +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev); > + > +#else /* CONFIG_MLX5_ESWITCH */ > + > +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; } > +static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {} > + > +#endif /* CONFIG_MLX5_ESWITCH */ > +#endif /* __MLX5_RDMA_H__ */ > diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h > index 582a9680b182..7fa95270dd59 100644 > --- a/include/linux/mlx5/driver.h > +++ b/include/linux/mlx5/driver.h > @@ -512,6 +512,12 @@ struct mlx5_rl_table { > struct mlx5_rl_entry *rl_entry; > }; > > +struct mlx5_core_roce { > + struct mlx5_flow_table *ft; > + struct mlx5_flow_group *fg; > + struct mlx5_flow_handle *allow_rule; > +}; > + > struct mlx5_priv { > struct mlx5_eq_table *eq_table; > > @@ -565,6 +571,7 @@ struct mlx5_priv { > 
struct mlx5_lag *lag; > struct mlx5_devcom *devcom; > unsigned long pci_dev_data; > + struct mlx5_core_roce roce; > struct mlx5_fc_stats fc_stats; > struct mlx5_rl_table rl_table; > > -- > 2.20.1 >