Re: [PATCH mlx5-next 09/11] net/mlx5: Eswitch, enable RoCE loopback traffic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Apr 26, 2019 at 09:58:23PM +0000, Saeed Mahameed wrote:
> From: Maor Gottlieb <maorg@xxxxxxxxxxxx>
>
> When in switchdev mode, we would like to treat loopback RoCE
> traffic (on eswitch manager) as RDMA and not as regular
> Ethernet traffic
> In order to enable it we add flow steering rule that forward RoCE
> loopback traffic to the HW RoCE filter (by adding allow rule).
> In addition we add RoCE address in GID index 0, which will be
> set in the RoCE loopback packet.
>
> Signed-off-by: Maor Gottlieb <maorg@xxxxxxxxxxxx>
> Reviewed-by: Mark Bloch <markb@xxxxxxxxxxxx>
> Signed-off-by: Saeed Mahameed <saeedm@xxxxxxxxxxxx>
> ---
>  .../net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
>  .../mellanox/mlx5/core/eswitch_offloads.c     |   9 +
>  .../net/ethernet/mellanox/mlx5/core/rdma.c    | 181 ++++++++++++++++++
>  .../net/ethernet/mellanox/mlx5/core/rdma.h    |  20 ++
>  include/linux/mlx5/driver.h                   |   7 +
>  5 files changed, 218 insertions(+), 1 deletion(-)
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.c
>  create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/rdma.h
>
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> index 1a16f6d73cbc..5f0be9b36a04 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
> @@ -35,7 +35,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tu
>  #
>  # Core extra
>  #
> -mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o ecpf.o
> +mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o ecpf.o rdma.o
>  mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
>  mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
>  mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> index 6c8a17ca236e..4b48bb98981e 100644
> --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
> @@ -37,6 +37,7 @@
>  #include <linux/mlx5/fs.h>
>  #include "mlx5_core.h"
>  #include "eswitch.h"
> +#include "rdma.h"
>  #include "en.h"
>  #include "fs_core.h"
>  #include "lib/devcom.h"
> @@ -1713,6 +1714,13 @@ int esw_offloads_init(struct mlx5_eswitch *esw, int vf_nvports,
>  		esw->host_info.num_vfs = vf_nvports;
>  	}
>
> +	err = mlx5_rdma_enable_roce(esw->dev);
> +	if (err) {
> +		esw_debug(esw->dev, "Failed to enable RoCE, err: %d\n",
> +			  err);

mlx5_rdma_enable_roce() already prints an error on every failure path,
so there is no need for an extra debug print here.

> +		err = 0;

If you are not interested in the return value, it is better to declare the function as void.

> +	}
> +
>  	return 0;
>
>  err_reps:
> @@ -1751,6 +1759,7 @@ void esw_offloads_cleanup(struct mlx5_eswitch *esw)
>  		num_vfs = esw->dev->priv.sriov.num_vfs;
>  	}
>
> +	mlx5_rdma_disable_roce(esw->dev);
>  	esw_offloads_devcom_cleanup(esw);
>  	esw_offloads_unload_all_reps(esw, num_vfs);
>  	esw_offloads_steering_cleanup(esw);
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.c b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
> new file mode 100644
> index 000000000000..f6c5e4f91aa8
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.c
> @@ -0,0 +1,181 @@
> +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
> +/* Copyright (c) 2019 Mellanox Technologies */
> +
> +#include <linux/mlx5/vport.h>
> +#include <rdma/ib_verbs.h>
> +#include <net/addrconf.h>
> +
> +#include "lib/mlx5.h"
> +#include "eswitch.h"
> +#include "fs_core.h"
> +
> +void mlx5_rdma_disable_roce_steering(struct mlx5_core_dev *dev)
> +{
> +	struct mlx5_core_roce *roce = &dev->priv.roce;
> +
> +	if (IS_ERR_OR_NULL(roce->ft))

roce->ft should never hold an error pointer: it is either NULL or a valid pointer, so a plain NULL check is enough here.

> +		return;
> +
> +	mlx5_del_flow_rules(roce->allow_rule);
> +	mlx5_destroy_flow_group(roce->fg);
> +	mlx5_destroy_flow_table(roce->ft);
> +}
> +
> +int mlx5_rdma_enable_roce_steering(struct mlx5_core_dev *dev)
> +{
> +	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
> +	struct mlx5_core_roce *roce = &dev->priv.roce;
> +	struct mlx5_flow_handle *flow_rule = NULL;
> +	struct mlx5_flow_table_attr ft_attr = {};
> +	struct mlx5_flow_namespace *ns = NULL;
> +	struct mlx5_flow_act flow_act = {0};

Please use {} instead of {0} here, consistent with the ft_attr initializer above.

> +	struct mlx5_flow_spec *spec;
> +	struct mlx5_flow_table *ft;
> +	struct mlx5_flow_group *fg;
> +	void *match_criteria;
> +	u32 *flow_group_in;
> +	void *misc;
> +	int err;
> +
> +	if (!(MLX5_CAP_FLOWTABLE_RDMA_RX(dev, ft_support) &&
> +	      MLX5_CAP_FLOWTABLE_RDMA_RX(dev, table_miss_action_domain)))
> +		return -EOPNOTSUPP;
> +
> +	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
> +	if (!flow_group_in)
> +		return -ENOMEM;
> +	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
> +	if (!spec) {
> +		kvfree(flow_group_in);
> +		return -ENOMEM;
> +	}
> +
> +	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_RDMA_RX);
> +	if (!ns) {
> +		mlx5_core_err(dev, "Failed to get RDMA RX namespace");
> +		err = -EOPNOTSUPP;
> +		goto free;
> +	}
> +
> +	ft_attr.max_fte = 1;
> +	ft = mlx5_create_flow_table(ns, &ft_attr);
> +	if (IS_ERR(ft)) {
> +		mlx5_core_err(dev, "Failed to create RDMA RX flow table");
> +		err = PTR_ERR(ft);
> +		goto free;
> +	}
> +
> +	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
> +		 MLX5_MATCH_MISC_PARAMETERS);
> +	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
> +				      match_criteria);
> +	MLX5_SET_TO_ONES(fte_match_param, match_criteria,
> +			 misc_parameters.source_port);
> +
> +	fg = mlx5_create_flow_group(ft, flow_group_in);
> +	if (IS_ERR(fg)) {
> +		err = PTR_ERR(fg);
> +		mlx5_core_err(dev, "Failed to create RDMA RX flow group err(%d)\n", err);
> +		goto destroy_flow_table;
> +	}
> +
> +	spec->match_criteria_enable = MLX5_MATCH_MISC_PARAMETERS;
> +	misc = MLX5_ADDR_OF(fte_match_param, spec->match_value,
> +			    misc_parameters);
> +	MLX5_SET(fte_match_set_misc, misc, source_port,
> +		 dev->priv.eswitch->manager_vport);
> +	misc = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
> +			    misc_parameters);
> +	MLX5_SET_TO_ONES(fte_match_set_misc, misc, source_port);
> +
> +	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
> +	flow_rule = mlx5_add_flow_rules(ft, spec, &flow_act, NULL, 0);
> +	if (IS_ERR(flow_rule)) {
> +		err = PTR_ERR(flow_rule);
> +		mlx5_core_err(dev, "Failed to add RoCE allow rule, err=%d\n",
> +			      err);
> +		goto destroy_flow_group;
> +	}
> +
> +	kvfree(spec);
> +	kvfree(flow_group_in);
> +	roce->ft = ft;
> +	roce->fg = fg;
> +	roce->allow_rule = flow_rule;
> +
> +	return 0;
> +
> +destroy_flow_table:
> +	mlx5_destroy_flow_table(ft);
> +destroy_flow_group:
> +	mlx5_destroy_flow_group(fg);
> +free:
> +	kvfree(spec);
> +	kvfree(flow_group_in);
> +	return err;
> +}
> +
> +static void mlx5_rdma_del_roce_addr(struct mlx5_core_dev *dev)
> +{
> +	mlx5_core_roce_gid_set(dev, 0, 0, 0,
> +			       NULL, NULL, false, 0, 0);
> +}
> +
> +static void mlx5_rdma_make_default_gid(struct mlx5_core_dev *dev, union ib_gid *gid)
> +{
> +	u8 hw_id[ETH_ALEN];
> +
> +	mlx5_query_nic_vport_mac_address(dev, 0, hw_id);
> +	gid->global.subnet_prefix = cpu_to_be64(0xfe80000000000000LL);
> +	addrconf_addr_eui48(&gid->raw[8], hw_id);
> +}
> +
> +static int mlx5_rdma_add_roce_addr(struct mlx5_core_dev *dev)
> +{
> +	union ib_gid gid;
> +	u8 mac[ETH_ALEN];
> +
> +	mlx5_rdma_make_default_gid(dev, &gid);
> +	return mlx5_core_roce_gid_set(dev, 0,
> +				      MLX5_ROCE_VERSION_1,
> +				      0, gid.raw, mac,
> +				      false, 0, 1);
> +}
> +
> +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev)
> +{
> +	mlx5_rdma_disable_roce_steering(dev);
> +	mlx5_rdma_del_roce_addr(dev);
> +	mlx5_nic_vport_disable_roce(dev);
> +}
> +
> +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev)
> +{
> +	int err;
> +
> +	err = mlx5_nic_vport_enable_roce(dev);
> +	if (err) {
> +		mlx5_core_err(dev, "Failed to enable RoCE: %d\n", err);
> +		return err;
> +	}
> +
> +	err = mlx5_rdma_add_roce_addr(dev);
> +	if (err) {
> +		mlx5_core_err(dev, "Failed to add RoCE address: %d\n", err);
> +		goto disable_roce;
> +	}
> +
> +	err = mlx5_rdma_enable_roce_steering(dev);
> +	if (err) {
> +		mlx5_core_err(dev, "Failed to enable RoCE steering: %d\n", err);
> +		goto del_roce_addr;
> +	}
> +
> +	return 0;
> +
> +del_roce_addr:
> +	mlx5_rdma_del_roce_addr(dev);
> +disable_roce:
> +	mlx5_nic_vport_disable_roce(dev);
> +	return err;
> +}
> diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rdma.h b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
> new file mode 100644
> index 000000000000..3d9e76c3d42f
> --- /dev/null
> +++ b/drivers/net/ethernet/mellanox/mlx5/core/rdma.h
> @@ -0,0 +1,20 @@
> +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
> +/* Copyright (c) 2019 Mellanox Technologies. */
> +
> +#ifndef __MLX5_RDMA_H__
> +#define __MLX5_RDMA_H__
> +
> +#include "mlx5_core.h"
> +
> +#ifdef CONFIG_MLX5_ESWITCH
> +
> +int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev);
> +void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev);
> +
> +#else /* CONFIG_MLX5_ESWITCH */
> +
> +static inline int mlx5_rdma_enable_roce(struct mlx5_core_dev *dev) { return 0; }
> +static inline void mlx5_rdma_disable_roce(struct mlx5_core_dev *dev) {}
> +
> +#endif /* CONFIG_MLX5_ESWITCH */
> +#endif /* __MLX5_RDMA_H__ */
> diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
> index 582a9680b182..7fa95270dd59 100644
> --- a/include/linux/mlx5/driver.h
> +++ b/include/linux/mlx5/driver.h
> @@ -512,6 +512,12 @@ struct mlx5_rl_table {
>  	struct mlx5_rl_entry   *rl_entry;
>  };
>
> +struct mlx5_core_roce {
> +	struct mlx5_flow_table *ft;
> +	struct mlx5_flow_group *fg;
> +	struct mlx5_flow_handle *allow_rule;
> +};
> +
>  struct mlx5_priv {
>  	struct mlx5_eq_table	*eq_table;
>
> @@ -565,6 +571,7 @@ struct mlx5_priv {
>  	struct mlx5_lag		*lag;
>  	struct mlx5_devcom	*devcom;
>  	unsigned long		pci_dev_data;
> +	struct mlx5_core_roce	roce;
>  	struct mlx5_fc_stats		fc_stats;
>  	struct mlx5_rl_table            rl_table;
>
> --
> 2.20.1
>




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux