Re: [PATCH] RDMA: Fix storage of PortInfo CapabilityMask in the kernel

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, May 17, 2018 at 02:45:13PM -0600, Jason Gunthorpe wrote:
> The internal flag IP_BASED_GIDS was added to a field that was being used
> to hold the port Info CapabilityMask without considering the effects
> this will have. Since most drivers just use the value from the HW MAD
> it means IP_BASED_GIDS will also become set on any HW that sets the IBA
> flag IsOtherLocalChangesNoticeSupported - which is not intended.
>
> Fix this by keeping port_cap_flags only for the IBA CapabilityMask value
> and store unrelated flags externally. Move the bit definitions for this to
> ib_mad.h to make it clear what is happening.
>
> To keep the uAPI unchanged define a new set of flags in the uapi header
> that are only used by ib_uverbs_query_port_resp.port_cap_flags which match
> the current flags supported in rdma-core, and the values exposed by the
> current kernel.
>
> Fixes: b4a26a27287a ("IB: Report using RoCE IP based gids in port caps")
> Signed-off-by: Jason Gunthorpe <jgg@xxxxxxxxxxxx>
> ---
>  drivers/infiniband/core/uverbs_cmd.c          | 23 ++++++++++-
>  drivers/infiniband/hw/bnxt_re/ib_verbs.c      |  4 +-
>  drivers/infiniband/hw/mlx4/main.c             |  3 +-
>  drivers/infiniband/hw/mlx5/main.c             |  4 +-
>  drivers/infiniband/hw/ocrdma/ocrdma_verbs.c   |  9 ++---
>  drivers/infiniband/hw/qedr/verbs.c            |  2 +-
>  drivers/infiniband/hw/qib/qib_verbs.h         |  3 --
>  .../infiniband/hw/vmw_pvrdma/pvrdma_verbs.c   |  3 +-
>  include/rdma/ib_mad.h                         | 33 ++++++++++++++++
>  include/rdma/ib_verbs.h                       | 31 ++-------------
>  include/uapi/rdma/ib_user_ioctl_verbs.h       | 38 +++++++++++++++++++
>  include/uapi/rdma/ib_user_verbs.h             |  2 +-
>  12 files changed, 110 insertions(+), 45 deletions(-)
>
> This is the patch I alluded to for the IB_PORT_GRH_RQUIRED series.
>
> Since all old kernels will set bits in port_cap_mask based on
> IBAvalues and HW support, all bits in that field are already taken.
>
> Leon, this also obsoletes your patch moving to the uapi header.
>

Hi Jason,

It is not enough, since you are breaking ABI, let's do it till the end.

1. Please create new uapi header file, let's call it ib_iba.h.
   This file will include IBA spec definitions, but PortInfo is
   enough for now.
2. Provide patch for RDMAtool to reuse this file and update the
   tool to present PortInfo.


> diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c
> index e74262ee104c61..490cebe79f5065 100644
> --- a/drivers/infiniband/core/uverbs_cmd.c
> +++ b/drivers/infiniband/core/uverbs_cmd.c
> @@ -243,6 +243,27 @@ ssize_t ib_uverbs_query_device(struct ib_uverbs_file *file,
>  	return in_len;
>  }
>
> +/*
> + * ib_uverbs_query_port_resp.port_cap_flags started out as just a copy of the
> + * PortInfo CapabilitMask, but was extended with unique bits.
> + */
> +static u32 make_port_cap_flags(const struct ib_port_attr *attr)
> +{
> +	u32 res;
> +
> +	/* All IBA CapabilityMask bits are passed through here, except bit 26,
> +	 * which is overridden with IP_BASED_GIDS. This is due to a historical
> +	 * mistake in the implementation of IP_BASED_GIDS. Otherwise all other
> +	 * bits match the IBA definition across all kernel versions.
> +	 */
> +	res = attr->port_cap_flags & ~(u32)IB_UVERBS_PCF_IP_BASED_GIDS;
> +
> +	if (attr->ip_gids)
> +		res |= IB_UVERBS_PCF_IP_BASED_GIDS;
> +
> +	return res;
> +}
> +
>  ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
>  			     struct ib_device *ib_dev,
>  			     const char __user *buf,
> @@ -269,7 +290,7 @@ ssize_t ib_uverbs_query_port(struct ib_uverbs_file *file,
>  	resp.max_mtu 	     = attr.max_mtu;
>  	resp.active_mtu      = attr.active_mtu;
>  	resp.gid_tbl_len     = attr.gid_tbl_len;
> -	resp.port_cap_flags  = attr.port_cap_flags;
> +	resp.port_cap_flags  = make_port_cap_flags(&attr);
>  	resp.max_msg_sz      = attr.max_msg_sz;
>  	resp.bad_pkey_cntr   = attr.bad_pkey_cntr;
>  	resp.qkey_viol_cntr  = attr.qkey_viol_cntr;
> diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> index a76e206704d48d..c658d0240ffbaa 100644
> --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c
> @@ -243,8 +243,8 @@ int bnxt_re_query_port(struct ib_device *ibdev, u8 port_num,
>  	port_attr->gid_tbl_len = dev_attr->max_sgid;
>  	port_attr->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
>  				    IB_PORT_DEVICE_MGMT_SUP |
> -				    IB_PORT_VENDOR_CLASS_SUP |
> -				    IB_PORT_IP_BASED_GIDS;
> +				    IB_PORT_VENDOR_CLASS_SUP;
> +	port_attr->ip_gids = true;
>
>  	port_attr->max_msg_sz = (u32)BNXT_RE_MAX_MR_SIZE_LOW;
>  	port_attr->bad_pkey_cntr = 0;
> diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c
> index 5b70744f414ac7..9799530f4ce57e 100644
> --- a/drivers/infiniband/hw/mlx4/main.c
> +++ b/drivers/infiniband/hw/mlx4/main.c
> @@ -767,7 +767,8 @@ static int eth_link_query_port(struct ib_device *ibdev, u8 port,
>  					   IB_WIDTH_4X : IB_WIDTH_1X;
>  	props->active_speed	=  (((u8 *)mailbox->buf)[5] == 0x20 /*56Gb*/) ?
>  					   IB_SPEED_FDR : IB_SPEED_QDR;
> -	props->port_cap_flags	= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
> +	props->port_cap_flags	= IB_PORT_CM_SUP;
> +	props->ip_gids = true;
>  	props->gid_tbl_len	= mdev->dev->caps.gid_table_len[port];
>  	props->max_msg_sz	= mdev->dev->caps.max_msg_sz;
>  	props->pkey_tbl_len	= 1;
> diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
> index ab8cd5c034a2a0..79abe4b56e5d46 100644
> --- a/drivers/infiniband/hw/mlx5/main.c
> +++ b/drivers/infiniband/hw/mlx5/main.c
> @@ -419,8 +419,8 @@ static int mlx5_query_port_roce(struct ib_device *device, u8 port_num,
>  	translate_eth_proto_oper(eth_prot_oper, &props->active_speed,
>  				 &props->active_width);
>
> -	props->port_cap_flags  |= IB_PORT_CM_SUP;
> -	props->port_cap_flags  |= IB_PORT_IP_BASED_GIDS;
> +	props->port_cap_flags |= IB_PORT_CM_SUP;
> +	props->ip_gids = true;
>
>  	props->gid_tbl_len      = MLX5_CAP_ROCE(dev->mdev,
>  						roce_address_table_size);
> diff --git a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
> index 784ed6b09a469e..0c5db28d2b8031 100644
> --- a/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
> +++ b/drivers/infiniband/hw/ocrdma/ocrdma_verbs.c
> @@ -196,11 +196,10 @@ int ocrdma_query_port(struct ib_device *ibdev,
>  	props->sm_lid = 0;
>  	props->sm_sl = 0;
>  	props->state = port_state;
> -	props->port_cap_flags =
> -	    IB_PORT_CM_SUP |
> -	    IB_PORT_REINIT_SUP |
> -	    IB_PORT_DEVICE_MGMT_SUP | IB_PORT_VENDOR_CLASS_SUP |
> -	    IB_PORT_IP_BASED_GIDS;
> +	props->port_cap_flags = IB_PORT_CM_SUP | IB_PORT_REINIT_SUP |
> +				IB_PORT_DEVICE_MGMT_SUP |
> +				IB_PORT_VENDOR_CLASS_SUP;
> +	props->ip_gids = true;
>  	props->gid_tbl_len = OCRDMA_MAX_SGID;
>  	props->pkey_tbl_len = 1;
>  	props->bad_pkey_cntr = 0;
> diff --git a/drivers/infiniband/hw/qedr/verbs.c b/drivers/infiniband/hw/qedr/verbs.c
> index 35f3b6f8fd456b..338d9622e7f7b7 100644
> --- a/drivers/infiniband/hw/qedr/verbs.c
> +++ b/drivers/infiniband/hw/qedr/verbs.c
> @@ -224,7 +224,7 @@ int qedr_query_port(struct ib_device *ibdev, u8 port, struct ib_port_attr *attr)
>  	attr->lmc = 0;
>  	attr->sm_lid = 0;
>  	attr->sm_sl = 0;
> -	attr->port_cap_flags = IB_PORT_IP_BASED_GIDS;
> +	attr->ip_gids = true;
>  	if (rdma_protocol_iwarp(&dev->ibdev, 1)) {
>  		attr->gid_tbl_len = 1;
>  		attr->pkey_tbl_len = 1;
> diff --git a/drivers/infiniband/hw/qib/qib_verbs.h b/drivers/infiniband/hw/qib/qib_verbs.h
> index f9a46768a19a4a..e72562a8959aed 100644
> --- a/drivers/infiniband/hw/qib/qib_verbs.h
> +++ b/drivers/infiniband/hw/qib/qib_verbs.h
> @@ -78,9 +78,6 @@ struct qib_verbs_txreq;
>
>  #define QIB_VENDOR_IPG		cpu_to_be16(0xFFA0)
>
> -/* XXX Should be defined in ib_verbs.h enum ib_port_cap_flags */
> -#define IB_PORT_OTHER_LOCAL_CHANGES_SUP (1 << 26)
> -
>  #define IB_DEFAULT_GID_PREFIX	cpu_to_be64(0xfe80000000000000ULL)
>
>  /* Values for set/get portinfo VLCap OperationalVLs */
> diff --git a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
> index a51463cd2f3747..99c70d49b5e32a 100644
> --- a/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
> +++ b/drivers/infiniband/hw/vmw_pvrdma/pvrdma_verbs.c
> @@ -154,7 +154,8 @@ int pvrdma_query_port(struct ib_device *ibdev, u8 port,
>  	props->gid_tbl_len = resp->attrs.gid_tbl_len;
>  	props->port_cap_flags =
>  		pvrdma_port_cap_flags_to_ib(resp->attrs.port_cap_flags);
> -	props->port_cap_flags |= IB_PORT_CM_SUP | IB_PORT_IP_BASED_GIDS;
> +	props->port_cap_flags |= IB_PORT_CM_SUP;
> +	props->ip_gids = true;
>  	props->max_msg_sz = resp->attrs.max_msg_sz;
>  	props->bad_pkey_cntr = resp->attrs.bad_pkey_cntr;
>  	props->qkey_viol_cntr = resp->attrs.qkey_viol_cntr;
> diff --git a/include/rdma/ib_mad.h b/include/rdma/ib_mad.h
> index 2f4f1768ded4f7..f6ba366051c734 100644
> --- a/include/rdma/ib_mad.h
> +++ b/include/rdma/ib_mad.h
> @@ -262,6 +262,39 @@ struct ib_class_port_info {
>  	__be32			trap_qkey;
>  };
>
> +/* PortInfo CapabilityMask */
> +enum ib_port_capability_mask_bits {
> +	IB_PORT_SM = 1 << 1,
> +	IB_PORT_NOTICE_SUP = 1 << 2,
> +	IB_PORT_TRAP_SUP = 1 << 3,
> +	IB_PORT_OPT_IPD_SUP = 1 << 4,
> +	IB_PORT_AUTO_MIGR_SUP = 1 << 5,
> +	IB_PORT_SL_MAP_SUP = 1 << 6,
> +	IB_PORT_MKEY_NVRAM = 1 << 7,
> +	IB_PORT_PKEY_NVRAM = 1 << 8,
> +	IB_PORT_LED_INFO_SUP = 1 << 9,
> +	IB_PORT_SM_DISABLED = 1 << 10,
> +	IB_PORT_SYS_IMAGE_GUID_SUP = 1 << 11,
> +	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
> +	IB_PORT_EXTENDED_SPEEDS_SUP = 1 << 14,
> +	IB_PORT_CM_SUP = 1 << 16,
> +	IB_PORT_SNMP_TUNNEL_SUP = 1 << 17,
> +	IB_PORT_REINIT_SUP = 1 << 18,
> +	IB_PORT_DEVICE_MGMT_SUP = 1 << 19,
> +	IB_PORT_VENDOR_CLASS_SUP = 1 << 20,
> +	IB_PORT_DR_NOTICE_SUP = 1 << 21,
> +	IB_PORT_CAP_MASK_NOTICE_SUP = 1 << 22,
> +	IB_PORT_BOOT_MGMT_SUP = 1 << 23,
> +	IB_PORT_LINK_LATENCY_SUP = 1 << 24,
> +	IB_PORT_CLIENT_REG_SUP = 1 << 25,
> +	IB_PORT_OTHER_LOCAL_CHANGES_SUP = 1 << 26,
> +	IB_PORT_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
> +	IB_PORT_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
> +	IB_PORT_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
> +	IB_PORT_MCAST_FDB_TOP_SUP = 1 << 30,
> +	IB_PORT_HIERARCHY_INFO_SUP = 1ULL << 31,
> +};

The main advantage of enums is type-checks during compilation, so let's
do not do enum with bitshifts.

> +
>  #define OPA_CLASS_PORT_INFO_PR_SUPPORT BIT(26)
>
>  struct opa_class_port_info {
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index e849bd0fc61851..690983b4ff3748 100644
> --- a/include/rdma/ib_verbs.h
> +++ b/include/rdma/ib_verbs.h
> @@ -430,33 +430,6 @@ enum ib_port_state {
>  	IB_PORT_ACTIVE_DEFER	= 5
>  };
>
> -enum ib_port_cap_flags {
> -	IB_PORT_SM				= 1 <<  1,
> -	IB_PORT_NOTICE_SUP			= 1 <<  2,
> -	IB_PORT_TRAP_SUP			= 1 <<  3,
> -	IB_PORT_OPT_IPD_SUP                     = 1 <<  4,
> -	IB_PORT_AUTO_MIGR_SUP			= 1 <<  5,
> -	IB_PORT_SL_MAP_SUP			= 1 <<  6,
> -	IB_PORT_MKEY_NVRAM			= 1 <<  7,
> -	IB_PORT_PKEY_NVRAM			= 1 <<  8,
> -	IB_PORT_LED_INFO_SUP			= 1 <<  9,
> -	IB_PORT_SM_DISABLED			= 1 << 10,
> -	IB_PORT_SYS_IMAGE_GUID_SUP		= 1 << 11,
> -	IB_PORT_PKEY_SW_EXT_PORT_TRAP_SUP	= 1 << 12,
> -	IB_PORT_EXTENDED_SPEEDS_SUP             = 1 << 14,
> -	IB_PORT_CM_SUP				= 1 << 16,
> -	IB_PORT_SNMP_TUNNEL_SUP			= 1 << 17,
> -	IB_PORT_REINIT_SUP			= 1 << 18,
> -	IB_PORT_DEVICE_MGMT_SUP			= 1 << 19,
> -	IB_PORT_VENDOR_CLASS_SUP		= 1 << 20,
> -	IB_PORT_DR_NOTICE_SUP			= 1 << 21,
> -	IB_PORT_CAP_MASK_NOTICE_SUP		= 1 << 22,
> -	IB_PORT_BOOT_MGMT_SUP			= 1 << 23,
> -	IB_PORT_LINK_LATENCY_SUP		= 1 << 24,
> -	IB_PORT_CLIENT_REG_SUP			= 1 << 25,
> -	IB_PORT_IP_BASED_GIDS			= 1 << 26,
> -};
> -
>  enum ib_port_width {
>  	IB_WIDTH_1X	= 1,
>  	IB_WIDTH_4X	= 2,
> @@ -595,6 +568,9 @@ struct ib_port_attr {
>  	enum ib_mtu		max_mtu;
>  	enum ib_mtu		active_mtu;
>  	int			gid_tbl_len;
> +	unsigned int		grh_required:1;
> +	unsigned int		ip_gids:1;
> +	/* This is the value from PortInfo CapabilityMask, defined by IBA */
>  	u32			port_cap_flags;
>  	u32			max_msg_sz;
>  	u32			bad_pkey_cntr;
> @@ -610,7 +586,6 @@ struct ib_port_attr {
>  	u8			active_width;
>  	u8			active_speed;
>  	u8                      phys_state;
> -	bool			grh_required;
>  };
>
>  enum ib_device_modify_flags {
> diff --git a/include/uapi/rdma/ib_user_ioctl_verbs.h b/include/uapi/rdma/ib_user_ioctl_verbs.h
> index 625545d862d7e4..1220f1df3deddf 100644
> --- a/include/uapi/rdma/ib_user_ioctl_verbs.h
> +++ b/include/uapi/rdma/ib_user_ioctl_verbs.h
> @@ -40,6 +40,44 @@
>  #define RDMA_UAPI_PTR(_type, _name)	__aligned_u64 _name
>  #endif
>
> +enum ib_uverbs_query_port_cap_flags {
> +	IB_UVERBS_PCF_SM = 1 << 1,
> +	IB_UVERBS_PCF_NOTICE_SUP = 1 << 2,
> +	IB_UVERBS_PCF_TRAP_SUP = 1 << 3,
> +	IB_UVERBS_PCF_OPT_IPD_SUP = 1 << 4,
> +	IB_UVERBS_PCF_AUTO_MIGR_SUP = 1 << 5,
> +	IB_UVERBS_PCF_SL_MAP_SUP = 1 << 6,
> +	IB_UVERBS_PCF_MKEY_NVRAM = 1 << 7,
> +	IB_UVERBS_PCF_PKEY_NVRAM = 1 << 8,
> +	IB_UVERBS_PCF_LED_INFO_SUP = 1 << 9,
> +	IB_UVERBS_PCF_SM_DISABLED = 1 << 10,
> +	IB_UVERBS_PCF_SYS_IMAGE_GUID_SUP = 1 << 11,
> +	IB_UVERBS_PCF_PKEY_SW_EXT_PORT_TRAP_SUP = 1 << 12,
> +	IB_UVERBS_PCF_EXTENDED_SPEEDS_SUP = 1 << 14,
> +	IB_UVERBS_PCF_CM_SUP = 1 << 16,
> +	IB_UVERBS_PCF_SNMP_TUNNEL_SUP = 1 << 17,
> +	IB_UVERBS_PCF_REINIT_SUP = 1 << 18,
> +	IB_UVERBS_PCF_DEVICE_MGMT_SUP = 1 << 19,
> +	IB_UVERBS_PCF_VENDOR_CLASS_SUP = 1 << 20,
> +	IB_UVERBS_PCF_DR_NOTICE_SUP = 1 << 21,
> +	IB_UVERBS_PCF_CAP_MASK_NOTICE_SUP = 1 << 22,
> +	IB_UVERBS_PCF_BOOT_MGMT_SUP = 1 << 23,
> +	IB_UVERBS_PCF_LINK_LATENCY_SUP = 1 << 24,
> +	IB_UVERBS_PCF_CLIENT_REG_SUP = 1 << 25,
> +	/*
> +	 * IsOtherLocalChangesNoticeSupported is aliased by IP_BASED_GIDS and
> +	 * is inaccessible
> +	 */
> +	IB_UVERBS_PCF_LINK_SPEED_WIDTH_TABLE_SUP = 1 << 27,
> +	IB_UVERBS_PCF_VENDOR_SPECIFIC_MADS_TABLE_SUP = 1 << 28,
> +	IB_UVERBS_PCF_MCAST_PKEY_TRAP_SUPPRESSION_SUP = 1 << 29,
> +	IB_UVERBS_PCF_MCAST_FDB_TOP_SUP = 1 << 30,
> +	IB_UVERBS_PCF_HIERARCHY_INFO_SUP = 1ULL << 31,
> +
> +	/* NOTE this is an internal flag, not an IBA flag */
> +	IB_UVERBS_PCF_IP_BASED_GIDS = 1 << 26,
> +};

ditto

> +
>  enum ib_uverbs_flow_action_esp_keymat {
>  	IB_UVERBS_FLOW_ACTION_ESP_KEYMAT_AES_GCM,
>  };
> diff --git a/include/uapi/rdma/ib_user_verbs.h b/include/uapi/rdma/ib_user_verbs.h
> index 409507f83b9188..3029c2382ec811 100644
> --- a/include/uapi/rdma/ib_user_verbs.h
> +++ b/include/uapi/rdma/ib_user_verbs.h
> @@ -279,7 +279,7 @@ struct ib_uverbs_query_port {
>  };
>
>  struct ib_uverbs_query_port_resp {
> -	__u32 port_cap_flags;
> +	__u32 port_cap_flags;		/* see ib_uverbs_query_port_cap_flags */
>  	__u32 max_msg_sz;
>  	__u32 bad_pkey_cntr;
>  	__u32 qkey_viol_cntr;
> --
> 2.17.0
>

Attachment: signature.asc
Description: PGP signature


[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux