From: Avihai Horon <avihaih@xxxxxxxxxx> Introduce the ibv_query_gid_table verb which queries the GID tables of the given device and stores the queried data in a buffer provided by the user. If the kernel doesn't support ioctl or the needed uverbs method, the API will try to query the GID tables via sysfs. This API provides a faster way to query the GID tables of a device using a single call over ioctl, instead of multiple calls to open, close and read multiple sysfs files for a single GID table entry. Signed-off-by: Avihai Horon <avihaih@xxxxxxxxxx> Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> --- debian/libibverbs1.symbols | 1 + libibverbs/cmd_device.c | 117 +++++++++++++++++++++++++++++--- libibverbs/libibverbs.map.in | 1 + libibverbs/man/CMakeLists.txt | 1 + libibverbs/man/ibv_query_gid_table.3.md | 73 ++++++++++++++++++++ libibverbs/verbs.h | 16 +++++ 6 files changed, 199 insertions(+), 10 deletions(-) create mode 100644 libibverbs/man/ibv_query_gid_table.3.md diff --git a/debian/libibverbs1.symbols b/debian/libibverbs1.symbols index 536d543..99257de 100644 --- a/debian/libibverbs1.symbols +++ b/debian/libibverbs1.symbols @@ -11,6 +11,7 @@ libibverbs.so.1 libibverbs1 #MINVER# IBVERBS_1.11@IBVERBS_1.11 32 (symver)IBVERBS_PRIVATE_25 25 _ibv_query_gid_ex@IBVERBS_1.11 32 + _ibv_query_gid_table@IBVERBS_1.11 32 ibv_ack_async_event@IBVERBS_1.0 1.1.6 ibv_ack_async_event@IBVERBS_1.1 1.1.6 ibv_ack_cq_events@IBVERBS_1.0 1.1.6 diff --git a/libibverbs/cmd_device.c b/libibverbs/cmd_device.c index 4f85010..8bcfbb4 100644 --- a/libibverbs/cmd_device.c +++ b/libibverbs/cmd_device.c @@ -321,7 +321,7 @@ static int query_sysfs_gid_type(struct ibv_context *context, uint8_t port_num, static int query_sysfs_gid_entry(struct ibv_context *context, uint32_t port_num, uint32_t gid_index, struct ibv_gid_entry *entry, - uint32_t attr_mask) + uint32_t attr_mask, int link_layer) { enum ibv_gid_type_sysfs gid_type; struct ibv_port_attr port_attr = {}; @@ -342,14 +342,18 @@ static int 
query_sysfs_gid_entry(struct ibv_context *context, uint32_t port_num, return EINVAL; if (gid_type == IBV_GID_TYPE_SYSFS_IB_ROCE_V1) { - ret = ibv_query_port(context, port_num, &port_attr); - if (ret) - goto out; + if (link_layer < 0) { + ret = ibv_query_port(context, port_num, + &port_attr); + if (ret) + goto out; + + link_layer = port_attr.link_layer; + } - if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) { + if (link_layer == IBV_LINK_LAYER_INFINIBAND) { entry->gid_type = IBV_GID_TYPE_IB; - } else if (port_attr.link_layer == - IBV_LINK_LAYER_ETHERNET) { + } else if (link_layer == IBV_LINK_LAYER_ETHERNET) { entry->gid_type = IBV_GID_TYPE_ROCE_V1; } else { ret = EINVAL; @@ -368,8 +372,64 @@ out: return ret; } -/* Using async_event cmd_name because query_gid_ex is not in - * verbs_context_ops while async_event is and doesn't use ioctl. +static int query_gid_table_fb(struct ibv_context *context, + struct ibv_gid_entry *entries, size_t max_entries, + uint64_t *num_entries, size_t entry_size) +{ + struct ibv_device_attr dev_attr = {}; + struct ibv_port_attr port_attr = {}; + struct ibv_gid_entry entry = {}; + int attr_mask; + void *tmp; + int i, j; + int ret; + + ret = ibv_query_device(context, &dev_attr); + if (ret) + goto out; + + tmp = entries; + *num_entries = 0; + attr_mask = VERBS_QUERY_GID_ATTR_GID | VERBS_QUERY_GID_ATTR_TYPE | + VERBS_QUERY_GID_ATTR_NDEV_IFINDEX; + for (i = 0; i < dev_attr.phys_port_cnt; i++) { + ret = ibv_query_port(context, i + 1, &port_attr); + if (ret) + goto out; + + for (j = 0; j < port_attr.gid_tbl_len; j++) { + /* In case we already reached max_entries, query to some + * temp entry, in case all other entries are zeros the + * API should succeed. 
+ */ + if (*num_entries == max_entries) + tmp = &entry; + ret = query_sysfs_gid_entry(context, i + 1, j, + tmp, + attr_mask, + port_attr.link_layer); + if (ret) + goto out; + if (is_zero_gid(&((struct ibv_gid_entry *)tmp)->gid)) + continue; + if (*num_entries == max_entries) { + ret = EINVAL; + goto out; + } + + (*num_entries)++; + tmp += entry_size; + } + } + +out: + return ret; +} + +/* Using async_event cmd_name because query_gid_ex and query_gid_table are not + * in verbs_context_ops while async_event is and doesn't use ioctl. + * If one of them is not supported, so is the other. Hence, we can use a single + * cmd_name for both of them. */ #define query_gid_kernel_cap async_event int __ibv_query_gid_ex(struct ibv_context *context, uint32_t port_num, @@ -395,7 +455,7 @@ int __ibv_query_gid_ex(struct ibv_context *context, uint32_t port_num, return EOPNOTSUPP; ret = query_sysfs_gid_entry(context, port_num, gid_index, - entry, fallback_attr_mask); + entry, fallback_attr_mask, -1); if (ret) return ret; @@ -419,3 +479,40 @@ int _ibv_query_gid_ex(struct ibv_context *context, uint32_t port_num, VERBS_QUERY_GID_ATTR_TYPE | VERBS_QUERY_GID_ATTR_NDEV_IFINDEX); } + +ssize_t _ibv_query_gid_table(struct ibv_context *context, + struct ibv_gid_entry *entries, + size_t max_entries, uint32_t flags, + size_t entry_size) +{ + DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_DEVICE, + UVERBS_METHOD_QUERY_GID_TABLE, 4); + uint64_t num_entries; + int ret; + + fill_attr_const_in(cmdb, UVERBS_ATTR_QUERY_GID_TABLE_ENTRY_SIZE, + entry_size); + fill_attr_in_uint32(cmdb, UVERBS_ATTR_QUERY_GID_TABLE_FLAGS, flags); + fill_attr_out(cmdb, UVERBS_ATTR_QUERY_GID_TABLE_RESP_ENTRIES, entries, + _array_len(entry_size, max_entries)); + fill_attr_out_ptr(cmdb, UVERBS_ATTR_QUERY_GID_TABLE_RESP_NUM_ENTRIES, + &num_entries); + + switch (execute_ioctl_fallback(context, query_gid_kernel_cap, cmdb, + &ret)) { + case TRY_WRITE: + if (flags) + return -EOPNOTSUPP; + + ret = query_gid_table_fb(context, entries, 
max_entries, + &num_entries, entry_size); + break; + default: + break; + } + + if (ret) + return -ret; + + return num_entries; +} diff --git a/libibverbs/libibverbs.map.in b/libibverbs/libibverbs.map.in index dae4963..7429016 100644 --- a/libibverbs/libibverbs.map.in +++ b/libibverbs/libibverbs.map.in @@ -145,6 +145,7 @@ IBVERBS_1.10 { IBVERBS_1.11 { global: _ibv_query_gid_ex; + _ibv_query_gid_table; } IBVERBS_1.10; /* If any symbols in this stanza change ABI then the entire staza gets a new symbol diff --git a/libibverbs/man/CMakeLists.txt b/libibverbs/man/CMakeLists.txt index 2dea4ff..1fb5ac1 100644 --- a/libibverbs/man/CMakeLists.txt +++ b/libibverbs/man/CMakeLists.txt @@ -58,6 +58,7 @@ rdma_man_pages( ibv_query_ece.3.md ibv_query_gid.3.md ibv_query_gid_ex.3.md + ibv_query_gid_table.3.md ibv_query_pkey.3.md ibv_query_port.3 ibv_query_qp.3 diff --git a/libibverbs/man/ibv_query_gid_table.3.md b/libibverbs/man/ibv_query_gid_table.3.md new file mode 100644 index 0000000..e10f51c --- /dev/null +++ b/libibverbs/man/ibv_query_gid_table.3.md @@ -0,0 +1,73 @@ +--- +date: 2020-04-24 +footer: libibverbs +header: "Libibverbs Programmer's Manual" +layout: page +license: 'Licensed under the OpenIB.org BSD license (FreeBSD Variant) - See COPYING.md' +section: 3 +title: IBV_QUERY_GID_TABLE +--- + +# NAME + +ibv_query_gid_table - query an InfiniBand device's GID table + +# SYNOPSIS + +```c +#include <infiniband/verbs.h> + +ssize_t ibv_query_gid_table(struct ibv_context *context, + struct ibv_gid_entry *entries, + size_t max_entries, + uint32_t flags); +``` + +# DESCRIPTION + +**ibv_query_gid_table()** returns the valid GID table entries of the RDMA +device context *context* at the pointer *entries*. + +A caller must allocate *entries* array for the GID table entries it +desires to query. This API returns only valid GID table entries. + +A caller must pass non zero number of entries at *max_entries* that corresponds +to the size of *entries* array. 
+ +*entries* array must be allocated such that it can contain all the valid +GID table entries of the device. If there are more valid GID entries than +the provided value of *max_entries* and *entries* array, the call will fail. +For example, if an RDMA device *context* has a total of 10 valid +GID entries, *entries* should be allocated for at least 10 entries, and +*max_entries* should be set appropriately. + +# ARGUMENTS + +*context* +: The context of the device to query. + +*entries* +: Array of ibv_gid_entry structs where the GID entries are returned. + Please see **ibv_query_gid_ex**(3) man page for *ibv_gid_entry*. + +*max_entries* +: Maximum number of entries that can be returned. + +*flags* +: Extra fields to query post *entries->ndev_ifindex*, for now must be 0. + +# RETURN VALUE + +**ibv_query_gid_table()** returns the number of entries that were read on success or a negative errno value on error. +Number of entries returned is <= max_entries. + +# SEE ALSO + +**ibv_open_device**(3), +**ibv_query_device**(3), +**ibv_query_port**(3), +**ibv_query_gid_ex**(3) + +# AUTHOR + +Parav Pandit <parav@xxxxxxxxxx> diff --git a/libibverbs/verbs.h b/libibverbs/verbs.h index e5bf900..caf626c 100644 --- a/libibverbs/verbs.h +++ b/libibverbs/verbs.h @@ -43,6 +43,7 @@ #include <string.h> #include <linux/types.h> #include <stdint.h> +#include <sys/types.h> #include <infiniband/verbs_api.h> #ifdef __cplusplus @@ -2359,6 +2360,21 @@ static inline int ibv_query_gid_ex(struct ibv_context *context, sizeof(*entry)); } +ssize_t _ibv_query_gid_table(struct ibv_context *context, + struct ibv_gid_entry *entries, size_t max_entries, + uint32_t flags, size_t entry_size); + +/* + * ibv_query_gid_table - Get all valid GID table entries + */ +static inline ssize_t ibv_query_gid_table(struct ibv_context *context, + struct ibv_gid_entry *entries, + size_t max_entries, uint32_t flags) +{ + return _ibv_query_gid_table(context, entries, max_entries, flags, + sizeof(*entries)); +} + /** * 
ibv_query_pkey - Get a P_Key table entry */ -- 1.8.3.1