From: Avihai Horon <avihaih@xxxxxxxxxx> ibv_query_gid and ibv_query_gid_type are implemented to try ioctl first and fall back to sysfs. Currently, if the fallback path is taken, all of the gid entry attributes are retrieved over sysfs. For example, if ibv_query_gid is called and the fallback path is taken, the gid type and the gid ndev ifindex will also be read over sysfs, even though we only need the gid. In order to eliminate these unnecessary sysfs reads, we add an attribute mask to ibv_cmd_query_gid_entry that will allow us to mark the specific gid entry attributes that we would like to query in the fallback path. Signed-off-by: Avihai Horon <avihaih@xxxxxxxxxx> Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> --- libibverbs/cmd_device.c | 65 +++++++++++++++++++++++++++++-------------------- libibverbs/driver.h | 9 ++++++- libibverbs/verbs.c | 10 +++++--- 3 files changed, 53 insertions(+), 31 deletions(-) diff --git a/libibverbs/cmd_device.c b/libibverbs/cmd_device.c index fb166bb..9aa9dff 100644 --- a/libibverbs/cmd_device.c +++ b/libibverbs/cmd_device.c @@ -232,41 +232,49 @@ static int query_sysfs_gid_ndev_ifindex(struct ibv_context *context, static int query_sysfs_gid_entry(struct ibv_context *context, uint32_t port_num, uint32_t gid_index, - struct ibv_gid_entry *entry) + struct ibv_gid_entry *entry, + uint32_t attr_mask) { enum ibv_gid_type_sysfs gid_type; struct ibv_port_attr port_attr = {}; - int ret; + int ret = 0; entry->gid_index = gid_index; entry->port_num = port_num; - ret = _ibv_query_gid(context, port_num, gid_index, &entry->gid); - if (ret) - return EINVAL; - - ret = _ibv_query_gid_type(context, port_num, gid_index, &gid_type); - if (ret) - return EINVAL; - - if (gid_type == IBV_GID_TYPE_SYSFS_IB_ROCE_V1) { - ret = ibv_query_port(context, port_num, &port_attr); + if (attr_mask & VERBS_QUERY_GID_ATTR_GID) { + ret = _ibv_query_gid(context, port_num, gid_index, &entry->gid); if (ret) - goto out; + return EINVAL; + } - if (port_attr.link_layer == 
IBV_LINK_LAYER_INFINIBAND) { - entry->gid_type = IBV_GID_TYPE_IB; - } else if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) { - entry->gid_type = IBV_GID_TYPE_ROCE_V1; + if (attr_mask & VERBS_QUERY_GID_ATTR_TYPE) { + ret = _ibv_query_gid_type(context, port_num, gid_index, + &gid_type); + if (ret) + return EINVAL; + + if (gid_type == IBV_GID_TYPE_SYSFS_IB_ROCE_V1) { + ret = ibv_query_port(context, port_num, &port_attr); + if (ret) + goto out; + + if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) { + entry->gid_type = IBV_GID_TYPE_IB; + } else if (port_attr.link_layer == + IBV_LINK_LAYER_ETHERNET) { + entry->gid_type = IBV_GID_TYPE_ROCE_V1; + } else { + ret = EINVAL; + goto out; + } } else { - ret = EINVAL; - goto out; + entry->gid_type = IBV_GID_TYPE_ROCE_V2; } - } else { - entry->gid_type = IBV_GID_TYPE_ROCE_V2; } - ret = query_sysfs_gid_ndev_ifindex(context, port_num, gid_index, - &entry->ndev_ifindex); + if (attr_mask & VERBS_QUERY_GID_ATTR_NDEV_IFINDEX) + ret = query_sysfs_gid_ndev_ifindex(context, port_num, gid_index, + &entry->ndev_ifindex); out: return ret; @@ -278,7 +286,8 @@ out: #define query_gid_kernel_cap async_event int ibv_cmd_query_gid_entry(struct ibv_context *context, uint32_t port_num, uint32_t gid_index, struct ibv_gid_entry *entry, - uint32_t flags, size_t entry_size) + uint32_t flags, size_t entry_size, + uint32_t fallback_attr_mask) { DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_DEVICE, UVERBS_METHOD_QUERY_GID_ENTRY, 4); @@ -298,11 +307,15 @@ int ibv_cmd_query_gid_entry(struct ibv_context *context, uint32_t port_num, return EOPNOTSUPP; ret = query_sysfs_gid_entry(context, port_num, gid_index, - entry); + entry, fallback_attr_mask); if (ret) return ret; - return is_zero_gid(&entry->gid) ? 
ENODATA : 0; + if (fallback_attr_mask & VERBS_QUERY_GID_ATTR_GID && + is_zero_gid(&entry->gid)) + return ENODATA; + + return 0; default: return ret; } diff --git a/libibverbs/driver.h b/libibverbs/driver.h index 2ab0a89..c998b5b 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -77,6 +77,12 @@ enum ibv_gid_type_sysfs { IBV_GID_TYPE_SYSFS_ROCE_V2, }; +enum verbs_query_gid_attr_mask { + VERBS_QUERY_GID_ATTR_GID = 1 << 0, + VERBS_QUERY_GID_ATTR_TYPE = 1 << 1, + VERBS_QUERY_GID_ATTR_NDEV_IFINDEX = 1 << 2, +}; + enum ibv_mr_type { IBV_MR_TYPE_MR, IBV_MR_TYPE_NULL_MR, @@ -635,7 +641,8 @@ int ibv_cmd_reg_dm_mr(struct ibv_pd *pd, struct verbs_dm *dm, int ibv_cmd_query_gid_entry(struct ibv_context *context, uint32_t port_num, uint32_t gid_index, struct ibv_gid_entry *entry, - uint32_t flags, size_t entry_size); + uint32_t flags, size_t entry_size, + uint32_t fallback_attr_mask); /* * sysfs helper functions diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c index 9dec4e6..237c56b 100644 --- a/libibverbs/verbs.c +++ b/libibverbs/verbs.c @@ -247,7 +247,7 @@ LATEST_SYMVER_FUNC(ibv_query_gid, 1_1, "IBVERBS_1.1", int ret; ret = ibv_cmd_query_gid_entry(context, port_num, index, &entry, 0, - sizeof(entry)); + sizeof(entry), VERBS_QUERY_GID_ATTR_GID); /* Preserve API behavior for empty GID */ if (ret == ENODATA) { memset(gid, 0, sizeof(*gid)); @@ -265,8 +265,10 @@ int _ibv_query_gid_ex(struct ibv_context *context, uint32_t port_num, uint32_t gid_index, struct ibv_gid_entry *entry, uint32_t flags, size_t entry_size) { - return ibv_cmd_query_gid_entry(context, port_num, gid_index, entry, - flags, entry_size); + return ibv_cmd_query_gid_entry( + context, port_num, gid_index, entry, flags, entry_size, + VERBS_QUERY_GID_ATTR_GID | VERBS_QUERY_GID_ATTR_TYPE | + VERBS_QUERY_GID_ATTR_NDEV_IFINDEX); } LATEST_SYMVER_FUNC(ibv_query_pkey, 1_1, "IBVERBS_1.1", @@ -796,7 +798,7 @@ int ibv_query_gid_type(struct ibv_context *context, uint8_t port_num, int ret; ret = 
ibv_cmd_query_gid_entry(context, port_num, index, &entry, 0, - sizeof(entry)); + sizeof(entry), VERBS_QUERY_GID_ATTR_TYPE); /* Preserve API behavior for empty GID */ if (ret == ENODATA) { *type = IBV_GID_TYPE_SYSFS_IB_ROCE_V1; -- 1.8.3.1