From: Avihai Horon <avihaih@xxxxxxxxxx> ibv_query_gid and ibv_query_gid_type are implemented to try ioctl first and fall back to sysfs. Currently, if the fallback path is taken, all of the gid entry attributes are retrieved over sysfs. For example, if ibv_query_gid is called and the fallback path is taken, the gid type and the gid ndev ifindex will also be read over sysfs, even though we only need the gid. In order to eliminate these unnecessary sysfs reads, we add a fallback attribute mask to __ibv_query_gid_ex (passed through to query_sysfs_gid_entry) that allows us to mark the specific gid entry attributes that we would like to query in the fallback path. Signed-off-by: Avihai Horon <avihaih@xxxxxxxxxx> Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxx> --- libibverbs/cmd_device.c | 76 ++++++++++++++++++++++++++++++++----------------- libibverbs/driver.h | 11 +++++++ libibverbs/verbs.c | 8 +++--- 3 files changed, 65 insertions(+), 30 deletions(-) diff --git a/libibverbs/cmd_device.c b/libibverbs/cmd_device.c index f707273..4f85010 100644 --- a/libibverbs/cmd_device.c +++ b/libibverbs/cmd_device.c @@ -320,41 +320,49 @@ static int query_sysfs_gid_type(struct ibv_context *context, uint8_t port_num, static int query_sysfs_gid_entry(struct ibv_context *context, uint32_t port_num, uint32_t gid_index, - struct ibv_gid_entry *entry) + struct ibv_gid_entry *entry, + uint32_t attr_mask) { enum ibv_gid_type_sysfs gid_type; struct ibv_port_attr port_attr = {}; - int ret; + int ret = 0; entry->gid_index = gid_index; entry->port_num = port_num; - ret = query_sysfs_gid(context, port_num, gid_index, &entry->gid); - if (ret) - return EINVAL; - - ret = query_sysfs_gid_type(context, port_num, gid_index, &gid_type); - if (ret) - return EINVAL; - if (gid_type == IBV_GID_TYPE_SYSFS_IB_ROCE_V1) { - ret = ibv_query_port(context, port_num, &port_attr); + if (attr_mask & VERBS_QUERY_GID_ATTR_GID) { + ret = query_sysfs_gid(context, port_num, gid_index, &entry->gid); if (ret) - goto out; + return EINVAL; + } - if (port_attr.link_layer == 
IBV_LINK_LAYER_INFINIBAND) { - entry->gid_type = IBV_GID_TYPE_IB; - } else if (port_attr.link_layer == IBV_LINK_LAYER_ETHERNET) { - entry->gid_type = IBV_GID_TYPE_ROCE_V1; + if (attr_mask & VERBS_QUERY_GID_ATTR_TYPE) { + ret = query_sysfs_gid_type(context, port_num, gid_index, &gid_type); + if (ret) + return EINVAL; + + if (gid_type == IBV_GID_TYPE_SYSFS_IB_ROCE_V1) { + ret = ibv_query_port(context, port_num, &port_attr); + if (ret) + goto out; + + if (port_attr.link_layer == IBV_LINK_LAYER_INFINIBAND) { + entry->gid_type = IBV_GID_TYPE_IB; + } else if (port_attr.link_layer == + IBV_LINK_LAYER_ETHERNET) { + entry->gid_type = IBV_GID_TYPE_ROCE_V1; + } else { + ret = EINVAL; + goto out; + } } else { - ret = EINVAL; - goto out; + entry->gid_type = IBV_GID_TYPE_ROCE_V2; } - } else { - entry->gid_type = IBV_GID_TYPE_ROCE_V2; } - ret = query_sysfs_gid_ndev_ifindex(context, port_num, gid_index, - &entry->ndev_ifindex); + if (attr_mask & VERBS_QUERY_GID_ATTR_NDEV_IFINDEX) + ret = query_sysfs_gid_ndev_ifindex(context, port_num, gid_index, + &entry->ndev_ifindex); out: return ret; @@ -364,9 +372,10 @@ out: * verbs_context_ops while async_event is and doesn't use ioctl. */ #define query_gid_kernel_cap async_event -int _ibv_query_gid_ex(struct ibv_context *context, uint32_t port_num, +int __ibv_query_gid_ex(struct ibv_context *context, uint32_t port_num, uint32_t gid_index, struct ibv_gid_entry *entry, - uint32_t flags, size_t entry_size) + uint32_t flags, size_t entry_size, + uint32_t fallback_attr_mask) { DECLARE_COMMAND_BUFFER(cmdb, UVERBS_OBJECT_DEVICE, UVERBS_METHOD_QUERY_GID_ENTRY, 4); @@ -386,12 +395,27 @@ int _ibv_query_gid_ex(struct ibv_context *context, uint32_t port_num, return EOPNOTSUPP; ret = query_sysfs_gid_entry(context, port_num, gid_index, - entry); + entry, fallback_attr_mask); if (ret) return ret; - return is_zero_gid(&entry->gid) ? 
ENODATA : 0; + if (fallback_attr_mask & VERBS_QUERY_GID_ATTR_GID && + is_zero_gid(&entry->gid)) + return ENODATA; + + return 0; default: return ret; } } + +int _ibv_query_gid_ex(struct ibv_context *context, uint32_t port_num, + uint32_t gid_index, struct ibv_gid_entry *entry, + uint32_t flags, size_t entry_size) +{ + return __ibv_query_gid_ex(context, port_num, gid_index, entry, + flags, entry_size, + VERBS_QUERY_GID_ATTR_GID | + VERBS_QUERY_GID_ATTR_TYPE | + VERBS_QUERY_GID_ATTR_NDEV_IFINDEX); +} diff --git a/libibverbs/driver.h b/libibverbs/driver.h index 046c07d..479e16e 100644 --- a/libibverbs/driver.h +++ b/libibverbs/driver.h @@ -77,6 +77,12 @@ enum ibv_gid_type_sysfs { IBV_GID_TYPE_SYSFS_ROCE_V2, }; +enum verbs_query_gid_attr_mask { + VERBS_QUERY_GID_ATTR_GID = 1 << 0, + VERBS_QUERY_GID_ATTR_TYPE = 1 << 1, + VERBS_QUERY_GID_ATTR_NDEV_IFINDEX = 1 << 2, +}; + enum ibv_mr_type { IBV_MR_TYPE_MR, IBV_MR_TYPE_NULL_MR, @@ -633,6 +639,11 @@ int ibv_cmd_reg_dm_mr(struct ibv_pd *pd, struct verbs_dm *dm, unsigned int access, struct verbs_mr *vmr, struct ibv_command_buffer *link); +int __ibv_query_gid_ex(struct ibv_context *context, uint32_t port_num, + uint32_t gid_index, struct ibv_gid_entry *entry, + uint32_t flags, size_t entry_size, + uint32_t fallback_attr_mask); + /* * sysfs helper functions */ diff --git a/libibverbs/verbs.c b/libibverbs/verbs.c index 99a48d5..e50ad94 100644 --- a/libibverbs/verbs.c +++ b/libibverbs/verbs.c @@ -224,8 +224,8 @@ LATEST_SYMVER_FUNC(ibv_query_gid, 1_1, "IBVERBS_1.1", struct ibv_gid_entry entry = {}; int ret; - ret = _ibv_query_gid_ex(context, port_num, index, &entry, 0, - sizeof(entry)); + ret = __ibv_query_gid_ex(context, port_num, index, &entry, 0, + sizeof(entry), VERBS_QUERY_GID_ATTR_GID); /* Preserve API behavior for empty GID */ if (ret == ENODATA) { memset(gid, 0, sizeof(*gid)); @@ -703,8 +703,8 @@ int ibv_query_gid_type(struct ibv_context *context, uint8_t port_num, struct ibv_gid_entry entry = {}; int ret; - ret = 
_ibv_query_gid_ex(context, port_num, index, &entry, 0, - sizeof(entry)); + ret = __ibv_query_gid_ex(context, port_num, index, &entry, 0, + sizeof(entry), VERBS_QUERY_GID_ATTR_TYPE); /* Preserve API behavior for empty GID */ if (ret == ENODATA) { *type = IBV_GID_TYPE_SYSFS_IB_ROCE_V1; -- 1.8.3.1