> -----Original Message----- > From: linux-rdma-owner@xxxxxxxxxxxxxxx <linux-rdma- > owner@xxxxxxxxxxxxxxx> On Behalf Of Leon Romanovsky > Sent: Friday, August 30, 2019 1:46 PM > To: Doug Ledford <dledford@xxxxxxxxxx>; Jason Gunthorpe > <jgg@xxxxxxxxxxxx> > Cc: Leon Romanovsky <leonro@xxxxxxxxxxxx>; RDMA mailing list <linux- > rdma@xxxxxxxxxxxxxxx>; Erez Alfasi <ereza@xxxxxxxxxxxx> > Subject: [PATCH rdma-next v1 3/4] RDMA/nldev: Provide MR statistics > > From: Erez Alfasi <ereza@xxxxxxxxxxxx> > > Add RDMA nldev netlink interface for dumping MR statistics information. > > Output example: > ereza@dev~$: ./ibv_rc_pingpong -o -P -s 500000000 > local address: LID 0x0001, QPN 0x00008a, PSN 0xf81096, GID :: > > ereza@dev~$: rdma stat show mr > dev mlx5_0 mrn 2 page_faults 122071 page_invalidations 0 > prefetched_pages 122071 > > Signed-off-by: Erez Alfasi <ereza@xxxxxxxxxxxx> > Signed-off-by: Leon Romanovsky <leonro@xxxxxxxxxxxx> > --- > drivers/infiniband/core/device.c | 1 + > drivers/infiniband/core/nldev.c | 54 +++++++++++++++++++++++++++++-- > drivers/infiniband/hw/mlx5/main.c | 16 +++++++++ > include/rdma/ib_verbs.h | 9 ++++++ > 4 files changed, 78 insertions(+), 2 deletions(-) > > diff --git a/drivers/infiniband/core/device.c > b/drivers/infiniband/core/device.c > index 99c4a55545cf..34a9e37c5c61 100644 > --- a/drivers/infiniband/core/device.c > +++ b/drivers/infiniband/core/device.c > @@ -2610,6 +2610,7 @@ void ib_set_device_ops(struct ib_device *dev, > const struct ib_device_ops *ops) > SET_DEVICE_OP(dev_ops, get_dma_mr); > SET_DEVICE_OP(dev_ops, get_hw_stats); > SET_DEVICE_OP(dev_ops, get_link_layer); > + SET_DEVICE_OP(dev_ops, fill_odp_stats); > SET_DEVICE_OP(dev_ops, get_netdev); > SET_DEVICE_OP(dev_ops, get_port_immutable); > SET_DEVICE_OP(dev_ops, get_vector_affinity); diff --git > a/drivers/infiniband/core/nldev.c b/drivers/infiniband/core/nldev.c index > 47f7fe5432db..47fee3d68cb9 100644 > --- a/drivers/infiniband/core/nldev.c > +++ 
b/drivers/infiniband/core/nldev.c > @@ -37,6 +37,7 @@ > #include <net/netlink.h> > #include <rdma/rdma_cm.h> > #include <rdma/rdma_netlink.h> > +#include <rdma/ib_umem_odp.h> > > #include "core_priv.h" > #include "cma_priv.h" > @@ -748,6 +749,49 @@ static int fill_stat_hwcounter_entry(struct sk_buff > *msg, > return -EMSGSIZE; > } > > +static int fill_stat_mr_entry(struct sk_buff *msg, bool has_cap_net_admin, > + struct rdma_restrack_entry *res, uint32_t port) { > + struct ib_mr *mr = container_of(res, struct ib_mr, res); > + struct ib_device *dev = mr->pd->device; > + struct ib_odp_counters odp_stats; > + struct nlattr *table_attr; > + > + if (nla_put_u32(msg, RDMA_NLDEV_ATTR_RES_MRN, res->id)) > + goto err; > + > + if (!dev->ops.fill_odp_stats) > + return 0; > + > + if (!dev->ops.fill_odp_stats(mr, &odp_stats)) > + return 0; > + > + table_attr = nla_nest_start(msg, > + RDMA_NLDEV_ATTR_STAT_HWCOUNTERS); > + > + if (!table_attr) > + return -EMSGSIZE; > + > + if (fill_stat_hwcounter_entry(msg, "page_faults", > + (u64)atomic64_read(&odp_stats.faults))) > + goto err_table; > + if (fill_stat_hwcounter_entry( > + msg, "page_invalidations", > + (u64)atomic64_read(&odp_stats.invalidations))) > + goto err_table; > + if (fill_stat_hwcounter_entry(msg, "prefetched_pages", > + > (u64)atomic64_read(&odp_stats.prefetched))) > + goto err_table; > + > + nla_nest_end(msg, table_attr); > + return 0; > + > +err_table: > + nla_nest_cancel(msg, table_attr); > +err: > + return -EMSGSIZE; > +} > + > static int fill_stat_counter_hwcounters(struct sk_buff *msg, > struct rdma_counter *counter) > { > @@ -2008,7 +2052,10 @@ static int nldev_stat_get_doit(struct sk_buff *skb, > struct nlmsghdr *nlh, > case RDMA_NLDEV_ATTR_RES_QP: > ret = stat_get_doit_qp(skb, nlh, extack, tb); > break; > - > + case RDMA_NLDEV_ATTR_RES_MR: > + ret = res_get_common_doit(skb, nlh, extack, > RDMA_RESTRACK_MR, > + fill_stat_mr_entry); > + break; > default: > ret = -EINVAL; > break; > @@ -2032,7 +2079,10 @@ static 
int nldev_stat_get_dumpit(struct sk_buff > *skb, > case RDMA_NLDEV_ATTR_RES_QP: > ret = nldev_res_get_counter_dumpit(skb, cb); > break; > - > + case RDMA_NLDEV_ATTR_RES_MR: > + ret = res_get_common_dumpit(skb, cb, > RDMA_RESTRACK_MR, > + fill_stat_mr_entry); > + break; > default: > ret = -EINVAL; > break; > diff --git a/drivers/infiniband/hw/mlx5/main.c > b/drivers/infiniband/hw/mlx5/main.c > index 07aecba16019..05095fda03cc 100644 > --- a/drivers/infiniband/hw/mlx5/main.c > +++ b/drivers/infiniband/hw/mlx5/main.c > @@ -67,6 +67,7 @@ > #include <rdma/uverbs_std_types.h> > #include <rdma/mlx5_user_ioctl_verbs.h> #include > <rdma/mlx5_user_ioctl_cmds.h> > +#include <rdma/ib_umem_odp.h> > > #define UVERBS_MODULE_NAME mlx5_ib > #include <rdma/uverbs_named_ioctl.h> > @@ -121,6 +122,20 @@ struct mlx5_ib_dev > *mlx5_ib_get_ibdev_from_mpi(struct mlx5_ib_multiport_info *mp > return dev; > } > > +static bool mlx5_ib_fill_odp_stats(struct ib_mr *ibmr, > + struct ib_odp_counters *cnt) > +{ > + struct mlx5_ib_mr *mr = to_mmr(ibmr); > + > + if (!is_odp_mr(mr)) > + return false; > + > + memcpy(cnt, &to_ib_umem_odp(mr->umem)->odp_stats, > + sizeof(struct ib_odp_counters)); > + > + return true; > +} > + > static enum rdma_link_layer > mlx5_port_type_cap_to_rdma_ll(int port_type_cap) { @@ -6316,6 +6331,7 > @@ static const struct ib_device_ops mlx5_ib_dev_ops = { > .get_dev_fw_str = get_dev_fw_str, > .get_dma_mr = mlx5_ib_get_dma_mr, > .get_link_layer = mlx5_ib_port_link_layer, > + .fill_odp_stats = mlx5_ib_fill_odp_stats, > .map_mr_sg = mlx5_ib_map_mr_sg, > .map_mr_sg_pi = mlx5_ib_map_mr_sg_pi, > .mmap = mlx5_ib_mmap, > diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index > de5bc352f473..48d6513b3b59 100644 > --- a/include/rdma/ib_verbs.h > +++ b/include/rdma/ib_verbs.h > @@ -72,6 +72,7 @@ > #define IB_FW_VERSION_NAME_MAX ETHTOOL_FWVERS_LEN > > struct ib_umem_odp; > +struct ib_odp_counters; > > extern struct workqueue_struct *ib_wq; > extern struct workqueue_struct 
*ib_comp_wq; @@ -2566,6 +2567,14 @@ > struct ib_device_ops { > */ > int (*counter_update_stats)(struct rdma_counter *counter); > > + /** > + * fill_odp_stats - Fill MR ODP stats into a given > + * ib_odp_counters struct. > + * Return value - true in case counters has been filled, > + * false otherwise (if its non-ODP registered MR for example). > + */ > + bool (*fill_odp_stats)(struct ib_mr *mr, struct ib_odp_counters > *cnt); > + Requesting ODP stats on a non-ODP MR is an error. Instead of returning bool, please return an int, with -EINVAL as the error for non-ODP MRs. > DECLARE_RDMA_OBJ_SIZE(ib_ah); > DECLARE_RDMA_OBJ_SIZE(ib_cq); > DECLARE_RDMA_OBJ_SIZE(ib_pd); > -- > 2.20.1