On Mon, Feb 11, 2019 at 10:02:02AM -0800, Michael Ruhl wrote: > >-----Original Message----- > >From: linux-rdma-owner@xxxxxxxxxxxxxxx [mailto:linux-rdma- > >owner@xxxxxxxxxxxxxxx] On Behalf Of ira.weiny@xxxxxxxxx > >Sent: Monday, February 11, 2019 10:15 AM > >To: linux-rdma@xxxxxxxxxxxxxxx > >Cc: Hal Rosenstock <hal@xxxxxxxxxxxxxxxxxx>; Doug Ledford > ><dledford@xxxxxxxxxx>; Jason Gunthorpe <jgg@xxxxxxxx>; Steven Rostedt > ><rostedt@xxxxxxxxxxx>; Ingo Molnar <mingo@xxxxxxxxxx>; Alexei > >Starovoitov <ast@xxxxxxxxxx>; Daniel Borkmann <daniel@xxxxxxxxxxxxx>; > >Leon Romanovsky <leon@xxxxxxxxxx>; Weiny, Ira <ira.weiny@xxxxxxxxx> > >Subject: [PATCH for-next V4 1/6] IB/MAD: Add send path trace points > > > >From: Ira Weiny <ira.weiny@xxxxxxxxx> > > > >Use the standard Linux trace mechanism to trace MADs being sent. 4 trace > >points are added, when the MAD is posted to the qp, when the MAD is > >completed, if a MAD is resent, and when the MAD completes in error. > > > >Signed-off-by: Ira Weiny <ira.weiny@xxxxxxxxx> > > > >--- > > > >Changes since v3: > > Change dev_name to dev_index > > > >Changes since v2: > > Change license text to SPDX tag > > Change dev_name to string from array > > Reorder fields for more compact ring buffer utilization > > Use a defined roce address type for safer memcpy/memset > > > >Changes since v1: > > Update MAINTAINERS with tracing file > > > > MAINTAINERS | 1 + > > drivers/infiniband/core/mad.c | 73 ++++++++++++++- > > include/trace/events/ib_mad.h | 164 > >++++++++++++++++++++++++++++++++++ > > 3 files changed, 237 insertions(+), 1 deletion(-) > > create mode 100644 include/trace/events/ib_mad.h > > > >diff --git a/MAINTAINERS b/MAINTAINERS > >index 8c68de3cfd80..9254bd40f1ae 100644 > >--- a/MAINTAINERS > >+++ b/MAINTAINERS > >@@ -7491,6 +7491,7 @@ F: drivers/infiniband/ > > F: include/uapi/linux/if_infiniband.h > > F: include/uapi/rdma/ > > F: include/rdma/ > >+F: include/trace/events/ib_mad.h > > > > INGENIC JZ4780 DMA Driver > > M: Zubair Lutfullah Kakakhel <Zubair.Kakakhel@xxxxxxxxxx> > >diff --git a/drivers/infiniband/core/mad.c b/drivers/infiniband/core/mad.c > >index 7870823bac47..e166d584b80c 100644 > >--- a/drivers/infiniband/core/mad.c > >+++ b/drivers/infiniband/core/mad.c > >@@ -3,7 +3,7 @@ > > * Copyright (c) 2005 Intel Corporation. All rights reserved. > > * Copyright (c) 2005 Mellanox Technologies Ltd. All rights reserved. > > * Copyright (c) 2009 HNR Consulting. All rights reserved. > >- * Copyright (c) 2014 Intel Corporation. All rights reserved. > >+ * Copyright (c) 2014,2018 Intel Corporation. All rights reserved. > > * > > * This software is available to you under a choice of one of two > > * licenses. You may choose to be licensed under the terms of the GNU > >@@ -51,6 +51,51 @@ > > #include "opa_smi.h" > > #include "agent.h" > > > >+struct rdma_mad_trace_addr { > >+ u32 dlid; > >+ struct roce_ah_attr roce_addr; > >+ u8 sl; > >+ u16 pkey; > >+ u32 rqpn; > >+ u32 rqkey; > >+}; > >+static void trace_create_mad_addr(struct ib_device *dev, u8 pnum, > >+ struct ib_ud_wr *wr, > >+ struct rdma_mad_trace_addr *addr) > >+{ > >+ struct rdma_ah_attr attr; > >+ > >+ memset(&attr, 0, sizeof(attr)); > >+ rdma_query_ah(wr->ah, &attr); > >+ > >+ /* These are common */ > >+ addr->sl = attr.sl; > >+ ib_query_pkey(dev, pnum, wr->pkey_index, &addr->pkey); > >+ addr->rqpn = wr->remote_qpn; > >+ addr->rqkey = wr->remote_qkey; > >+ > >+ switch (attr.type) { > >+ case RDMA_AH_ATTR_TYPE_IB: > >+ addr->dlid = attr.ib.dlid; > >+ memset(&addr->roce_addr, 0, sizeof(addr->roce_addr)); > >+ break; > >+ case RDMA_AH_ATTR_TYPE_OPA: > >+ addr->dlid = attr.opa.dlid; > >+ memset(&addr->roce_addr, 0, sizeof(addr->roce_addr)); > >+ break; > >+ case RDMA_AH_ATTR_TYPE_ROCE: > >+ addr->dlid = 0; > >+ memcpy(&addr->roce_addr, &attr.roce, sizeof(addr- > >>roce_addr)); > >+ break; > >+ case RDMA_AH_ATTR_TYPE_UNDEFINED: > >+ addr->dlid = 0; > >+ memset(&addr->roce_addr, 0, sizeof(addr->roce_addr)); > >+ break; > >+ } > >+} > >+#define CREATE_TRACE_POINTS > >+#include <trace/events/ib_mad.h> > >+ > > static int mad_sendq_size = IB_MAD_QP_SEND_SIZE; > > static int mad_recvq_size = IB_MAD_QP_RECV_SIZE; > > > >@@ -1223,6 +1268,14 @@ int ib_send_mad(struct ib_mad_send_wr_private > >*mad_send_wr) > > > > spin_lock_irqsave(&qp_info->send_queue.lock, flags); > > if (qp_info->send_queue.count < qp_info->send_queue.max_active) > >{ > >+ if (trace_ib_mad_ib_send_mad_enabled()) { > >+ struct rdma_mad_trace_addr addr; > >+ > >+ trace_create_mad_addr(qp_info->port_priv->device, > >+ qp_info->port_priv->port_num, > >+ &mad_send_wr->send_wr, > >&addr); > >+ trace_ib_mad_ib_send_mad(mad_send_wr, &addr); > >+ } > > ret = ib_post_send(mad_agent->qp, &mad_send_wr- > >>send_wr.wr, > > NULL); > > list = &qp_info->send_queue.list; > > Ira, > > Are the trace_ib_mad_ib_send_mad_enabled() (and the other locations) > actually necessary? > > When you populate the TP_STRUCT_entry values, you can call a function to > get information (i.e. see the hfi1/trac_ibhdrs.h usage of hfi1_trace_packet_hdr_len(). > > You may be able to call the trace_create_mad_addr() there (will probably need > to pass qp_info into trac_ib_mad_ib_send_mad()) rather than having to have > this enable check in the code. How much of an optimization is this? Is it more for code cleanliness? I don't see how there would be much performance difference here. If it is for code cleanliness will you accept me cleaning it up later? Ira > > Mike > > > >@@ -2496,6 +2549,8 @@ static void ib_mad_send_done(struct ib_cq *cq, > >struct ib_wc *wc) > > send_queue = mad_list->mad_queue; > > qp_info = send_queue->qp_info; > > > >+ trace_ib_mad_send_done_handler(mad_send_wr, wc); > >+ > > retry: > > ib_dma_unmap_single(mad_send_wr->send_buf.mad_agent- > >>device, > > mad_send_wr->header_mapping, > >@@ -2527,6 +2582,14 @@ static void ib_mad_send_done(struct ib_cq *cq, > >struct ib_wc *wc) > > ib_mad_complete_send_wr(mad_send_wr, &mad_send_wc); > > > > if (queued_send_wr) { > >+ if (trace_ib_mad_send_done_resend_enabled()) { > >+ struct rdma_mad_trace_addr addr; > >+ > >+ trace_create_mad_addr(qp_info->port_priv->device, > >+ qp_info->port_priv->port_num, > >+ &mad_send_wr->send_wr, > >&addr); > >+ > > trace_ib_mad_send_done_resend(queued_send_wr, &addr); > >+ } > > ret = ib_post_send(qp_info->qp, &queued_send_wr- > >>send_wr.wr, > > NULL); > > if (ret) { > >@@ -2574,6 +2637,14 @@ static bool ib_mad_send_error(struct > >ib_mad_port_private *port_priv, > > if (mad_send_wr->retry) { > > /* Repost send */ > > mad_send_wr->retry = 0; > >+ if (trace_ib_mad_error_handler_enabled()) { > >+ struct rdma_mad_trace_addr addr; > >+ > >+ trace_create_mad_addr(qp_info->port_priv- > >>device, > >+ qp_info->port_priv- > >>port_num, > >+ &mad_send_wr->send_wr, > >&addr); > >+ trace_ib_mad_error_handler(mad_send_wr, > >&addr); > >+ } > > ret = ib_post_send(qp_info->qp, &mad_send_wr- > >>send_wr.wr, > > NULL); > > if (!ret) > >diff --git a/include/trace/events/ib_mad.h b/include/trace/events/ib_mad.h > >new file mode 100644 > >index 000000000000..30f72a29cccb > >--- /dev/null > >+++ b/include/trace/events/ib_mad.h > >@@ -0,0 +1,164 @@ > >+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB > >+ > >+/* > >+ * Copyright (c) 2018 Intel Corporation. All rights reserved. > >+ */ > >+ > >+#undef TRACE_SYSTEM > >+#define TRACE_SYSTEM ib_mad > >+ > >+#if !defined(_TRACE_IB_MAD_H) || defined(TRACE_HEADER_MULTI_READ) > >+#define _TRACE_IB_MAD_H > >+ > >+#include <linux/tracepoint.h> > >+#include <rdma/ib_mad.h> > >+ > >+DECLARE_EVENT_CLASS(ib_mad_send_template, > >+ TP_PROTO(struct ib_mad_send_wr_private *wr, struct > >rdma_mad_trace_addr *addr), > >+ TP_ARGS(wr, addr), > >+ > >+ TP_STRUCT__entry( > >+ __field(u8, base_version) > >+ __field(u8, mgmt_class) > >+ __field(u8, class_version) > >+ __field(u8, port_num) > >+ __field(u32, qp_num) > >+ __field(u8, method) > >+ __field(u8, sl) > >+ __field(u16, attr_id) > >+ __field(u32, attr_mod) > >+ __field(u64, wrtid) > >+ __field(u64, tid) > >+ __field(u16, status) > >+ __field(u16, class_specific) > >+ __field(u32, length) > >+ __field(u32, dlid) > >+ __field(u32, rqpn) > >+ __field(u32, rqkey) > >+ __field(u32, dev_index) > >+ __field(void *, agent_priv) > >+ __field(unsigned long, timeout) > >+ __field(int, retries_left) > >+ __field(int, max_retries) > >+ __field(int, retry) > >+ __field(u16, pkey) > >+ ), > >+ > >+ TP_fast_assign( > >+ __entry->dev_index = wr->mad_agent_priv->agent.device- > >>index; > >+ __entry->port_num = wr->mad_agent_priv- > >>agent.port_num; > >+ __entry->qp_num = wr->mad_agent_priv->qp_info->qp- > >>qp_num; > >+ __entry->agent_priv = wr->mad_agent_priv; > >+ __entry->wrtid = wr->tid; > >+ __entry->max_retries = wr->max_retries; > >+ __entry->retries_left = wr->retries_left; > >+ __entry->retry = wr->retry; > >+ __entry->timeout = wr->timeout; > >+ __entry->length = wr->send_buf.hdr_len + > >+ wr->send_buf.data_len; > >+ __entry->base_version = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->base_version; > >+ __entry->mgmt_class = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->mgmt_class; > >+ __entry->class_version = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->class_version; > >+ __entry->method = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->method; > >+ __entry->status = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->status; > >+ __entry->class_specific = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->class_specific; > >+ __entry->tid = ((struct ib_mad_hdr *)wr->send_buf.mad)- > >>tid; > >+ __entry->attr_id = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->attr_id; > >+ __entry->attr_mod = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->attr_mod; > >+ __entry->dlid = addr->dlid; > >+ __entry->sl = addr->sl; > >+ __entry->pkey = addr->pkey; > >+ __entry->rqpn = addr->rqpn; > >+ __entry->rqkey = addr->rqkey; > >+ ), > >+ > >+ TP_printk("%d:%d QP%d agent %p: " \ > >+ "wrtid 0x%llx; %d/%d retries(%d); timeout %lu length %d : > >hdr : " \ > >+ "base_ver 0x%x class 0x%x class_ver 0x%x method 0x%x " \ > >+ "status 0x%x class_specific 0x%x tid 0x%llx attr_id 0x%x > >attr_mod 0x%x " \ > >+ " => dlid 0x%08x sl %d pkey 0x%x rpqn 0x%x rqpkey 0x%x", > >+ __entry->dev_index, __entry->port_num, __entry- > >>qp_num, > >+ __entry->agent_priv, be64_to_cpu(__entry->wrtid), > >+ __entry->retries_left, __entry->max_retries, > >+ __entry->retry, __entry->timeout, __entry->length, > >+ __entry->base_version, __entry->mgmt_class, __entry- > >>class_version, > >+ __entry->method, be16_to_cpu(__entry->status), > >+ be16_to_cpu(__entry->class_specific), > >+ be64_to_cpu(__entry->tid), be16_to_cpu(__entry->attr_id), > >+ be32_to_cpu(__entry->attr_mod), > >+ be32_to_cpu(__entry->dlid), __entry->sl, __entry->pkey, > >__entry->rqpn, > >+ __entry->rqkey > >+ ) > >+); > >+ > >+DEFINE_EVENT(ib_mad_send_template, ib_mad_error_handler, > >+ TP_PROTO(struct ib_mad_send_wr_private *wr, struct > >rdma_mad_trace_addr *addr), > >+ TP_ARGS(wr, addr)); > >+DEFINE_EVENT(ib_mad_send_template, ib_mad_ib_send_mad, > >+ TP_PROTO(struct ib_mad_send_wr_private *wr, struct > >rdma_mad_trace_addr *addr), > >+ TP_ARGS(wr, addr)); > >+DEFINE_EVENT(ib_mad_send_template, ib_mad_send_done_resend, > >+ TP_PROTO(struct ib_mad_send_wr_private *wr, struct > >rdma_mad_trace_addr *addr), > >+ TP_ARGS(wr, addr)); > >+ > >+TRACE_EVENT(ib_mad_send_done_handler, > >+ TP_PROTO(struct ib_mad_send_wr_private *wr, struct ib_wc *wc), > >+ TP_ARGS(wr, wc), > >+ > >+ TP_STRUCT__entry( > >+ __field(u8, port_num) > >+ __field(u8, base_version) > >+ __field(u8, mgmt_class) > >+ __field(u8, class_version) > >+ __field(u32, qp_num) > >+ __field(u64, wrtid) > >+ __field(u16, status) > >+ __field(u16, wc_status) > >+ __field(u32, length) > >+ __field(void *, agent_priv) > >+ __field(unsigned long, timeout) > >+ __field(u32, dev_index) > >+ __field(int, retries_left) > >+ __field(int, max_retries) > >+ __field(int, retry) > >+ __field(u8, method) > >+ ), > >+ > >+ TP_fast_assign( > >+ __entry->dev_index = wr->mad_agent_priv->agent.device- > >>index; > >+ __entry->port_num = wr->mad_agent_priv- > >>agent.port_num; > >+ __entry->qp_num = wr->mad_agent_priv->qp_info->qp- > >>qp_num; > >+ __entry->agent_priv = wr->mad_agent_priv; > >+ __entry->wrtid = wr->tid; > >+ __entry->max_retries = wr->max_retries; > >+ __entry->retries_left = wr->retries_left; > >+ __entry->retry = wr->retry; > >+ __entry->timeout = wr->timeout; > >+ __entry->base_version = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->base_version; > >+ __entry->mgmt_class = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->mgmt_class; > >+ __entry->class_version = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->class_version; > >+ __entry->method = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->method; > >+ __entry->status = ((struct ib_mad_hdr *)wr- > >>send_buf.mad)->status; > >+ __entry->wc_status = wc->status; > >+ __entry->length = wc->byte_len; > >+ ), > >+ > >+ TP_printk("%d:%d QP%d : SEND WC Status %d : agent %p: " \ > >+ "wrtid 0x%llx %d/%d retries(%d) timeout %lu length %d: hdr : > >" \ > >+ "base_ver 0x%x class 0x%x class_ver 0x%x method 0x%x " \ > >+ "status 0x%x", > >+ __entry->dev_index, __entry->port_num, __entry- > >>qp_num, > >+ __entry->wc_status, > >+ __entry->agent_priv, be64_to_cpu(__entry->wrtid), > >+ __entry->retries_left, __entry->max_retries, > >+ __entry->retry, __entry->timeout, > >+ __entry->length, > >+ __entry->base_version, __entry->mgmt_class, __entry- > >>class_version, > >+ __entry->method, be16_to_cpu(__entry->status) > >+ ) > >+); > >+ > >+ > >+#endif /* _TRACE_IB_MAD_H */ > >+ > >+#include <trace/define_trace.h> > >-- > >2.20.1 >