Add timestamp support to rc_pingpong. It also serves as an example of using the ibv_create_cq_ex verb and the new ibv_wc_read_xxx accessors. Signed-off-by: Yishai Hadas <yishaih@xxxxxxxxxxxx> Reviewed-by: Matan Barak <matanb@xxxxxxxxxxxx> --- examples/rc_pingpong.c | 278 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 215 insertions(+), 63 deletions(-) diff --git a/examples/rc_pingpong.c b/examples/rc_pingpong.c index 90a8320..8d58357 100644 --- a/examples/rc_pingpong.c +++ b/examples/rc_pingpong.c @@ -49,6 +49,14 @@ #include "pingpong.h" +#ifndef max +#define max(x, y) (((x) > (y)) ? (x) : (y)) +#endif + +#ifndef min +#define min(x, y) (((x) < (y)) ? (x) : (y)) +#endif + enum { PINGPONG_RECV_WRID = 1, PINGPONG_SEND_WRID = 2, @@ -56,13 +64,17 @@ enum { static int page_size; static int use_odp; +static int use_ts; struct pingpong_context { struct ibv_context *context; struct ibv_comp_channel *channel; struct ibv_pd *pd; struct ibv_mr *mr; - struct ibv_cq *cq; + union { + struct ibv_cq *cq; + struct ibv_cq_ex *cq_ex; + } cq_s; struct ibv_qp *qp; void *buf; int size; @@ -70,8 +82,15 @@ struct pingpong_context { int rx_depth; int pending; struct ibv_port_attr portinfo; + uint64_t completion_timestamp_mask; }; +struct ibv_cq *pp_cq(struct pingpong_context *ctx) +{ + return use_ts ? 
ibv_cq_ex_to_cq(ctx->cq_s.cq_ex) : + ctx->cq_s.cq; +} + struct pingpong_dest { int lid; int qpn; @@ -357,7 +376,7 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, goto clean_comp_channel; } - if (use_odp) { + if (use_odp || use_ts) { const uint32_t rc_caps_mask = IBV_ODP_SUPPORT_SEND | IBV_ODP_SUPPORT_RECV; struct ibv_device_attr_ex attrx; @@ -367,12 +386,22 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, goto clean_comp_channel; } - if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) || - (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) { - fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n"); - goto clean_comp_channel; + if (use_odp) { + if (!(attrx.odp_caps.general_caps & IBV_ODP_SUPPORT) || + (attrx.odp_caps.per_transport_caps.rc_odp_caps & rc_caps_mask) != rc_caps_mask) { + fprintf(stderr, "The device isn't ODP capable or does not support RC send and receive with ODP\n"); + goto clean_comp_channel; + } + access_flags |= IBV_ACCESS_ON_DEMAND; + } + + if (use_ts) { + if (!attrx.completion_timestamp_mask) { + fprintf(stderr, "The device isn't completion timestamp capable\n"); + goto clean_comp_channel; + } + ctx->completion_timestamp_mask = attrx.completion_timestamp_mask; } - access_flags |= IBV_ACCESS_ON_DEMAND; } ctx->mr = ibv_reg_mr(ctx->pd, ctx->buf, size, access_flags); @@ -381,9 +410,22 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, goto clean_pd; } - ctx->cq = ibv_create_cq(ctx->context, rx_depth + 1, NULL, - ctx->channel, 0); - if (!ctx->cq) { + if (use_ts) { + struct ibv_cq_init_attr_ex attr_ex = { + .cqe = rx_depth + 1, + .cq_context = NULL, + .channel = ctx->channel, + .comp_vector = 0, + .wc_flags = IBV_WC_EX_WITH_COMPLETION_TIMESTAMP + }; + + ctx->cq_s.cq_ex = ibv_create_cq_ex(ctx->context, &attr_ex); + } else { + ctx->cq_s.cq = ibv_create_cq(ctx->context, rx_depth + 
1, NULL, + ctx->channel, 0); + } + + if (!pp_cq(ctx)) { fprintf(stderr, "Couldn't create CQ\n"); goto clean_mr; } @@ -391,8 +433,8 @@ static struct pingpong_context *pp_init_ctx(struct ibv_device *ib_dev, int size, { struct ibv_qp_attr attr; struct ibv_qp_init_attr init_attr = { - .send_cq = ctx->cq, - .recv_cq = ctx->cq, + .send_cq = pp_cq(ctx), + .recv_cq = pp_cq(ctx), .cap = { .max_send_wr = 1, .max_recv_wr = rx_depth, @@ -438,7 +480,7 @@ clean_qp: ibv_destroy_qp(ctx->qp); clean_cq: - ibv_destroy_cq(ctx->cq); + ibv_destroy_cq(pp_cq(ctx)); clean_mr: ibv_dereg_mr(ctx->mr); @@ -469,7 +511,7 @@ int pp_close_ctx(struct pingpong_context *ctx) return 1; } - if (ibv_destroy_cq(ctx->cq)) { + if (ibv_destroy_cq(pp_cq(ctx))) { fprintf(stderr, "Couldn't destroy CQ\n"); return 1; } @@ -543,6 +585,89 @@ static int pp_post_send(struct pingpong_context *ctx) return ibv_post_send(ctx->qp, &wr, &bad_wr); } +struct ts_params { + unsigned int comp_recv_max_time_delta; + unsigned int comp_recv_min_time_delta; + uint64_t comp_recv_total_time_delta; + uint64_t comp_recv_prev_time; + int last_comp_with_ts; + unsigned int comp_with_time_iters; +}; + +static inline int parse_single_wc(struct pingpong_context *ctx, int *scnt, + int *rcnt, int *routs, int iters, + uint64_t wr_id, enum ibv_wc_status status, + uint64_t completion_timestamp, + struct ts_params *ts) +{ + if (status != IBV_WC_SUCCESS) { + fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", + ibv_wc_status_str(status), + status, (int)wr_id); + return 1; + } + + switch ((int)wr_id) { + case PINGPONG_SEND_WRID: + ++(*scnt); + break; + + case PINGPONG_RECV_WRID: + if (--(*routs) <= 1) { + *routs += pp_post_recv(ctx, ctx->rx_depth - *routs); + if (*routs < ctx->rx_depth) { + fprintf(stderr, + "Couldn't post receive (%d)\n", + *routs); + return 1; + } + } + + ++(*rcnt); + if (use_ts) { + if (ts->last_comp_with_ts) { + uint64_t delta; + + /* checking whether the clock was wrapped around */ + if (completion_timestamp >= 
ts->comp_recv_prev_time) + delta = completion_timestamp - ts->comp_recv_prev_time; + else + delta = ctx->completion_timestamp_mask - ts->comp_recv_prev_time + + completion_timestamp + 1; + + ts->comp_recv_max_time_delta = max(ts->comp_recv_max_time_delta, delta); + ts->comp_recv_min_time_delta = min(ts->comp_recv_min_time_delta, delta); + ts->comp_recv_total_time_delta += delta; + ts->comp_with_time_iters++; + } + + ts->comp_recv_prev_time = completion_timestamp; + ts->last_comp_with_ts = 1; + } else { + ts->last_comp_with_ts = 0; + } + + break; + + default: + fprintf(stderr, "Completion for unknown wr_id %d\n", + (int)wr_id); + return 1; + } + + ctx->pending &= ~(int)wr_id; + if (*scnt < iters && !ctx->pending) { + if (pp_post_send(ctx)) { + fprintf(stderr, "Couldn't post send\n"); + return 1; + } + ctx->pending = PINGPONG_RECV_WRID | + PINGPONG_SEND_WRID; + } + + return 0; +} + static void usage(const char *argv0) { printf("Usage:\n"); @@ -561,6 +686,7 @@ static void usage(const char *argv0) printf(" -e, --events sleep on CQ events (default poll)\n"); printf(" -g, --gid-idx=<gid index> local port gid index\n"); printf(" -o, --odp use on demand paging\n"); + printf(" -t, --ts get CQE with timestamp\n"); } int main(int argc, char *argv[]) @@ -586,6 +712,7 @@ int main(int argc, char *argv[]) int sl = 0; int gidx = -1; char gid[33]; + struct ts_params ts; srand48(getpid() * time(NULL)); @@ -604,11 +731,12 @@ int main(int argc, char *argv[]) { .name = "events", .has_arg = 0, .val = 'e' }, { .name = "gid-idx", .has_arg = 1, .val = 'g' }, { .name = "odp", .has_arg = 0, .val = 'o' }, + { .name = "ts", .has_arg = 0, .val = 't' }, { 0 } }; - c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:o", - long_options, NULL); + c = getopt_long(argc, argv, "p:d:i:s:m:r:n:l:eg:ot", + long_options, NULL); if (c == -1) break; @@ -669,6 +797,9 @@ int main(int argc, char *argv[]) case 'o': use_odp = 1; break; + case 't': + use_ts = 1; + break; default: usage(argv[0]); @@ -683,6 +814,15 @@ 
int main(int argc, char *argv[]) return 1; } + if (use_ts) { + ts.comp_recv_max_time_delta = 0; + ts.comp_recv_min_time_delta = 0xffffffff; + ts.comp_recv_total_time_delta = 0; + ts.comp_recv_prev_time = 0; + ts.last_comp_with_ts = 0; + ts.comp_with_time_iters = 0; + } + page_size = sysconf(_SC_PAGESIZE); dev_list = ibv_get_device_list(NULL); @@ -720,7 +860,7 @@ int main(int argc, char *argv[]) } if (use_event) - if (ibv_req_notify_cq(ctx->cq, 0)) { + if (ibv_req_notify_cq(pp_cq(ctx), 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } @@ -788,6 +928,8 @@ int main(int argc, char *argv[]) rcnt = scnt = 0; while (rcnt < iters || scnt < iters) { + int ret; + if (use_event) { struct ibv_cq *ev_cq; void *ev_ctx; @@ -799,72 +941,73 @@ int main(int argc, char *argv[]) ++num_cq_events; - if (ev_cq != ctx->cq) { + if (ev_cq != pp_cq(ctx)) { fprintf(stderr, "CQ event for unknown CQ %p\n", ev_cq); return 1; } - if (ibv_req_notify_cq(ctx->cq, 0)) { + if (ibv_req_notify_cq(pp_cq(ctx), 0)) { fprintf(stderr, "Couldn't request CQ notification\n"); return 1; } } - { - struct ibv_wc wc[2]; + if (use_ts) { + struct ibv_poll_cq_attr attr = {}; + + do { + ret = ibv_start_poll(ctx->cq_s.cq_ex, &attr); + } while (!use_event && ret == ENOENT); + + if (ret) { + fprintf(stderr, "poll CQ failed %d\n", ret); + return ret; + } + ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, + iters, + ctx->cq_s.cq_ex->wr_id, + ctx->cq_s.cq_ex->status, + ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), + &ts); + if (ret) { + ibv_end_poll(ctx->cq_s.cq_ex); + return ret; + } + ret = ibv_next_poll(ctx->cq_s.cq_ex); + if (!ret) + ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, + iters, + ctx->cq_s.cq_ex->wr_id, + ctx->cq_s.cq_ex->status, + ibv_wc_read_completion_ts(ctx->cq_s.cq_ex), + &ts); + ibv_end_poll(ctx->cq_s.cq_ex); + if (ret && ret != ENOENT) { + fprintf(stderr, "poll CQ failed %d\n", ret); + return ret; + } + } else { int ne, i; + struct ibv_wc wc[2]; do { - ne = ibv_poll_cq(ctx->cq, 2, 
wc); + ne = ibv_poll_cq(pp_cq(ctx), 2, wc); if (ne < 0) { fprintf(stderr, "poll CQ failed %d\n", ne); return 1; } - } while (!use_event && ne < 1); for (i = 0; i < ne; ++i) { - if (wc[i].status != IBV_WC_SUCCESS) { - fprintf(stderr, "Failed status %s (%d) for wr_id %d\n", - ibv_wc_status_str(wc[i].status), - wc[i].status, (int) wc[i].wr_id); + ret = parse_single_wc(ctx, &scnt, &rcnt, &routs, + iters, + wc[i].wr_id, + wc[i].status, + 0, &ts); + if (ret) { + fprintf(stderr, "parse WC failed %d\n", ne); return 1; } - - switch ((int) wc[i].wr_id) { - case PINGPONG_SEND_WRID: - ++scnt; - break; - - case PINGPONG_RECV_WRID: - if (--routs <= 1) { - routs += pp_post_recv(ctx, ctx->rx_depth - routs); - if (routs < ctx->rx_depth) { - fprintf(stderr, - "Couldn't post receive (%d)\n", - routs); - return 1; - } - } - - ++rcnt; - break; - - default: - fprintf(stderr, "Completion for unknown wr_id %d\n", - (int) wc[i].wr_id); - return 1; - } - - ctx->pending &= ~(int) wc[i].wr_id; - if (scnt < iters && !ctx->pending) { - if (pp_post_send(ctx)) { - fprintf(stderr, "Couldn't post send\n"); - return 1; - } - ctx->pending = PINGPONG_RECV_WRID | - PINGPONG_SEND_WRID; - } } } } @@ -883,9 +1026,18 @@ int main(int argc, char *argv[]) bytes, usec / 1000000., bytes * 8. 
/ usec); printf("%d iters in %.2f seconds = %.2f usec/iter\n", iters, usec / 1000000., usec / iters); + + if (use_ts && ts.comp_with_time_iters) { + printf("Max receive completion clock cycles = %u\n", + ts.comp_recv_max_time_delta); + printf("Min receive completion clock cycles = %u\n", + ts.comp_recv_min_time_delta); + printf("Average receive completion clock cycles = %f\n", + (double)ts.comp_recv_total_time_delta / ts.comp_with_time_iters); + } } - ibv_ack_cq_events(ctx->cq, num_cq_events); + ibv_ack_cq_events(pp_cq(ctx), num_cq_events); if (pp_close_ctx(ctx)) return 1; -- 1.8.3.1 -- To unsubscribe from this list: send the line "unsubscribe linux-rdma" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html