Add trace events for hbl_cn to track memory allocations.

The events cover DMA coherent and DMA pool allocations/frees as well as
driver memory buffer allocation and destruction. The DMA allocation
events are emitted only when the allocation succeeded, so that
*dma_handle is never read after a failed allocation. The DMA pool free
event is emitted with a size of 0.

Signed-off-by: Omer Shpigelman <oshpigelman@xxxxxxxxx>
Co-developed-by: Abhilash K V <kvabhilash@xxxxxxxxx>
Signed-off-by: Abhilash K V <kvabhilash@xxxxxxxxx>
Co-developed-by: Andrey Agranovich <aagranovich@xxxxxxxxx>
Signed-off-by: Andrey Agranovich <aagranovich@xxxxxxxxx>
Co-developed-by: Bharat Jauhari <bjauhari@xxxxxxxxx>
Signed-off-by: Bharat Jauhari <bjauhari@xxxxxxxxx>
Co-developed-by: David Meriin <dmeriin@xxxxxxxxx>
Signed-off-by: David Meriin <dmeriin@xxxxxxxxx>
Co-developed-by: Sagiv Ozeri <sozeri@xxxxxxxxx>
Signed-off-by: Sagiv Ozeri <sozeri@xxxxxxxxx>
Co-developed-by: Zvika Yehudai <zyehudai@xxxxxxxxx>
Signed-off-by: Zvika Yehudai <zyehudai@xxxxxxxxx>
---
 .../net/ethernet/intel/hbl_cn/common/hbl_cn.c |  28 ++++-
 .../ethernet/intel/hbl_cn/common/hbl_cn_drv.c |   3 +
 .../intel/hbl_cn/common/hbl_cn_memory.c       |   9 ++
 include/trace/events/habanalabs_cn.h          | 116 ++++++++++++++++++
 4 files changed, 154 insertions(+), 2 deletions(-)
 create mode 100644 include/trace/events/habanalabs_cn.h

diff --git a/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn.c b/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn.c
index 946b11bfa61b..4e910b2cb8ac 100644
--- a/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn.c
+++ b/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn.c
@@ -12,6 +12,8 @@
 #include <linux/pci.h>
 #include <linux/slab.h>
 
+#include <trace/events/habanalabs_cn.h>
+
 #define NIC_MIN_WQS_PER_PORT		2
 
 #define NIC_SEQ_RESETS_TIMEOUT_MS	15000 /* 15 seconds */
@@ -5892,8 +5894,15 @@ void *__hbl_cn_dma_alloc_coherent(struct hbl_cn_device *hdev, size_t size, dma_a
 				  gfp_t flag, const char *caller)
 {
 	const struct hbl_cn_asic_funcs *asic_funcs = hdev->asic_funcs;
+	void *ptr;
+
+	ptr = asic_funcs->dma_alloc_coherent(hdev, size, dma_handle, flag);
 
-	return asic_funcs->dma_alloc_coherent(hdev, size, dma_handle, flag);
+	if (trace_habanalabs_cn_dma_alloc_coherent_enabled() && ptr)
+		trace_habanalabs_cn_dma_alloc_coherent(hdev->dev, (u64)(uintptr_t)ptr, *dma_handle,
+						       size, caller);
+
+	return ptr;
 }
 
 void __hbl_cn_dma_free_coherent(struct hbl_cn_device *hdev, size_t size, void *cpu_addr,
@@ -5902,14 +5911,25 @@ void __hbl_cn_dma_free_coherent(struct hbl_cn_device *hdev, size_t size, void *c
 	const struct hbl_cn_asic_funcs *asic_funcs = hdev->asic_funcs;
 
 	asic_funcs->dma_free_coherent(hdev, size, cpu_addr, dma_addr);
+
+	if (trace_habanalabs_cn_dma_free_coherent_enabled())
+		trace_habanalabs_cn_dma_free_coherent(hdev->dev, (u64)(uintptr_t)cpu_addr, dma_addr,
+						      size, caller);
 }
 
 void *__hbl_cn_dma_pool_zalloc(struct hbl_cn_device *hdev, size_t size, gfp_t mem_flags,
 			       dma_addr_t *dma_handle, const char *caller)
 {
 	const struct hbl_cn_asic_funcs *asic_funcs = hdev->asic_funcs;
+	void *ptr;
 
-	return asic_funcs->dma_pool_zalloc(hdev, size, mem_flags, dma_handle);
+	ptr = asic_funcs->dma_pool_zalloc(hdev, size, mem_flags, dma_handle);
+
+	if (trace_habanalabs_cn_dma_pool_zalloc_enabled() && ptr)
+		trace_habanalabs_cn_dma_pool_zalloc(hdev->dev, (u64)(uintptr_t)ptr, *dma_handle,
+						    size, caller);
+
+	return ptr;
 }
 
 void __hbl_cn_dma_pool_free(struct hbl_cn_device *hdev, void *vaddr, dma_addr_t dma_addr,
@@ -5918,6 +5938,10 @@ void __hbl_cn_dma_pool_free(struct hbl_cn_device *hdev, void *vaddr, dma_addr_t
 	const struct hbl_cn_asic_funcs *asic_funcs = hdev->asic_funcs;
 
 	asic_funcs->dma_pool_free(hdev, vaddr, dma_addr);
+
+	if (trace_habanalabs_cn_dma_pool_free_enabled())
+		trace_habanalabs_cn_dma_pool_free(hdev->dev, (u64)(uintptr_t)vaddr, dma_addr, 0,
+						  caller);
 }
 
 int hbl_cn_get_reg_pcie_addr(struct hbl_cn_device *hdev, u8 bar_id, u32 reg, u64 *pci_addr)
diff --git a/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn_drv.c b/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn_drv.c
index 47eedd27f36e..5ea690509592 100644
--- a/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn_drv.c
+++ b/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn_drv.c
@@ -12,6 +12,9 @@
 #include <linux/auxiliary_bus.h>
 #include <linux/sched/clock.h>
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/habanalabs_cn.h>
+
 #define HBL_DRIVER_AUTHOR "HabanaLabs Kernel Driver Team"
 
 #define HBL_DRIVER_DESC "HabanaLabs AI accelerators Core Network driver"
diff --git a/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn_memory.c b/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn_memory.c
index 878ecba66aa3..305b5b85acbe 100644
--- a/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn_memory.c
+++ b/drivers/net/ethernet/intel/hbl_cn/common/hbl_cn_memory.c
@@ -6,6 +6,7 @@
 #include <linux/vmalloc.h>
 
 #include "hbl_cn.h"
+#include <trace/events/habanalabs_cn.h>
 
 static int hbl_cn_map_vmalloc_range(struct hbl_cn_ctx *ctx, u64 vmalloc_va, u64 device_va,
 				    u64 size)
@@ -201,12 +202,16 @@ static struct hbl_cn_mem_buf *cn_mem_buf_alloc(struct hbl_cn_ctx *ctx, gfp_t gfp
 
 static int cn_mem_alloc(struct hbl_cn_ctx *ctx, struct hbl_cn_mem_data *mem_data)
 {
+	struct hbl_cn_device *hdev = ctx->hdev;
 	struct hbl_cn_mem_buf *buf;
 
 	buf = cn_mem_buf_alloc(ctx, GFP_KERNEL, mem_data);
 	if (!buf)
 		return -ENOMEM;
 
+	trace_habanalabs_cn_mem_alloc(hdev->dev, buf->mem_id, buf->handle, (u64)buf->kernel_address,
+				      buf->bus_address, buf->device_va, buf->mappable_size);
+
 	mem_data->handle = buf->handle;
 
 	if (mem_data->mem_id == HBL_CN_DRV_MEM_HOST_DMA_COHERENT)
@@ -242,6 +247,10 @@ int hbl_cn_mem_alloc(struct hbl_cn_ctx *ctx, struct hbl_cn_mem_data *mem_data)
 
 static void cn_mem_buf_destroy(struct hbl_cn_mem_buf *buf)
 {
+	trace_habanalabs_cn_mem_destroy(buf->ctx->hdev->dev, buf->mem_id, buf->handle,
+					(u64)buf->kernel_address, buf->bus_address, buf->device_va,
+					buf->mappable_size);
+
 	if (buf->device_va)
 		hbl_cn_unmap_vmalloc_range(buf->ctx, buf->device_va, buf->mappable_size);
 
diff --git a/include/trace/events/habanalabs_cn.h b/include/trace/events/habanalabs_cn.h
new file mode 100644
index 000000000000..aca962cf3130
--- /dev/null
+++ b/include/trace/events/habanalabs_cn.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0
+ *
+ * Copyright 2023 HabanaLabs, Ltd.
+ * Copyright (C) 2023-2024, Intel Corporation.
+ * All Rights Reserved.
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM habanalabs_cn
+
+#if !defined(_TRACE_HABANALABS_CN_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_HABANALABS_CN_H
+
+#include <linux/tracepoint.h>
+
+/*
+ * Driver memory buffer events (alloc/destroy). 'size' is recorded as size_t,
+ * matching the prototype, so that large sizes are not truncated to 32 bits.
+ */
+DECLARE_EVENT_CLASS(habanalabs_cn_mem_template,
+	TP_PROTO(struct device *dev, u32 mem_id, u64 handle, u64 kernel_addr, u64 bus_addr,
+		 u64 device_va, size_t size),
+
+	TP_ARGS(dev, mem_id, handle, kernel_addr, bus_addr, device_va, size),
+
+	TP_STRUCT__entry(
+		__string(dname, dev_name(dev))
+		__field(u32, mem_id)
+		__field(u64, handle)
+		__field(u64, kernel_addr)
+		__field(u64, bus_addr)
+		__field(u64, device_va)
+		__field(size_t, size)
+	),
+
+	TP_fast_assign(
+		__assign_str(dname);
+		__entry->mem_id = mem_id;
+		__entry->handle = handle;
+		__entry->kernel_addr = kernel_addr;
+		__entry->bus_addr = bus_addr;
+		__entry->device_va = device_va;
+		__entry->size = size;
+	),
+
+	TP_printk("%s: mem_id: %#x, handle: %#llx, kernel_addr: %#llx, bus_addr: %#llx, device_va: %#llx, size: %#zx",
+		  __get_str(dname), __entry->mem_id, __entry->handle, __entry->kernel_addr,
+		  __entry->bus_addr, __entry->device_va, __entry->size)
+);
+
+DEFINE_EVENT(habanalabs_cn_mem_template, habanalabs_cn_mem_alloc,
+	TP_PROTO(struct device *dev, u32 mem_id, u64 handle, u64 kernel_addr, u64 bus_addr,
+		 u64 device_va, size_t size),
+	TP_ARGS(dev, mem_id, handle, kernel_addr, bus_addr, device_va, size));
+
+DEFINE_EVENT(habanalabs_cn_mem_template, habanalabs_cn_mem_destroy,
+	TP_PROTO(struct device *dev, u32 mem_id, u64 handle, u64 kernel_addr, u64 bus_addr,
+		 u64 device_va, size_t size),
+	TP_ARGS(dev, mem_id, handle, kernel_addr, bus_addr, device_va, size));
+
+/*
+ * DMA coherent/pool alloc and free events. 'caller' is copied into the trace
+ * record with __string() rather than stored as a pointer, since the string it
+ * points to is not guaranteed to still be valid when the trace is read.
+ */
+DECLARE_EVENT_CLASS(habanalabs_cn_dma_alloc_template,
+	TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size, const char *caller),
+
+	TP_ARGS(dev, cpu_addr, dma_addr, size, caller),
+
+	TP_STRUCT__entry(
+		__string(dname, dev_name(dev))
+		__field(u64, cpu_addr)
+		__field(u64, dma_addr)
+		__field(size_t, size)
+		__string(caller, caller)
+	),
+
+	TP_fast_assign(
+		__assign_str(dname);
+		__entry->cpu_addr = cpu_addr;
+		__entry->dma_addr = dma_addr;
+		__entry->size = size;
+		__assign_str(caller);
+	),
+
+	TP_printk("%s: cpu_addr: %#llx, dma_addr: %#llx, size: %#zx, caller: %s",
+		  __get_str(dname), __entry->cpu_addr, __entry->dma_addr, __entry->size,
+		  __get_str(caller))
+);
+
+DEFINE_EVENT(habanalabs_cn_dma_alloc_template, habanalabs_cn_dma_alloc_coherent,
+	TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size,
+		 const char *caller),
+	TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
+
+DEFINE_EVENT(habanalabs_cn_dma_alloc_template, habanalabs_cn_dma_free_coherent,
+	TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size,
+		 const char *caller),
+	TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
+
+DEFINE_EVENT(habanalabs_cn_dma_alloc_template, habanalabs_cn_dma_pool_zalloc,
+	TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size,
+		 const char *caller),
+	TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
+
+DEFINE_EVENT(habanalabs_cn_dma_alloc_template, habanalabs_cn_dma_pool_free,
+	TP_PROTO(struct device *dev, u64 cpu_addr, u64 dma_addr, size_t size,
+		 const char *caller),
+	TP_ARGS(dev, cpu_addr, dma_addr, size, caller));
+
+#endif /* if !defined(_TRACE_HABANALABS_CN_H) || defined(TRACE_HEADER_MULTI_READ) */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
-- 
2.34.1