[PATCH bpf-next v2 1/1] libbpf: perfbuf: allow raw access to buffers

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Jon Doron <jond@xxxxxx>

Add support for writing a custom event reader, by exposing the ring
buffer state, and allowing its tail to be set.

A few simple examples where this type of access is needed:
1. perf_event_read_simple is allocating using malloc, perhaps you want
   to handle the wrap-around in some other way.
2. Since perf buf is per-cpu, the order of the events is not
   guaranteed, for example:
   Given 3 events where each event has a timestamp t0 < t1 < t2,
   and the events are spread on more than 1 CPU, then we can end
   up with the following state in the ring buf:
   CPU[0] => [t0, t2]
   CPU[1] => [t1]
   When you consume the events from CPU[0], you could know there is
   a t1 missing, (assuming there are no drops, and your event data
   contains a sequential index).
   So now one can simply do the following: for CPU[0], store
   the address of t0 and t2 in an array (without moving the tail, so
   the data is not overwritten), then move on to CPU[1] and store the
   address of t1 in the same array.
   So you end up with something like:
   void **arr[] = [&t0, &t1, &t2], now you can consume it in order
   and move the tails as you process in order.
3. Assuming there are multiple CPUs and we want to start draining the
   messages from them, then we can "pick" with which one to start with
   according to the remaining free space in the ring buffer.

Signed-off-by: Jon Doron <jond@xxxxxx>
---
 tools/lib/bpf/libbpf.c   | 40 ++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h   | 25 +++++++++++++++++++++++++
 tools/lib/bpf/libbpf.map |  2 ++
 3 files changed, 67 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e89cc9c885b3..37299aa05185 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -12433,6 +12433,46 @@ static int perf_buffer__process_records(struct perf_buffer *pb,
 	return 0;
 }
 
+int perf_buffer__raw_ring_buf(const struct perf_buffer *pb, size_t buf_idx,
+			      void **base, size_t *buf_size, __u64 *head,
+			      __u64 *tail)
+{ /* Snapshot ring buf_idx: data-area base, size, and current head/tail. */
+	struct perf_cpu_buf *cpu_buf;
+	struct perf_event_mmap_page *header;
+
+	if (buf_idx >= pb->cpu_cnt) /* buf_idx is out of range */
+		return libbpf_err(-EINVAL);
+
+	cpu_buf = pb->cpu_bufs[buf_idx];
+	if (!cpu_buf) /* slot is in range but holds no buffer */
+		return libbpf_err(-ENOENT);
+
+	header = cpu_buf->base; /* mapping is read as a perf_event_mmap_page */
+	*head = ring_buffer_read_head(header); /* presumably acquire-load of data_head; confirm */
+	*tail = header->data_tail; /* plain read of the current tail */
+	*base = ((__u8 *)header) + pb->page_size; /* skip page_size bytes of header */
+	*buf_size = pb->mmap_size; /* NOTE(review): confirm mmap_size excludes the header page */
+	return 0; /* all four out-pointers were written unchecked; none may be NULL */
+}
+
+int perf_buffer__set_ring_buf_tail(const struct perf_buffer *pb, size_t buf_idx,
+				   __u64 tail)
+{ /* Write a caller-chosen tail value into ring buf_idx's header. */
+	struct perf_cpu_buf *cpu_buf;
+	struct perf_event_mmap_page *header;
+
+	if (buf_idx >= pb->cpu_cnt) /* buf_idx is out of range */
+		return libbpf_err(-EINVAL);
+
+	cpu_buf = pb->cpu_bufs[buf_idx];
+	if (!cpu_buf) /* slot is in range but holds no buffer */
+		return libbpf_err(-ENOENT);
+
+	header = cpu_buf->base; /* mapping is read as a perf_event_mmap_page */
+	ring_buffer_write_tail(header, tail); /* tail is passed through; not validated here */
+	return 0;
+}
+
 int perf_buffer__epoll_fd(const struct perf_buffer *pb)
 {
 	return pb->epoll_fd;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 9e9a3fd3edd8..035a0ce42139 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1381,6 +1381,31 @@ LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
 LIBBPF_API int perf_buffer__consume_buffer(struct perf_buffer *pb, size_t buf_idx);
 LIBBPF_API size_t perf_buffer__buffer_cnt(const struct perf_buffer *pb);
 LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_idx);
+/** @brief **perf_buffer__raw_ring_buf()** gets the ring buffer information for
+ * a given CPU perf buffer.
+ * This API and **perf_buffer__set_ring_buf_tail()** allow low level access
+ * to the ring buffer in order to implement a custom ring buffer drain
+ * mechanism.
+ *
+ * @param pb the perf_buffer instance
+ * @param buf_idx the index of the perf buffer
+ * @param base will get the start of the ring buffer data (past the header page)
+ * @param buf_size will get the size of the ring buffer mmap
+ * @param head gets the ring buffer head pointer
+ * @param tail gets the ring buffer tail pointer
+ * @return 0 on success; an error (-EINVAL bad buf_idx, -ENOENT no buffer) otherwise
+ */
+LIBBPF_API int perf_buffer__raw_ring_buf(const struct perf_buffer *pb,
+					 size_t buf_idx, void **base,
+					 size_t *buf_size, __u64 *head,
+					 __u64 *tail);
+/** @brief **perf_buffer__set_ring_buf_tail()** sets the ring buffer tail
+ * @param pb the perf_buffer instance
+ * @param buf_idx the index of the perf buffer
+ * @param tail the new tail value: the point up to which messages were consumed
+ */
+LIBBPF_API int perf_buffer__set_ring_buf_tail(const struct perf_buffer *pb,
+					      size_t buf_idx, __u64 tail);
 
 typedef enum bpf_perf_event_ret
 	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 52973cffc20c..22fbc97839dd 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -458,6 +458,8 @@ LIBBPF_0.8.0 {
 		bpf_program__set_insns;
 		libbpf_register_prog_handler;
 		libbpf_unregister_prog_handler;
+		perf_buffer__raw_ring_buf;
+		perf_buffer__set_ring_buf_tail;
 } LIBBPF_0.7.0;
 
 LIBBPF_1.0.0 {
-- 
2.36.1




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux