Re: [RFC PATCH bpf-next 4/4] libbpf: don't remove eBPF resources when other xsks are present

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 2019-06-03 15:19, Maciej Fijalkowski wrote:
In case where multiple xsk sockets are attached to a single interface
and one of them gets detached, the eBPF maps and program are removed.
This should not happen as the rest of xsksocks are still using these
resources.

In order to fix that, let's have an additional eBPF map with a single
entry that will be used as a xsks count. During the xsk_socket__delete,
remove the resources only when this count is equal to 0.  This map is
not being accessed from eBPF program, so the verifier is not associating
it with the prog, which in turn makes bpf_obj_get_info_by_fd not
reporting this map in nr_map_ids field of struct bpf_prog_info. The
described behaviour brings the need to have this map pinned, so in
case when socket is being created and the libbpf detects the presence of
bpf resources, it will be able to access that map.


This commit is only needed after #3 is applied, right? So, this is a way of refcounting XDP socks?


Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@xxxxxxxxx>
---
  tools/lib/bpf/xsk.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--------
  1 file changed, 51 insertions(+), 8 deletions(-)

diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c
index e28bedb0b078..88d2c931ad14 100644
--- a/tools/lib/bpf/xsk.c
+++ b/tools/lib/bpf/xsk.c
@@ -44,6 +44,8 @@
   #define PF_XDP AF_XDP
  #endif
+#define XSKS_CNT_MAP_PATH "/sys/fs/bpf/xsks_cnt_map"
+
  struct xsk_umem {
  	struct xsk_ring_prod *fill;
  	struct xsk_ring_cons *comp;
@@ -65,6 +67,7 @@ struct xsk_socket {
  	int prog_fd;
  	int qidconf_map_fd;
  	int xsks_map_fd;
+	int xsks_cnt_map_fd;
  	__u32 queue_id;
  	char ifname[IFNAMSIZ];
  };
@@ -372,7 +375,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)
  static int xsk_create_bpf_maps(struct xsk_socket *xsk)
  {
  	int max_queues;
-	int fd;
+	int fd, ret;
max_queues = xsk_get_max_queues(xsk);
  	if (max_queues < 0)
@@ -392,6 +395,24 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)
  	}
  	xsk->xsks_map_fd = fd;
+ fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "xsks_cnt_map",
+				 sizeof(int), sizeof(int), 1, 0);
+	if (fd < 0) {
+		close(xsk->qidconf_map_fd);
+		close(xsk->xsks_map_fd);
+		return fd;
+	}
+
+	ret = bpf_obj_pin(fd, XSKS_CNT_MAP_PATH);
+	if (ret < 0) {
+		pr_warning("pinning map failed; is bpffs mounted?\n");
+		close(xsk->qidconf_map_fd);
+		close(xsk->xsks_map_fd);
+		close(fd);
+		return ret;
+	}
+	xsk->xsks_cnt_map_fd = fd;
+
  	return 0;
  }
@@ -456,8 +477,10 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
  		close(fd);
  	}
+ xsk->xsks_cnt_map_fd = bpf_obj_get(XSKS_CNT_MAP_PATH);
  	err = 0;
-	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0) {
+	if (xsk->qidconf_map_fd < 0 || xsk->xsks_map_fd < 0 ||
+	    xsk->xsks_cnt_map_fd < 0) {
  		err = -ENOENT;
  		xsk_delete_bpf_maps(xsk);
  	}
@@ -467,17 +490,25 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
  	return err;
  }
-static void xsk_clear_bpf_maps(struct xsk_socket *xsk)
+static void xsk_clear_bpf_maps(struct xsk_socket *xsk, long *xsks_cnt_ptr)
  {
+	long xsks_cnt, key = 0;
  	int qid = false;
bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
  	bpf_map_delete_elem(xsk->xsks_map_fd, &xsk->queue_id);
+	bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
+	if (xsks_cnt)
+		xsks_cnt--;
+	bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
+	if (xsks_cnt_ptr)
+		*xsks_cnt_ptr = xsks_cnt;

This refcount scheme will not work; There's no synchronization between the updates (cross process)!

  }
static int xsk_set_bpf_maps(struct xsk_socket *xsk)
  {
  	int qid = true, fd = xsk->fd, err;
+	long xsks_cnt, key = 0;
err = bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, &qid, 0);
  	if (err)
@@ -487,9 +518,18 @@ static int xsk_set_bpf_maps(struct xsk_socket *xsk)
  	if (err)
  		goto out;
+ err = bpf_map_lookup_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt);
+	if (err)
+		goto out;
+
+	xsks_cnt++;
+	err = bpf_map_update_elem(xsk->xsks_cnt_map_fd, &key, &xsks_cnt, 0);
+	if (err)
+		goto out;
+

Dito.

  	return 0;
  out:
-	xsk_clear_bpf_maps(xsk);
+	xsk_clear_bpf_maps(xsk, NULL);
  	return err;
  }
@@ -752,13 +792,18 @@ void xsk_socket__delete(struct xsk_socket *xsk)
  	size_t desc_sz = sizeof(struct xdp_desc);
  	struct xdp_mmap_offsets off;
  	socklen_t optlen;
+	long xsks_cnt;
  	int err;
if (!xsk)
  		return;
- xsk_clear_bpf_maps(xsk);
-	xsk_delete_bpf_maps(xsk);
+	xsk_clear_bpf_maps(xsk, &xsks_cnt);
+	unlink(XSKS_CNT_MAP_PATH);
+	if (!xsks_cnt) {
+		xsk_delete_bpf_maps(xsk);
+		xsk_remove_xdp_prog(xsk);
+	}
optlen = sizeof(off);
  	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen);
@@ -774,8 +819,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)
} - xsk_remove_xdp_prog(xsk);
-
  	xsk->umem->refcount--;
  	/* Do not close an fd that also has an associated umem connected
  	 * to it.




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux