[PATCH v3] bpf: Remove in_atomic() from bpf_link_put().

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



bpf_free_inode() is invoked as a RCU callback. Usually RCU callbacks are
invoked within softirq context. By setting rcutree.use_softirq=0 boot
option the RCU callbacks will be invoked in a per-CPU kthread with
bottom halves disabled which implies a RCU read section.

On PREEMPT_RT the context remains fully preemptible. The RCU read
section however does not allow schedule() invocation. The latter happens
in mutex_lock() performed by bpf_trampoline_unlink_prog() originated
from bpf_link_put().

It was pointed out that the bpf_link_put() invocation should not be
delayed if originated from close(). It was also pointed out that other
invocations from within a syscall should also avoid the workqueue.
After an audit of all bpf_link_put() callers it looks that the only
atomic caller is the RCU callback. Everything else is called from
preemptible context because it is a syscall, a mutex_t is acquired near
by or due a GFP_KERNEL memory allocation.

Let bpf_link_put() free the resources directly. Add
bpf_link_put_from_atomic() which uses the kworker to free the
resources. Let bpf_any_put() invoke one or the other depending on the
context it is called from (RCU or not).

Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx>
---
On 2023-05-25 10:51:23 [-0700], Andrii Nakryiko wrote:
v2…v3:
  - Drop bpf_link_put_direct(). Let bpf_link_put() do the direct free
    and add bpf_link_put_from_atomic() to do the delayed free via the
    worker.

v1…v2:
   - Add bpf_link_put_direct() to be used from bpf_link_release() as
     suggested.

> Looks good to me, but it's not sufficient. See kernel/bpf/inode.c, we
> need to do bpf_link_put_direct() from bpf_put_any(), which should be
> safe as well because it all is either triggered from bpf() syscall or
> by unlink()'ing BPF FS file. For file deletion we have the same
> requirement to have deterministic release of bpf_link.

Okay. I checked all callers and it seems that the only atomic caller is
the RCU callback.

 include/linux/bpf.h  |  5 +++++
 kernel/bpf/inode.c   | 13 ++++++++-----
 kernel/bpf/syscall.c | 21 ++++++++++++---------
 3 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e53ceee1df370..dced1f880cfa6 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2073,6 +2073,7 @@ int bpf_link_settle(struct bpf_link_primer *primer);
 void bpf_link_cleanup(struct bpf_link_primer *primer);
 void bpf_link_inc(struct bpf_link *link);
 void bpf_link_put(struct bpf_link *link);
+void bpf_link_put_from_atomic(struct bpf_link *link);
 int bpf_link_new_fd(struct bpf_link *link);
 struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd);
 struct bpf_link *bpf_link_get_from_fd(u32 ufd);
@@ -2432,6 +2433,10 @@ static inline void bpf_link_put(struct bpf_link *link)
 {
 }
 
+static inline void bpf_link_put_from_atomic(struct bpf_link *link)
+{
+}
+
 static inline int bpf_obj_get_user(const char __user *pathname, int flags)
 {
 	return -EOPNOTSUPP;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 9948b542a470e..2e1e9f3c7f701 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -49,7 +49,7 @@ static void *bpf_any_get(void *raw, enum bpf_type type)
 	return raw;
 }
 
-static void bpf_any_put(void *raw, enum bpf_type type)
+static void bpf_any_put(void *raw, enum bpf_type type, bool may_sleep)
 {
 	switch (type) {
 	case BPF_TYPE_PROG:
@@ -59,7 +59,10 @@ static void bpf_any_put(void *raw, enum bpf_type type)
 		bpf_map_put_with_uref(raw);
 		break;
 	case BPF_TYPE_LINK:
-		bpf_link_put(raw);
+		if (may_sleep)
+			bpf_link_put(raw);
+		else
+			bpf_link_put_from_atomic(raw);
 		break;
 	default:
 		WARN_ON_ONCE(1);
@@ -490,7 +493,7 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname)
 
 	ret = bpf_obj_do_pin(pathname, raw, type);
 	if (ret != 0)
-		bpf_any_put(raw, type);
+		bpf_any_put(raw, type, true);
 
 	return ret;
 }
@@ -552,7 +555,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 		return -ENOENT;
 
 	if (ret < 0)
-		bpf_any_put(raw, type);
+		bpf_any_put(raw, type, true);
 	return ret;
 }
 
@@ -617,7 +620,7 @@ static void bpf_free_inode(struct inode *inode)
 	if (S_ISLNK(inode->i_mode))
 		kfree(inode->i_link);
 	if (!bpf_inode_type(inode, &type))
-		bpf_any_put(inode->i_private, type);
+		bpf_any_put(inode->i_private, type, false);
 	free_inode_nonrcu(inode);
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 14f39c1e573ee..87b07ebd6d146 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2777,20 +2777,23 @@ static void bpf_link_put_deferred(struct work_struct *work)
 	bpf_link_free(link);
 }
 
-/* bpf_link_put can be called from atomic context, but ensures that resources
- * are freed from process context
+/* bpf_link_put_from_atomic is called from atomic context. It needs to be called
+ * from sleepable context in order to acquire sleeping locks during the process.
  */
-void bpf_link_put(struct bpf_link *link)
+void bpf_link_put_from_atomic(struct bpf_link *link)
 {
 	if (!atomic64_dec_and_test(&link->refcnt))
 		return;
 
-	if (in_atomic()) {
-		INIT_WORK(&link->work, bpf_link_put_deferred);
-		schedule_work(&link->work);
-	} else {
-		bpf_link_free(link);
-	}
+	INIT_WORK(&link->work, bpf_link_put_deferred);
+	schedule_work(&link->work);
+}
+
+void bpf_link_put(struct bpf_link *link)
+{
+	if (!atomic64_dec_and_test(&link->refcnt))
+		return;
+	bpf_link_free(link);
 }
 EXPORT_SYMBOL(bpf_link_put);
 
-- 
2.40.1






[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux