Re: [BUG] rcu-tasks: should take care of sparse cpu masks

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Wed, Apr 6, 2022 at 5:44 PM KP Singh <kpsingh@xxxxxxxxxx> wrote:
>
> On Tue, Apr 5, 2022 at 10:38 PM Martin KaFai Lau <kafai@xxxxxx> wrote:
> >
> > On Tue, Apr 05, 2022 at 02:04:34AM +0200, KP Singh wrote:
> > > > >>> Either way, how frequently is call_rcu_tasks_trace() being invoked in
> > > > >>> your setup?  If it is being invoked frequently, increasing delays would
> > > > >>> allow multiple call_rcu_tasks_trace() instances to be served by a single
> > > > >>> tasklist scan.
> > > > >>>
> > > > >>>> Given that, I do not think bpf_sk_storage_free() can/should use
> > > > >>>> call_rcu_tasks_trace(),
> > > > >>>> we probably will have to fix this soon (or revert from our kernels)
> > > > >>>
> > > > >>> Well, you are in luck!!!  This commit added call_rcu_tasks_trace() to
> > > > >>> bpf_selem_unlink_storage_nolock(), which is invoked in a loop by
> > > > >>> bpf_sk_storage_free():
> > > > >>>
> > > > >>> 0fe4b381a59e ("bpf: Allow bpf_local_storage to be used by sleepable programs")
> > > > >>>
> > > > >>> This commit was authored by KP Singh, who I am adding on CC.  Or I would
> > > > >>> have, except that you beat me to it.  Good show!!!  ;-)
> > >
> > > Hello :)
> > >
> > > Martin, if this ends up being an issue we might have to go with the
> > > initial proposed approach
> > > of marking local storage maps explicitly as sleepable so that not all
> > > maps are forced to be
> > > synchronized via trace RCU.
> > >
> > > We can make the verifier reject loading programs that try to use
> > > non-sleepable local storage
> > > maps in sleepable programs.
> > >
> > > Do you think this is a feasible approach we can take or do you have
> > > other suggestions?
> > bpf_sk_storage_free() does not need to use call_rcu_tasks_trace().
> > The same should go for the bpf_{task,inode}_storage_free().
> > The sk at this point is being destroyed.  No bpf prog (sleepable or not)
> > can have a hold on this sk.  The only storage reader left is from
> > bpf_local_storage_map_free() which is under rcu_read_lock(),
> > so a 'kfree_rcu(selem, rcu)' is enough.
> > A few lines below in bpf_sk_storage_free(), 'kfree_rcu(sk_storage, rcu)'
> > is currently used instead of call_rcu_tasks_trace() for the same reason.
> >
> > KP, if the above makes sense, can you make a patch for it?
> > The bpf_local_storage_map_free() code path also does not need
> > call_rcu_tasks_trace(), so may as well change it together.
> > The bpf_*_storage_delete() helper and the map_{delete,update}_elem()
> > syscall still require the call_rcu_tasks_trace().
>
> Thanks, I will send a patch.

Martin, does this work? (I can send it as a patch later today)

diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 493e63258497..7ea18d4da84b 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -143,9 +143,9 @@ void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,

 bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
      struct bpf_local_storage_elem *selem,
-     bool uncharge_omem);
+     bool uncharge_omem, bool use_trace_rcu);

-void bpf_selem_unlink(struct bpf_local_storage_elem *selem);
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu);

 void bpf_selem_link_map(struct bpf_local_storage_map *smap,
  struct bpf_local_storage_elem *selem);
diff --git a/kernel/bpf/bpf_inode_storage.c b/kernel/bpf/bpf_inode_storage.c
index 96be8d518885..10424a1cda51 100644
--- a/kernel/bpf/bpf_inode_storage.c
+++ b/kernel/bpf/bpf_inode_storage.c
@@ -90,7 +90,7 @@ void bpf_inode_storage_free(struct inode *inode)
  */
  bpf_selem_unlink_map(selem);
  free_inode_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, false);
+ local_storage, selem, false, false);
  }
  raw_spin_unlock_bh(&local_storage->lock);
  rcu_read_unlock();
@@ -149,7 +149,7 @@ static int inode_storage_delete(struct inode *inode, struct bpf_map *map)
  if (!sdata)
  return -ENOENT;

- bpf_selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata), true);

  return 0;
 }
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 01aa2b51ec4d..fbe35554bab7 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -106,7 +106,7 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
  */
 bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
      struct bpf_local_storage_elem *selem,
-     bool uncharge_mem)
+     bool uncharge_mem, bool use_trace_rcu)
 {
  struct bpf_local_storage_map *smap;
  bool free_local_storage;
@@ -150,11 +150,16 @@ bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
     SDATA(selem))
  RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

- call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
+ if (use_trace_rcu)
+ call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
+ else
+ kfree_rcu(selem, rcu);
+
  return free_local_storage;
 }

-static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
+static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
+       bool use_trace_rcu)
 {
  struct bpf_local_storage *local_storage;
  bool free_local_storage = false;
@@ -169,12 +174,16 @@ static void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
  raw_spin_lock_irqsave(&local_storage->lock, flags);
  if (likely(selem_linked_to_storage(selem)))
  free_local_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, true);
+ local_storage, selem, true, use_trace_rcu);
  raw_spin_unlock_irqrestore(&local_storage->lock, flags);

- if (free_local_storage)
- call_rcu_tasks_trace(&local_storage->rcu,
+ if (free_local_storage) {
+ if (use_trace_rcu)
+ call_rcu_tasks_trace(&local_storage->rcu,
      bpf_local_storage_free_rcu);
+ else
+ kfree_rcu(local_storage, rcu);
+ }
 }

 void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
@@ -214,14 +223,14 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
  raw_spin_unlock_irqrestore(&b->lock, flags);
 }

-void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
+void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
 {
  /* Always unlink from map before unlinking from local_storage
  * because selem will be freed after successfully unlinked from
  * the local_storage.
  */
  bpf_selem_unlink_map(selem);
- __bpf_selem_unlink_storage(selem);
+ __bpf_selem_unlink_storage(selem, use_trace_rcu);
 }

 struct bpf_local_storage_data *
@@ -548,7 +557,7 @@ void bpf_local_storage_map_free(struct bpf_local_storage_map *smap,
  migrate_disable();
  __this_cpu_inc(*busy_counter);
  }
- bpf_selem_unlink(selem);
+ bpf_selem_unlink(selem, false);
  if (busy_counter) {
  __this_cpu_dec(*busy_counter);
  migrate_enable();
diff --git a/kernel/bpf/bpf_task_storage.c b/kernel/bpf/bpf_task_storage.c
index 6638a0ecc3d2..57904263a710 100644
--- a/kernel/bpf/bpf_task_storage.c
+++ b/kernel/bpf/bpf_task_storage.c
@@ -102,7 +102,7 @@ void bpf_task_storage_free(struct task_struct *task)
  */
  bpf_selem_unlink_map(selem);
  free_task_storage = bpf_selem_unlink_storage_nolock(
- local_storage, selem, false);
+ local_storage, selem, false, false);
  }
  raw_spin_unlock_irqrestore(&local_storage->lock, flags);
  bpf_task_storage_unlock();
@@ -192,7 +192,7 @@ static int task_storage_delete(struct task_struct *task, struct bpf_map *map)
  if (!sdata)
  return -ENOENT;

- bpf_selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata), true);

  return 0;
 }
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index e3ac36380520..83d7641ef67b 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -40,7 +40,7 @@ static int bpf_sk_storage_del(struct sock *sk, struct bpf_map *map)
  if (!sdata)
  return -ENOENT;

- bpf_selem_unlink(SELEM(sdata));
+ bpf_selem_unlink(SELEM(sdata), true);

  return 0;
 }
@@ -75,8 +75,8 @@ void bpf_sk_storage_free(struct sock *sk)
  * sk_storage.
  */
  bpf_selem_unlink_map(selem);
- free_sk_storage = bpf_selem_unlink_storage_nolock(sk_storage,
-  selem, true);
+ free_sk_storage = bpf_selem_unlink_storage_nolock(
+ sk_storage, selem, true, false);
  }
  raw_spin_unlock_bh(&sk_storage->lock);
  rcu_read_unlock();



[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux