Re: [PATCH bpf-next v3 2/3] bpf: Support kptrs in local storage maps

On 2/25/23 7:40 AM, Kumar Kartikeya Dwivedi wrote:
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index 6d37a40cd90e..0fe92986412b 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -74,6 +74,12 @@ struct bpf_local_storage_elem {
  	struct hlist_node snode;	/* Linked to bpf_local_storage */
  	struct bpf_local_storage __rcu *local_storage;
  	struct rcu_head rcu;
+	bool can_use_smap; /* Is it safe to access smap in bpf_selem_free_* RCU
+			    * callbacks? bpf_local_storage_map_free only
+			    * executes rcu_barrier when there are special
+			    * fields, this field remembers that to ensure we
+			    * don't access already freed smap in sdata.
+			    */
  	/* 8 bytes hole */
  	/* The data is stored in another cacheline to minimize
  	 * the number of cachelines access during a cache hit.
diff --git a/kernel/bpf/bpf_local_storage.c b/kernel/bpf/bpf_local_storage.c
index 58da17ae5124..2bdd722fe293 100644
--- a/kernel/bpf/bpf_local_storage.c
+++ b/kernel/bpf/bpf_local_storage.c
@@ -85,6 +85,7 @@ bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
  	if (selem) {
  		if (value)
  			copy_map_value(&smap->map, SDATA(selem)->data, value);
+		/* No need to call check_and_init_map_value as memory is zero init */
  		return selem;
  	}
@@ -113,10 +114,25 @@ static void bpf_selem_free_rcu(struct rcu_head *rcu)
  	struct bpf_local_storage_elem *selem;

  	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
+	/* The can_use_smap bool is set whenever we need to free additional
+	 * fields in selem data before freeing selem. bpf_local_storage_map_free
+	 * only executes rcu_barrier to wait for RCU callbacks when it has
+	 * special fields, hence we can only conditionally dereference smap, as
+	 * by this time the map might have already been freed without waiting
+	 * for our call_rcu callback if it did not have any special fields.
+	 */
+	if (selem->can_use_smap)
+		bpf_obj_free_fields(SDATA(selem)->smap->map.record, SDATA(selem)->data);
+	kfree(selem);
+}
+
+static void bpf_selem_free_tasks_trace_rcu(struct rcu_head *rcu)
+{
+	/* Free directly if Tasks Trace RCU GP also implies RCU GP */
  	if (rcu_trace_implies_rcu_gp())
-		kfree(selem);
+		bpf_selem_free_rcu(rcu);
  	else
-		kfree_rcu(selem, rcu);
+		call_rcu(rcu, bpf_selem_free_rcu);
  }

  /* local_storage->lock must be held and selem->local_storage == local_storage.
@@ -170,9 +186,9 @@ static bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_stor
  		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

  	if (use_trace_rcu)
-		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_rcu);
+		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_tasks_trace_rcu);
  	else
-		kfree_rcu(selem, rcu);
+		call_rcu(&selem->rcu, bpf_selem_free_rcu);

On second thought, bpf_obj_free_fields() does not need to go through an RCU grace period, right?

bpf_obj_free_fields() can be done just before the call_rcu_tasks_trace() and the call_rcu() here. In hashtab, bpf_obj_free_fields() is also done just before bpf_mem_cache_free().
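
A rough sketch of what I mean, for the tail of bpf_selem_unlink_storage_nolock() (untested; bpf_obj_free_fields() already checks IS_ERR_OR_NULL(rec) internally, so the can_use_smap flag would not be needed and the RCU callbacks could go back to a plain kfree(selem)):

	/* Reclaim the special fields synchronously, before the element is
	 * queued for RCU freeing. No callback touches smap afterwards, so
	 * the map does not have to outlive the pending callbacks.
	 */
	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);

	if (use_trace_rcu)
		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_tasks_trace_rcu);
	else
		call_rcu(&selem->rcu, bpf_selem_free_rcu);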

  	return free_local_storage;
  }
@@ -240,6 +256,11 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
  	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
  	hlist_add_head_rcu(&selem->map_node, &b->list);
  	raw_spin_unlock_irqrestore(&b->lock, flags);
+
+	/* If our data will have special fields, smap will wait for us to use
+	 * its record in bpf_selem_free_* RCU callbacks before freeing itself.
+	 */
+	selem->can_use_smap = !IS_ERR_OR_NULL(smap->map.record);
  }

  void bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool use_trace_rcu)
@@ -723,6 +744,25 @@ void bpf_local_storage_map_free(struct bpf_map *map,
  	 */
  	synchronize_rcu();
+	/* Only delay freeing of smap, buckets are not needed anymore */
  	kvfree(smap->buckets);
+
+	/* When local storage has special fields, callbacks for
+	 * bpf_selem_free_rcu and bpf_selem_free_tasks_trace_rcu will keep using
+	 * the map BTF record, we need to execute an RCU barrier to wait for
+	 * them as the record will be freed right after our map_free callback.
+	 */
+	if (!IS_ERR_OR_NULL(smap->map.record)) {
+		rcu_barrier_tasks_trace();
+		/* We cannot skip rcu_barrier() when rcu_trace_implies_rcu_gp()
+		 * is true, because while call_rcu invocation is skipped in that
+		 * case in bpf_selem_free_tasks_trace_rcu (and all local storage
+		 * maps pass use_trace_rcu = true), there can be call_rcu
+		 * callbacks based on use_trace_rcu = false in the earlier while
+		 * ((selem = ...)) loop or from bpf_local_storage_unlink_nolock
+		 * called from owner's free path.
+		 */
+		rcu_barrier();
+	}
  	bpf_map_area_free(smap);
  }
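
And if the fields are freed before the callbacks are queued, as suggested above, it seems these extra barriers would not be needed either, since no pending callback would dereference smap->map.record anymore. A sketch of the simplified tail of bpf_local_storage_map_free(), assuming that rearrangement:

	/* No RCU callback uses the map's btf_record once the fields are
	 * reclaimed at unlink time, so the rcu_barrier_tasks_trace() and
	 * rcu_barrier() pair can be dropped.
	 */
	kvfree(smap->buckets);
	bpf_map_area_free(smap);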



