On 12/8/23 2:23 AM, Hou Tao wrote:
From: Hou Tao <houtao1@xxxxxxxxxx>
There is no need to call maybe_wait_bpf_programs() if all operations in
a batched update, deletion, or lookup-and-deletion fail, so only call
maybe_wait_bpf_programs() if at least one map operation succeeds.
Similarly to uattr->batch.count, which is used to return the number of
successful map operations to the userspace application, use
attr->batch.count to record the number of successful map operations in
the kernel. The two numbers may differ. For example, in
__htab_map_lookup_and_delete_batch(do_delete=true), it is possible that
10 items in the current bucket are deleted successfully but copying the
deleted keys to the userspace application fails; attr->batch.count will
then be 10 while uattr->batch.count will be 0.
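To restate that scenario from the userspace side (a hypothetical libbpf
snippet for illustration, not part of this patch; map_fd, in_batch,
out_batch, keys and values are placeholders assumed to be set up
elsewhere):

	__u32 count = 10;
	int err;

	err = bpf_map_lookup_and_delete_batch(map_fd, &in_batch, &out_batch,
					      keys, values, &count, NULL);
	/* If copying the deleted keys back to userspace faults, err is
	 * -EFAULT and count reads back as 0, yet up to 10 elements were
	 * already deleted from the map. The in-kernel attr->batch.count
	 * added by this patch still records those deletions. */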
Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx>
---
include/linux/bpf.h | 14 +++++++-------
kernel/bpf/hashtab.c | 20 +++++++++++---------
kernel/bpf/syscall.c | 21 ++++++++++++++-------
3 files changed, 32 insertions(+), 23 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f7aa255c634f..a0c4d696a231 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -81,17 +81,17 @@ struct bpf_map_ops {
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
void (*map_release_uref)(struct bpf_map *map);
void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
- int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
+ int (*map_lookup_batch)(struct bpf_map *map, union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
void *value, u64 flags);
int (*map_lookup_and_delete_batch)(struct bpf_map *map,
- const union bpf_attr *attr,
+ union bpf_attr *attr,
union bpf_attr __user *uattr);
int (*map_update_batch)(struct bpf_map *map, struct file *map_file,
- const union bpf_attr *attr,
+ union bpf_attr *attr,
union bpf_attr __user *uattr);
- int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr,
+ int (*map_delete_batch)(struct bpf_map *map, union bpf_attr *attr,
union bpf_attr __user *uattr);
/* funcs callable from userspace and from eBPF programs */
@@ -2095,13 +2095,13 @@ void bpf_map_area_free(void *base);
bool bpf_map_write_active(const struct bpf_map *map);
void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr);
int generic_map_lookup_batch(struct bpf_map *map,
- const union bpf_attr *attr,
+ union bpf_attr *attr,
union bpf_attr __user *uattr);
int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
- const union bpf_attr *attr,
+ union bpf_attr *attr,
union bpf_attr __user *uattr);
int generic_map_delete_batch(struct bpf_map *map,
- const union bpf_attr *attr,
+ union bpf_attr *attr,
union bpf_attr __user *uattr);
struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id);
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 5b9146fa825f..b777bd8d4f8d 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -1673,7 +1673,7 @@ static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
static int
__htab_map_lookup_and_delete_batch(struct bpf_map *map,
- const union bpf_attr *attr,
+ union bpf_attr *attr,
union bpf_attr __user *uattr,
bool do_delete, bool is_lru_map,
bool is_percpu)
@@ -1708,6 +1708,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
if (!max_count)
return 0;
+ attr->batch.count = 0;
if (put_user(0, &uattr->batch.count))
return -EFAULT;
@@ -1845,6 +1846,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map,
}
dst_key += key_size;
dst_val += value_size;
+ attr->batch.count++;
}
htab_unlock_bucket(htab, b, batch, flags);
[...]
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index efda2353a7d5..d2641e51a1a7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1695,7 +1695,7 @@ static int map_get_next_key(union bpf_attr *attr)
}
int generic_map_delete_batch(struct bpf_map *map,
- const union bpf_attr *attr,
+ union bpf_attr *attr,
union bpf_attr __user *uattr)
{
void __user *keys = u64_to_user_ptr(attr->batch.keys);
@@ -1715,6 +1715,7 @@ int generic_map_delete_batch(struct bpf_map *map,
if (!max_count)
return 0;
+ attr->batch.count = 0;
if (put_user(0, &uattr->batch.count))
return -EFAULT;
@@ -1742,6 +1743,8 @@ int generic_map_delete_batch(struct bpf_map *map,
break;
cond_resched();
}
+
+ attr->batch.count = cp;
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
@@ -1751,7 +1754,7 @@ int generic_map_delete_batch(struct bpf_map *map,
}
int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
- const union bpf_attr *attr,
+ union bpf_attr *attr,
union bpf_attr __user *uattr)
{
void __user *values = u64_to_user_ptr(attr->batch.values);
@@ -1774,6 +1777,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
if (!max_count)
return 0;
+ attr->batch.count = 0;
if (put_user(0, &uattr->batch.count))
return -EFAULT;
@@ -1802,6 +1806,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
cond_resched();
}
+ attr->batch.count = cp;
if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp)))
err = -EFAULT;
@@ -1813,9 +1818,8 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file,
#define MAP_LOOKUP_RETRIES 3
-int generic_map_lookup_batch(struct bpf_map *map,
- const union bpf_attr *attr,
- union bpf_attr __user *uattr)
+int generic_map_lookup_batch(struct bpf_map *map, union bpf_attr *attr,
+ union bpf_attr __user *uattr)
{
void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch);
void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
@@ -1838,6 +1842,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
if (!max_count)
return 0;
+ attr->batch.count = 0;
if (put_user(0, &uattr->batch.count))
return -EFAULT;
@@ -1903,6 +1908,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
if (err == -EFAULT)
goto free_buf;
+ attr->batch.count = cp;
You don't need to change generic_map_lookup_batch() here. It won't trigger
maybe_wait_bpf_programs().
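To spell out why (a rough sketch of the existing logic in
bpf_map_do_batch(), paraphrased rather than quoted verbatim):

	/* BPF_MAP_LOOKUP_BATCH is the only batch command that is not
	 * treated as a map write ... */
	bool has_write = cmd != BPF_MAP_LOOKUP_BATCH;

	/* ... so the wait at err_put is never reached from the
	 * lookup-only path, regardless of attr->batch.count: */
	if (has_write) {
		maybe_wait_bpf_programs(map);
		bpf_map_write_active_dec(map);
	}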
if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) ||
(cp && copy_to_user(uobatch, prev_key, map->key_size))))
err = -EFAULT;
@@ -4926,7 +4932,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
err = fn(__VA_ARGS__); \
} while (0)
-static int bpf_map_do_batch(const union bpf_attr *attr,
+static int bpf_map_do_batch(union bpf_attr *attr,
union bpf_attr __user *uattr,
int cmd)
{
@@ -4966,7 +4972,8 @@ static int bpf_map_do_batch(const union bpf_attr *attr,
BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr);
err_put:
if (has_write) {
- maybe_wait_bpf_programs(map);
+ if (attr->batch.count)
+ maybe_wait_bpf_programs(map);
Your code logic sounds correct, but I feel you are optimizing for an
extreme corner case. In reality, especially in a production environment,
a fault from something like copy_to_user() should be extremely rare, so
in my opinion this optimization is not needed.
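In other words, I would just keep the call unconditional, i.e. leave
this hunk out entirely (sketch of what I would expect here):

	err_put:
		if (has_write) {
			/* An all-failed batch essentially only happens on rare
			 * faults such as copy_to_user() errors, so one extra
			 * wait on that path is an acceptable cost for keeping
			 * this code simple. */
			maybe_wait_bpf_programs(map);
			bpf_map_write_active_dec(map);
		}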
bpf_map_write_active_dec(map);
}
fdput(f);