On Fri, Dec 8, 2023 at 2:55 PM Yonghong Song <yonghong.song@xxxxxxxxx> wrote: > > > On 12/8/23 2:23 AM, Hou Tao wrote: > > From: Hou Tao <houtao1@xxxxxxxxxx> > > > > There is no need to call maybe_wait_bpf_programs() if all operations in > > batched update, deletion, or lookup_and_deletion fail. So only call > > maybe_wait_bpf_programs() if at least one map operation succeeds. > > > > Similar with uattr->batch.count which is used to return the number of > > succeeded map operations to userspace application, use attr->batch.count > > to record the number of succeeded map operations in kernel. Sometimes > > these two number may be different. For example, in > > __htab_map_lookup_and_delete_batch(do_delete=true), it is possible that > > 10 items in current bucket have been successfully deleted, but copying > > the deleted keys to userspace application fails, attr->batch.count will > > be 10 but uattr->batch.count will be 0 instead. > > > > Signed-off-by: Hou Tao <houtao1@xxxxxxxxxx> > > --- > > include/linux/bpf.h | 14 +++++++------- > > kernel/bpf/hashtab.c | 20 +++++++++++--------- > > kernel/bpf/syscall.c | 21 ++++++++++++++------- > > 3 files changed, 32 insertions(+), 23 deletions(-) > > > > diff --git a/include/linux/bpf.h b/include/linux/bpf.h > > index f7aa255c634f..a0c4d696a231 100644 > > --- a/include/linux/bpf.h > > +++ b/include/linux/bpf.h > > @@ -81,17 +81,17 @@ struct bpf_map_ops { > > int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key); > > void (*map_release_uref)(struct bpf_map *map); > > void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key); > > - int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr, > > + int (*map_lookup_batch)(struct bpf_map *map, union bpf_attr *attr, > > union bpf_attr __user *uattr); > > int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key, > > void *value, u64 flags); > > int (*map_lookup_and_delete_batch)(struct bpf_map *map, > > - const union bpf_attr *attr, > > + union bpf_attr *attr, > > union bpf_attr __user *uattr); > > int (*map_update_batch)(struct bpf_map *map, struct file *map_file, > > - const union bpf_attr *attr, > > + union bpf_attr *attr, > > union bpf_attr __user *uattr); > > - int (*map_delete_batch)(struct bpf_map *map, const union bpf_attr *attr, > > + int (*map_delete_batch)(struct bpf_map *map, union bpf_attr *attr, > > union bpf_attr __user *uattr); > > > > /* funcs callable from userspace and from eBPF programs */ > > @@ -2095,13 +2095,13 @@ void bpf_map_area_free(void *base); > > bool bpf_map_write_active(const struct bpf_map *map); > > void bpf_map_init_from_attr(struct bpf_map *map, union bpf_attr *attr); > > int generic_map_lookup_batch(struct bpf_map *map, > > - const union bpf_attr *attr, > > + union bpf_attr *attr, > > union bpf_attr __user *uattr); > > int generic_map_update_batch(struct bpf_map *map, struct file *map_file, > > - const union bpf_attr *attr, > > + union bpf_attr *attr, > > union bpf_attr __user *uattr); > > int generic_map_delete_batch(struct bpf_map *map, > > - const union bpf_attr *attr, > > + union bpf_attr *attr, > > union bpf_attr __user *uattr); > > struct bpf_map *bpf_map_get_curr_or_next(u32 *id); > > struct bpf_prog *bpf_prog_get_curr_or_next(u32 *id); > > diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c > > index 5b9146fa825f..b777bd8d4f8d 100644 > > --- a/kernel/bpf/hashtab.c > > +++ b/kernel/bpf/hashtab.c > > @@ -1673,7 +1673,7 @@ static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map, > > > > static int > > __htab_map_lookup_and_delete_batch(struct bpf_map *map, > > - const union bpf_attr *attr, > > + union bpf_attr *attr, > > union bpf_attr __user *uattr, > > bool do_delete, bool is_lru_map, > > bool is_percpu) > > @@ -1708,6 +1708,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, > > if (!max_count) > > return 0; > > > > + attr->batch.count = 0; > > if (put_user(0, &uattr->batch.count)) > > return -EFAULT; > > > > @@ -1845,6 +1846,7 @@ __htab_map_lookup_and_delete_batch(struct bpf_map *map, > > } > > dst_key += key_size; > > dst_val += value_size; > > + attr->batch.count++; > > } > > > > htab_unlock_bucket(htab, b, batch, flags); > > [...] > > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > > index efda2353a7d5..d2641e51a1a7 100644 > > --- a/kernel/bpf/syscall.c > > +++ b/kernel/bpf/syscall.c > > @@ -1695,7 +1695,7 @@ static int map_get_next_key(union bpf_attr *attr) > > } > > > > int generic_map_delete_batch(struct bpf_map *map, > > - const union bpf_attr *attr, > > + union bpf_attr *attr, > > union bpf_attr __user *uattr) > > { > > void __user *keys = u64_to_user_ptr(attr->batch.keys); > > @@ -1715,6 +1715,7 @@ int generic_map_delete_batch(struct bpf_map *map, > > if (!max_count) > > return 0; > > > > + attr->batch.count = 0; > > if (put_user(0, &uattr->batch.count)) > > return -EFAULT; > > > > @@ -1742,6 +1743,8 @@ int generic_map_delete_batch(struct bpf_map *map, > > break; > > cond_resched(); > > } > > + > > + attr->batch.count = cp; > > if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) > > err = -EFAULT; > > > > @@ -1751,7 +1754,7 @@ int generic_map_delete_batch(struct bpf_map *map, > > } > > > > int generic_map_update_batch(struct bpf_map *map, struct file *map_file, > > - const union bpf_attr *attr, > > + union bpf_attr *attr, > > union bpf_attr __user *uattr) > > { > > void __user *values = u64_to_user_ptr(attr->batch.values); > > @@ -1774,6 +1777,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file, > > if (!max_count) > > return 0; > > > > + attr->batch.count = 0; > > if (put_user(0, &uattr->batch.count)) > > return -EFAULT; > > > > @@ -1802,6 +1806,7 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file, > > cond_resched(); > > } > > > > + attr->batch.count = cp; > > if (copy_to_user(&uattr->batch.count, &cp, sizeof(cp))) > > err = -EFAULT; > > > > @@ -1813,9 +1818,8 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file, > > > > #define MAP_LOOKUP_RETRIES 3 > > > > -int generic_map_lookup_batch(struct bpf_map *map, > > - const union bpf_attr *attr, > > - union bpf_attr __user *uattr) > > +int generic_map_lookup_batch(struct bpf_map *map, union bpf_attr *attr, > > + union bpf_attr __user *uattr) > > { > > void __user *uobatch = u64_to_user_ptr(attr->batch.out_batch); > > void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch); > > @@ -1838,6 +1842,7 @@ int generic_map_lookup_batch(struct bpf_map *map, > > if (!max_count) > > return 0; > > > > + attr->batch.count = 0; > > if (put_user(0, &uattr->batch.count)) > > return -EFAULT; > > > > @@ -1903,6 +1908,7 @@ int generic_map_lookup_batch(struct bpf_map *map, > > if (err == -EFAULT) > > goto free_buf; > > > > + attr->batch.count = cp; > > You don't need to change generic_map_lookup_batch() here. It won't trigger > maybe_wait_bpf_programs(). > > > if ((copy_to_user(&uattr->batch.count, &cp, sizeof(cp)) || > > (cp && copy_to_user(uobatch, prev_key, map->key_size)))) > > err = -EFAULT; > > @@ -4926,7 +4932,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, > > err = fn(__VA_ARGS__); \ > > } while (0) > > > > -static int bpf_map_do_batch(const union bpf_attr *attr, > > +static int bpf_map_do_batch(union bpf_attr *attr, > > union bpf_attr __user *uattr, > > int cmd) > > { > > @@ -4966,7 +4972,8 @@ static int bpf_map_do_batch(const union bpf_attr *attr, > > BPF_DO_BATCH(map->ops->map_delete_batch, map, attr, uattr); > > err_put: > > if (has_write) { > > - maybe_wait_bpf_programs(map); > > + if (attr->batch.count) > > + maybe_wait_bpf_programs(map); > > Your code logic sounds correct but I feel you are optimizing for extreme > corner cases. In really esp production environment, a fault with something > like copy_to_user() should be extremely rare. So in my opinion, this optimization > is not needed. +1 the code is fine as-is.