A new member memcg_fd is introduced into bpf attr of BPF_MAP_CREATE command, which is the fd of an opened cgroup directory. In this cgroup, the memory subsystem must be enabled. This value is valid only when BPF_F_SELECTABLE_MEMCG is set in map_flags. Once the kernel get the memory cgroup from this fd, it will set this memcg into bpf map, then all the subsequent memory allocation of this map will be charge to the memcg. The map creation paths in libbpf are also changed consequently. Currently it is only supported for cgroup2 directory. The usage of this new member as follows, struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts), .map_flags = BPF_F_SELECTABLE_MEMCG, }; int memcg_fd, int map_fd; int key, value; memcg_fd = open("/cgroup2", O_DIRECTORY); if (memcg_fd < 0) { perror("memcg dir open"); return -1; } map_opts.memcg_fd = memcg_fd; map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "map_for_memcg", sizeof(key), sizeof(value), 1024, &map_opts); if (map_fd <= 0) { perror("map create"); return -1; } Signed-off-by: Yafang Shao <laoar.shao@xxxxxxxxx> --- include/uapi/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 47 ++++++++++++++++++++++++++-------- tools/include/uapi/linux/bpf.h | 2 ++ tools/lib/bpf/bpf.c | 1 + tools/lib/bpf/bpf.h | 3 ++- tools/lib/bpf/libbpf.c | 2 ++ 6 files changed, 46 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index d5fc1ea70b59..a6e02c8be924 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1296,6 +1296,8 @@ union bpf_attr { * struct stored as the * map value */ + __s32 memcg_fd; /* selectable memcg */ + __s32 :32; /* hole */ /* Any per-map-type extra fields * * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6401cc417fa9..9900e2b87315 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -402,14 +402,30 @@ void bpf_map_free_id(struct bpf_map *map, bool do_idr_lock) } #ifdef CONFIG_MEMCG_KMEM -static void bpf_map_save_memcg(struct bpf_map *map) +static int bpf_map_save_memcg(struct bpf_map *map, union bpf_attr *attr) { - /* Currently if a map is created by a process belonging to the root - * memory cgroup, get_obj_cgroup_from_current() will return NULL. - * So we have to check map->objcg for being NULL each time it's - * being used. - */ - map->objcg = get_obj_cgroup_from_current(); + struct obj_cgroup *objcg; + struct cgroup *cgrp; + + if (attr->map_flags & BPF_F_SELECTABLE_MEMCG) { + cgrp = cgroup_get_from_fd(attr->memcg_fd); + if (IS_ERR(cgrp)) + return -EINVAL; + + objcg = get_obj_cgroup_from_cgroup(cgrp); + if (IS_ERR(objcg)) + return PTR_ERR(objcg); + } else { + /* Currently if a map is created by a process belonging to the root + * memory cgroup, get_obj_cgroup_from_current() will return NULL. + * So we have to check map->objcg for being NULL each time it's + * being used. + */ + objcg = get_obj_cgroup_from_current(); + } + + map->objcg = objcg; + return 0; } static void bpf_map_release_memcg(struct bpf_map *map) @@ -485,8 +501,9 @@ void __percpu *bpf_map_alloc_percpu(const struct bpf_map *map, size_t size, } #else -static void bpf_map_save_memcg(struct bpf_map *map) +static int bpf_map_save_memcg(struct bpf_map *map, union bpf_attr *attr) { + return 0; } static void bpf_map_release_memcg(struct bpf_map *map) @@ -530,13 +547,18 @@ void *bpf_map_container_alloc(union bpf_attr *attr, u64 size, int numa_node) { struct bpf_map *map; void *container; + int ret; container = __bpf_map_area_alloc(size, numa_node, false); if (!container) return ERR_PTR(-ENOMEM); map = (struct bpf_map *)container; - bpf_map_save_memcg(map); + ret = bpf_map_save_memcg(map, attr); + if (ret) { + bpf_map_area_free(container); + return ERR_PTR(ret); + } return container; } @@ -547,6 +569,7 @@ void *bpf_map_container_mmapable_alloc(union bpf_attr *attr, u64 size, struct bpf_map *map; void *container; void *ptr; + int ret; /* kmalloc'ed memory can't be mmap'ed, use explicit vmalloc */ ptr = __bpf_map_area_alloc(size, numa_node, true); @@ -555,7 +578,11 @@ void *bpf_map_container_mmapable_alloc(union bpf_attr *attr, u64 size, container = ptr + align - offset; map = (struct bpf_map *)container; - bpf_map_save_memcg(map); + ret = bpf_map_save_memcg(map, attr); + if (ret) { + bpf_map_area_free(ptr); + return ERR_PTR(ret); + } return ptr; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index d5fc1ea70b59..a6e02c8be924 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1296,6 +1296,8 @@ union bpf_attr { * struct stored as the * map value */ + __s32 memcg_fd; /* selectable memcg */ + __s32 :32; /* hole */ /* Any per-map-type extra fields * * BPF_MAP_TYPE_BLOOM_FILTER - the lowest 4 bits indicate the diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 5eb0df90eb2b..662ce5808386 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -199,6 +199,7 @@ int bpf_map_create(enum bpf_map_type map_type, attr.map_extra = OPTS_GET(opts, map_extra, 0); attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + attr.memcg_fd = OPTS_GET(opts, memcg_fd, 0); fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 88a7cc4bd76f..481aad49422b 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -51,8 +51,9 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; + __u32 memcg_fd; }; -#define bpf_map_create_opts__last_field map_ifindex +#define bpf_map_create_opts__last_field memcg_fd LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 50d41815f431..86916d550031 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -505,6 +505,7 @@ struct bpf_map { bool pinned; bool reused; bool autocreate; + __s32 memcg_fd; __u64 map_extra; }; @@ -4928,6 +4929,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_ifindex = map->map_ifindex; create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; + create_attr.memcg_fd = map->memcg_fd; create_attr.map_extra = map->map_extra; if (bpf_map__is_struct_ops(map)) -- 2.17.1