In order to implement the devices controller with cgroup v2 we need to add support for BPF programs, because cgroup v2 doesn't have a devices controller. This introduces the required helpers wrapping the Linux bpf syscalls. Signed-off-by: Pavel Hrdina <phrdina@xxxxxxxxxx> --- configure.ac | 5 + include/libvirt/virterror.h | 2 + src/libvirt_private.syms | 16 ++ src/util/Makefile.inc.am | 2 + src/util/virbpf.c | 438 ++++++++++++++++++++++++++++++++++++ src/util/virbpf.h | 259 +++++++++++++++++++++ src/util/virerror.c | 2 + 7 files changed, 724 insertions(+) create mode 100644 src/util/virbpf.c create mode 100644 src/util/virbpf.h diff --git a/configure.ac b/configure.ac index dcd78f64bf..fbbc88303a 100644 --- a/configure.ac +++ b/configure.ac @@ -876,6 +876,11 @@ AC_CHECK_DECLS([clock_serv_t, host_get_clock_service, clock_get_time], #include <mach/mach.h> ]) +# Check if we have new enough kernel to support BPF devices for cgroups v2 +if test "$with_linux" = "yes"; then + AC_CHECK_DECLS([BPF_PROG_QUERY], [], [], [#include <linux/bpf.h>]) +fi + # Check if we need to look for ifconfig if test "$want_ifconfig" = "yes"; then AC_PATH_PROG([IFCONFIG_PATH], [ifconfig]) diff --git a/include/libvirt/virterror.h b/include/libvirt/virterror.h index 6dc83a17cc..f1fb9d4721 100644 --- a/include/libvirt/virterror.h +++ b/include/libvirt/virterror.h @@ -134,6 +134,8 @@ typedef enum { VIR_FROM_FIREWALLD = 68, /* Error from firewalld */ VIR_FROM_DOMAIN_CHECKPOINT = 69, /* Error from domain checkpoint */ + VIR_FROM_BPF = 70, /* Error from BPF code */ + # ifdef VIR_ENUM_SENTINELS VIR_ERR_DOMAIN_LAST # endif diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index a03cf0b645..56db5d92cd 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -1519,6 +1519,22 @@ virBitmapToDataBuf; virBitmapToString; +# util/virbpf.h +virBPFAttachProg; +virBPFCreateMap; +virBPFDeleteElem; +virBPFDetachProg; +virBPFGetMap; +virBPFGetMapInfo; +virBPFGetNextElem; +virBPFGetProg; +virBPFGetProgInfo;
+virBPFLoadProg; +virBPFLookupElem; +virBPFQueryProg; +virBPFUpdateElem; + + # util/virbuffer.h virBufferAdd; virBufferAddBuffer; diff --git a/src/util/Makefile.inc.am b/src/util/Makefile.inc.am index c757f5a6ae..0c2ee03c2f 100644 --- a/src/util/Makefile.inc.am +++ b/src/util/Makefile.inc.am @@ -20,6 +20,8 @@ UTIL_SOURCES = \ util/virautoclean.h \ util/virbitmap.c \ util/virbitmap.h \ + util/virbpf.c \ + util/virbpf.h \ util/virbuffer.c \ util/virbuffer.h \ util/virperf.c \ diff --git a/src/util/virbpf.c b/src/util/virbpf.c new file mode 100644 index 0000000000..a79a97d578 --- /dev/null +++ b/src/util/virbpf.c @@ -0,0 +1,438 @@ +/* + * virbpf.c: methods for eBPF + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library. If not, see + * <http://www.gnu.org/licenses/>. 
+ */
+#include <config.h>
+
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "internal.h"
+
+#include "viralloc.h"
+#include "virbpf.h"
+#include "virerror.h"
+#include "virfile.h"
+#include "virlog.h"
+#include "virstring.h"
+
+VIR_LOG_INIT("util.bpf");
+
+#define VIR_FROM_THIS VIR_FROM_BPF
+
+#if HAVE_DECL_BPF_PROG_QUERY
+
+/* union bpf_attr carries user-space addresses in 64-bit integer fields.
+ * Convert through uintptr_t so the cast stays well-formed on builds where
+ * pointers are narrower than 64 bits. */
+# define VIR_BPF_PTR_TO_U64(ptr) ((uint64_t)(uintptr_t)(ptr))
+
+
+/**
+ * virBPFCreateMap:
+ * @mapType: value stored in bpf_attr.map_type
+ * @keySize: size of one key in bytes
+ * @valSize: size of one value in bytes
+ * @maxEntries: maximum number of entries in the map
+ *
+ * Thin wrapper around bpf(BPF_MAP_CREATE). No libvirt error is reported.
+ *
+ * Returns the raw syscall result (see bpf(2)): negative on failure with
+ * errno set.
+ */
+int
+virBPFCreateMap(unsigned int mapType,
+                unsigned int keySize,
+                unsigned int valSize,
+                unsigned int maxEntries)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.map_type = mapType;
+    attr.key_size = keySize;
+    attr.value_size = valSize;
+    attr.max_entries = maxEntries;
+
+    return syscall(SYS_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
+}
+
+
+# define LOG_BUF_SIZE (256 * 1024)
+
+/**
+ * virBPFLoadProg:
+ * @insns: array of BPF instructions
+ * @progType: value stored in bpf_attr.prog_type
+ * @insnCnt: number of instructions in @insns
+ *
+ * Thin wrapper around bpf(BPF_PROG_LOAD). The program is loaded with the
+ * "GPL" license string and log level 1; if loading fails the log buffer
+ * filled in by the kernel is emitted as a debug message.
+ *
+ * Returns the raw syscall result (see bpf(2)), or -1 if the log buffer
+ * could not be allocated.
+ */
+int
+virBPFLoadProg(struct bpf_insn *insns,
+               int progType,
+               unsigned int insnCnt)
+{
+    VIR_AUTOFREE(char *) logbuf = NULL;
+    int progfd = -1;
+    union bpf_attr attr;
+
+    if (VIR_ALLOC_N(logbuf, LOG_BUF_SIZE) < 0)
+        return -1;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.prog_type = progType;
+    attr.insn_cnt = (uint32_t)insnCnt;
+    attr.insns = VIR_BPF_PTR_TO_U64(insns);
+    attr.license = VIR_BPF_PTR_TO_U64("GPL");
+    attr.log_buf = VIR_BPF_PTR_TO_U64(logbuf);
+    attr.log_size = LOG_BUF_SIZE;
+    attr.log_level = 1;
+
+    progfd = syscall(SYS_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
+
+    if (progfd < 0) {
+        /* Logging may modify errno; keep the syscall's value for the
+         * caller, which has no other way to learn why loading failed. */
+        int saved_errno = errno;
+
+        VIR_DEBUG("%s", logbuf);
+        errno = saved_errno;
+    }
+
+    return progfd;
+}
+
+
+/**
+ * virBPFAttachProg:
+ * @progfd: file descriptor of the BPF program
+ * @targetfd: file descriptor of the attach target
+ * @attachType: value stored in bpf_attr.attach_type
+ *
+ * Thin wrapper around bpf(BPF_PROG_ATTACH).
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFAttachProg(int progfd,
+                 int targetfd,
+                 int attachType)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.target_fd = targetfd;
+    attr.attach_bpf_fd = progfd;
+    attr.attach_type = attachType;
+
+    return syscall(SYS_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
+}
+
+
+/**
+ * virBPFDetachProg:
+ * @progfd: file descriptor of the BPF program
+ * @targetfd: file descriptor of the attach target
+ * @attachType: value stored in bpf_attr.attach_type
+ *
+ * Thin wrapper around bpf(BPF_PROG_DETACH).
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFDetachProg(int progfd,
+                 int targetfd,
+                 int attachType)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.target_fd = targetfd;
+    attr.attach_bpf_fd = progfd;
+    attr.attach_type = attachType;
+
+    return syscall(SYS_bpf, BPF_PROG_DETACH, &attr, sizeof(attr));
+}
+
+
+/**
+ * virBPFQueryProg:
+ * @targetfd: file descriptor of the attach target to query
+ * @maxprogids: capacity of @progids, passed in as query.prog_cnt
+ * @attachType: value stored in bpf_attr.query.attach_type
+ * @progcnt: filled with query.prog_cnt as returned by the kernel when the
+ *           syscall succeeds; may be NULL if the caller is not interested
+ * @progids: buffer handed to the kernel as query.prog_ids
+ *
+ * Thin wrapper around bpf(BPF_PROG_QUERY).
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFQueryProg(int targetfd,
+                unsigned int maxprogids,
+                int attachType,
+                unsigned int *progcnt,
+                void *progids)
+{
+    union bpf_attr attr;
+    int rc;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.query.target_fd = targetfd;
+    attr.query.attach_type = attachType;
+    attr.query.prog_cnt = maxprogids;
+    attr.query.prog_ids = VIR_BPF_PTR_TO_U64(progids);
+
+    rc = syscall(SYS_bpf, BPF_PROG_QUERY, &attr, sizeof(attr));
+
+    if (rc >= 0 && progcnt)
+        *progcnt = attr.query.prog_cnt;
+
+    return rc;
+}
+
+
+/**
+ * virBPFGetProg:
+ * @id: BPF program ID
+ *
+ * Thin wrapper around bpf(BPF_PROG_GET_FD_BY_ID).
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFGetProg(unsigned int id)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.prog_id = id;
+
+    return syscall(SYS_bpf, BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+
+/**
+ * virBPFGetProgInfo:
+ * @progfd: file descriptor of the BPF program
+ * @info: structure handed to the kernel to be filled in; this function
+ *        does not clear it before the first query
+ * @mapIDs: optional; if non-NULL and the program uses at least one map,
+ *          *@mapIDs receives a newly allocated array of info->nr_map_ids
+ *          map IDs which the caller must free. Otherwise it is untouched.
+ *
+ * Wrapper around bpf(BPF_OBJ_GET_INFO_BY_FD) for programs. When map IDs
+ * are requested the syscall is issued twice: first to learn the number
+ * of maps, then again with @info cleared and pointing at a buffer large
+ * enough to hold the IDs.
+ *
+ * Returns the raw syscall result (see bpf(2)), or -1 if the map ID array
+ * could not be allocated.
+ */
+int
+virBPFGetProgInfo(int progfd,
+                  struct bpf_prog_info *info,
+                  unsigned int **mapIDs)
+{
+    union bpf_attr attr;
+    int rc;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.info.bpf_fd = progfd;
+    attr.info.info_len = sizeof(struct bpf_prog_info);
+    attr.info.info = VIR_BPF_PTR_TO_U64(info);
+
+    rc = syscall(SYS_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+    if (rc < 0)
+        return rc;
+
+    if (mapIDs && info->nr_map_ids > 0) {
+        unsigned int maplen = info->nr_map_ids;
+        VIR_AUTOFREE(unsigned int *) retmapIDs = NULL;
+
+        if (VIR_ALLOC_N(retmapIDs, maplen) < 0)
+            return -1;
+
+        memset(info, 0, sizeof(struct bpf_prog_info));
+        info->nr_map_ids = maplen;
+        info->map_ids = VIR_BPF_PTR_TO_U64(retmapIDs);
+
+        memset(&attr, 0, sizeof(attr));
+        attr.info.bpf_fd = progfd;
+        attr.info.info_len = sizeof(struct bpf_prog_info);
+        attr.info.info = VIR_BPF_PTR_TO_U64(info);
+
+        rc = syscall(SYS_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+        if (rc < 0) {
+            /* retmapIDs is released on return; do not hand the caller an
+             * info structure that still points at it. */
+            info->nr_map_ids = 0;
+            info->map_ids = 0;
+            return rc;
+        }
+
+        VIR_STEAL_PTR(*mapIDs, retmapIDs);
+    }
+
+    return rc;
+}
+
+
+/**
+ * virBPFGetMap:
+ * @id: BPF map ID
+ *
+ * Thin wrapper around bpf(BPF_MAP_GET_FD_BY_ID).
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFGetMap(unsigned int id)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.map_id = id;
+
+    return syscall(SYS_bpf, BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+
+/**
+ * virBPFGetMapInfo:
+ * @mapfd: file descriptor of the BPF map
+ * @info: structure handed to the kernel to be filled in
+ *
+ * Thin wrapper around bpf(BPF_OBJ_GET_INFO_BY_FD) for maps.
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFGetMapInfo(int mapfd,
+                 struct bpf_map_info *info)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.info.bpf_fd = mapfd;
+    attr.info.info_len = sizeof(struct bpf_map_info);
+    attr.info.info = VIR_BPF_PTR_TO_U64(info);
+
+    return syscall(SYS_bpf, BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
+}
+
+
+/**
+ * virBPFLookupElem:
+ * @mapfd: file descriptor of the BPF map
+ * @key: pointer to the key to look up
+ * @val: buffer passed to the kernel to receive the value
+ *
+ * Thin wrapper around bpf(BPF_MAP_LOOKUP_ELEM).
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFLookupElem(int mapfd,
+                 void *key,
+                 void *val)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.map_fd = mapfd;
+    attr.key = VIR_BPF_PTR_TO_U64(key);
+    attr.value = VIR_BPF_PTR_TO_U64(val);
+
+    return syscall(SYS_bpf, BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+}
+
+
+/**
+ * virBPFGetNextElem:
+ * @mapfd: file descriptor of the BPF map
+ * @key: pointer to the current key
+ * @nextKey: buffer passed to the kernel to receive the next key
+ *
+ * Thin wrapper around bpf(BPF_MAP_GET_NEXT_KEY).
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFGetNextElem(int mapfd,
+                  void *key,
+                  void *nextKey)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.map_fd = mapfd;
+    attr.key = VIR_BPF_PTR_TO_U64(key);
+    attr.next_key = VIR_BPF_PTR_TO_U64(nextKey);
+
+    return syscall(SYS_bpf, BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+}
+
+
+/**
+ * virBPFUpdateElem:
+ * @mapfd: file descriptor of the BPF map
+ * @key: pointer to the key
+ * @val: pointer to the value to store
+ *
+ * Thin wrapper around bpf(BPF_MAP_UPDATE_ELEM); bpf_attr.flags is left
+ * at zero.
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFUpdateElem(int mapfd,
+                 void *key,
+                 void *val)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.map_fd = mapfd;
+    attr.key = VIR_BPF_PTR_TO_U64(key);
+    attr.value = VIR_BPF_PTR_TO_U64(val);
+
+    return syscall(SYS_bpf, BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+}
+
+
+/**
+ * virBPFDeleteElem:
+ * @mapfd: file descriptor of the BPF map
+ * @key: pointer to the key to remove
+ *
+ * Thin wrapper around bpf(BPF_MAP_DELETE_ELEM).
+ *
+ * Returns the raw syscall result (see bpf(2)).
+ */
+int
+virBPFDeleteElem(int mapfd,
+                 void *key)
+{
+    union bpf_attr attr;
+
+    memset(&attr, 0, sizeof(attr));
+
+    attr.map_fd = mapfd;
+    attr.key = VIR_BPF_PTR_TO_U64(key);
+
+    return syscall(SYS_bpf, BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+}
+#else /* HAVE_DECL_BPF_PROG_QUERY */
+
+/* Fallbacks used when the build did not detect BPF_PROG_QUERY: every
+ * helper reports an ENOSYS system error and returns -1. */
+
+int
+virBPFCreateMap(unsigned int mapType ATTRIBUTE_UNUSED,
+                unsigned int keySize ATTRIBUTE_UNUSED,
+                unsigned int valSize ATTRIBUTE_UNUSED,
+                unsigned int maxEntries ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFLoadProg(struct bpf_insn *insns ATTRIBUTE_UNUSED,
+               int progType ATTRIBUTE_UNUSED,
+               unsigned int insnCnt ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFAttachProg(int progfd ATTRIBUTE_UNUSED,
+                 int targetfd ATTRIBUTE_UNUSED,
+                 int attachType ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFDetachProg(int progfd ATTRIBUTE_UNUSED,
+                 int targetfd ATTRIBUTE_UNUSED,
+                 int attachType ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFQueryProg(int targetfd ATTRIBUTE_UNUSED,
+                unsigned int maxprogids ATTRIBUTE_UNUSED,
+                int attachType ATTRIBUTE_UNUSED,
+                unsigned int *progcnt ATTRIBUTE_UNUSED,
+                void *progids ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFGetProg(unsigned int id ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFGetProgInfo(int progfd ATTRIBUTE_UNUSED,
+                  struct bpf_prog_info *info ATTRIBUTE_UNUSED,
+                  unsigned int **mapIDs ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFGetMap(unsigned int id ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFGetMapInfo(int mapfd ATTRIBUTE_UNUSED,
+                 struct bpf_map_info *info ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFLookupElem(int mapfd ATTRIBUTE_UNUSED,
+                 void *key ATTRIBUTE_UNUSED,
+                 void *val ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFGetNextElem(int mapfd ATTRIBUTE_UNUSED,
+                  void *key ATTRIBUTE_UNUSED,
+                  void *nextKey ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFUpdateElem(int mapfd ATTRIBUTE_UNUSED,
+                 void *key ATTRIBUTE_UNUSED,
+                 void *val ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+
+
+int
+virBPFDeleteElem(int mapfd ATTRIBUTE_UNUSED,
+                 void *key ATTRIBUTE_UNUSED)
+{
+    virReportSystemError(ENOSYS, "%s",
+                         _("BPF not supported with this kernel"));
+    return -1;
+}
+#endif /* HAVE_DECL_BPF_PROG_QUERY */
diff --git a/src/util/virbpf.h b/src/util/virbpf.h
new file mode 100644
index 0000000000..b5874e1e8d
--- /dev/null
+++ b/src/util/virbpf.h
@@ -0,0 +1,259 @@
+/*
+ * virbpf.h: methods for eBPF
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef LIBVIRT_VIRBPF_H
+# define LIBVIRT_VIRBPF_H
+
+# if HAVE_DECL_BPF_PROG_QUERY
+
+#  include <linux/bpf.h>
+
+/* ALU ops on registers, bpf_add|sub|...: dst_reg += src_reg */
+
+#  define VIR_BPF_ALU64_REG(op, dst, src) \
+    ((struct bpf_insn) { \
+     .code = BPF_ALU64 | BPF_OP(op) | BPF_X, \
+     .dst_reg = (dst), \
+     .src_reg = (src), \
+     .off = 0, \
+     .imm = 0, \
+     })
+
+/* ALU ops on immediates, bpf_add|sub|...: dst_reg += imm32 */
+
+#  define VIR_BPF_ALU64_IMM(op, dst, immval) \
+    ((struct bpf_insn) { \
+     .code = BPF_ALU64 | BPF_OP(op) | BPF_K, \
+     .dst_reg = (dst), \
+     .src_reg = 0, \
+     .off = 0, \
+     .imm = (immval), \
+     })
+
+/* mov of registers, dst_reg = src_reg */
+
+#  define VIR_BPF_MOV64_REG(dst, src) \
+    ((struct bpf_insn) { \
+     .code = BPF_ALU64 | BPF_MOV | BPF_X, \
+     .dst_reg = (dst), \
+     .src_reg = (src), \
+     .off = 0, \
+     .imm = 0, \
+     })
+
+/* mov of immediates, dst_reg = imm32 */
+
+#  define VIR_BPF_MOV64_IMM(dst, immval) \
+    ((struct bpf_insn) { \
+     .code = BPF_ALU64 | BPF_MOV | BPF_K, \
+     .dst_reg = (dst), \
+     .src_reg = 0, \
+     .off = 0, \
+     .imm = (immval), \
+     })
+
+/* helper to encode 16 byte instruction (two slots); immval is evaluated twice */
+
+#  define _VIR_BPF_LD_IMM64_RAW(dst, src, immval) \
+    ((struct bpf_insn) { \
+     .code = BPF_LD | BPF_DW | BPF_IMM, \
+     .dst_reg = (dst), \
+     .src_reg = (src), \
+     .off = 0, \
+     .imm = (uint32_t)(immval), \
+     }), \
+    ((struct bpf_insn) { \
+     .code = 0, \
+     .dst_reg = 0, \
+     .src_reg = 0, \
+     .off = 0, \
+     .imm = ((uint64_t)(immval)) >> 32, \
+     })
+
+/* encodes single 'load 64-bit immediate' insn, dst_reg = imm ll */
+
+#  define VIR_BPF_LD_IMM64(dst, imm) \
+    _VIR_BPF_LD_IMM64_RAW(dst, 0, imm)
+
+/* pseudo VIR_BPF_LD_IMM64 insn used to refer to process-local map_fd */
+
+#  define VIR_BPF_LD_MAP_FD(dst, mapfd) \
+    _VIR_BPF_LD_IMM64_RAW(dst, 1, mapfd)
+
+/* memory load, dst_reg = *(size *) (src_reg + off16) */
+
+#  define VIR_BPF_LDX_MEM(size, dst, src, offval) \
+    ((struct bpf_insn) { \
+     .code = BPF_LDX | BPF_SIZE(size) | BPF_MEM, \
+     .dst_reg = (dst), \
+     .src_reg = (src), \
+     .off = (offval), \
+     .imm = 0, \
+     })
+
+/* memory store of registers, *(size *) (dst_reg + off16) = src_reg */
+
+#  define VIR_BPF_STX_MEM(size, dst, src, offval) \
+    ((struct bpf_insn) { \
+     .code = BPF_STX | BPF_SIZE(size) | BPF_MEM, \
+     .dst_reg = (dst), \
+     .src_reg = (src), \
+     .off = (offval), \
+     .imm = 0, \
+     })
+
+/* memory store of immediates, *(size *) (dst_reg + off16) = imm32 */
+
+#  define VIR_BPF_ST_MEM(size, dst, immval, offval) \
+    ((struct bpf_insn) { \
+     .code = BPF_ST | BPF_SIZE(size) | BPF_MEM, \
+     .dst_reg = (dst), \
+     .src_reg = 0, \
+     .off = (offval), \
+     .imm = (immval), \
+     })
+
+/* conditional jumps against registers, if (dst_reg 'op' src_reg) goto pc + off16 */
+
+#  define VIR_BPF_JMP_REG(op, dst, src, offval) \
+    ((struct bpf_insn) { \
+     .code = BPF_JMP | BPF_OP(op) | BPF_X, \
+     .dst_reg = (dst), \
+     .src_reg = (src), \
+     .off = (offval), \
+     .imm = 0, \
+     })
+
+/* conditional jumps against immediates, if (dst_reg 'op' imm32) goto pc + off16 */
+
+#  define VIR_BPF_JMP_IMM(op, dst, immval, offval) \
+    ((struct bpf_insn) { \
+     .code = BPF_JMP | BPF_OP(op) | BPF_K, \
+     .dst_reg = (dst), \
+     .src_reg = 0, \
+     .off = (offval), \
+     .imm = (immval), \
+     })
+
+/* call eBPF function, call imm32 */
+
+#  define VIR_BPF_CALL_INSN(func) \
+    ((struct bpf_insn) { \
+     .code = BPF_JMP | BPF_CALL, \
+     .dst_reg = 0, \
+     .src_reg = 0, \
+     .off = 0, \
+     .imm = (func), \
+     })
+
+/* program exit */
+
+#  define VIR_BPF_EXIT_INSN() \
+    ((struct bpf_insn) { \
+     .code = BPF_JMP | BPF_EXIT, \
+     .dst_reg = 0, \
+     .src_reg = 0, \
+     .off = 0, \
+     .imm = 0, \
+     })
+
+# else /* HAVE_DECL_BPF_PROG_QUERY */
+
+struct bpf_prog_info;
+struct bpf_map_info;
+struct bpf_insn;
+
+#  define VIR_BPF_ALU64_REG(op, dst, src)
+#  define VIR_BPF_ALU64_IMM(op, dst, immval)
+#  define VIR_BPF_MOV64_REG(dst, src)
+#  define VIR_BPF_MOV64_IMM(dst, immval)
+#  define VIR_BPF_LD_IMM64(dst, imm)
+#  define VIR_BPF_LD_MAP_FD(dst, mapfd)
+#  define VIR_BPF_LDX_MEM(size, dst, src, offval)
+#  define VIR_BPF_STX_MEM(size, dst, src, offval)
+#  define VIR_BPF_ST_MEM(size, dst, immval, offval)
+#  define VIR_BPF_JMP_REG(op, dst, src, offval)
+#  define VIR_BPF_JMP_IMM(op, dst, immval, offval)
+#  define VIR_BPF_CALL_INSN(func)
+#  define VIR_BPF_EXIT_INSN()
+
+# endif /* HAVE_DECL_BPF_PROG_QUERY */
+
+int
+virBPFCreateMap(unsigned int mapType,
+                unsigned int keySize,
+                unsigned int valSize,
+                unsigned int maxEntries);
+
+int
+virBPFGetMapInfo(int mapfd,
+                 struct bpf_map_info *info);
+
+int
+virBPFLoadProg(struct bpf_insn *insns,
+               int progType,
+               unsigned int insnCnt);
+
+int
+virBPFAttachProg(int progfd,
+                 int targetfd,
+                 int attachType);
+
+int
+virBPFDetachProg(int progfd,
+                 int targetfd,
+                 int attachType);
+
+int
+virBPFQueryProg(int targetfd,
+                unsigned int maxprogids,
+                int attachType,
+                unsigned int *progcnt,
+                void *progids);
+
+int
+virBPFGetProg(unsigned int id);
+
+int
+virBPFGetProgInfo(int progfd,
+                  struct bpf_prog_info *info,
+                  unsigned int **mapIDs);
+
+int
+virBPFGetMap(unsigned int id);
+
+int
+virBPFLookupElem(int mapfd,
+                 void *key,
+                 void *val);
+
+int
+virBPFGetNextElem(int mapfd,
+                  void *key,
+                  void *nextKey);
+
+int
+virBPFUpdateElem(int mapfd,
+                 void *key,
+                 void *val);
+
+int
+virBPFDeleteElem(int mapfd,
+                 void *key);
+
+#endif /* LIBVIRT_VIRBPF_H */
diff --git a/src/util/virerror.c b/src/util/virerror.c
index 37b5b2f3f9..f8944698d7 100644
--- a/src/util/virerror.c
+++ b/src/util/virerror.c
@@ -142,6 +142,8 @@ VIR_ENUM_IMPL(virErrorDomain,
               "Resource control",
               "FirewallD",
               "Domain Checkpoint",
+
+              "BPF", /* 70 */
 );
 
--
2.20.1

--
libvir-list mailing list
libvir-list@xxxxxxxxxx
https://www.redhat.com/mailman/listinfo/libvir-list