Hello, I have managed to extract a reproducer that crashes the kernel. We observe the crash with all drivers that support XDP zerocopy (i40e, ixgbe, mlx5_core). The program source is attached (C++). Compile with a newer clang++ or g++ using flag -std=c++17. It must also be linked with libbpf. Use libbpf v0.0.5 and make sure to revert 5771dacd3dc2fdd041c51242819a9f212e04af55. The crash is a kernel NULL pointer dereference in xsk_umem_consume_tx. Hopefully this is useful, Kal On Wed, Aug 7, 2019 at 10:48 PM Kal Cutter Conley <kal.conley@xxxxxxxxxxx> wrote: > > Hello, > I am trying to get AF_XDP working with the i40e driver (Ethernet > Controller X710 for 10GbE SFP+). After bind() with XDP_ZEROCOPY the > kernel (machine) freezes hard. I have reproduced this on varying > kernel versions between 5.1 and 5.3-rc3 with 5.3 kernels freezing > also, but at a later stage. I tried replacing my XDP program with a > trivial one that simply returns XDP_PASS but it didn't help. On the > same system, the xdpsock sample does appear to work with the -z flag, > however. Are there any current known issues that could be causing > this? I will try to extract a minimal example that exercises the > freeze. > > Thanks, > Kal
#include <linux/if_link.h> #include <net/if.h> #include <numaif.h> #include <stddef.h> #include <stdint.h> #include <sys/mman.h> #include <sys/resource.h> #include <sys/socket.h> #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> #include <algorithm> #include <cstdlib> #include <future> #include <iostream> #include <limits> #include <type_traits> #include "libbpf/include/uapi/linux/if_xdp.h" #include "libbpf/src/bpf.h" #include "libbpf/src/libbpf.h" #include "libbpf/src/xsk.h" #if !defined(AF_XDP) #define AF_XDP 44 #endif #if !defined(SOL_XDP) #define SOL_XDP 283 #endif constexpr size_t KERNEL_MAX_FRAMES = 256 * 1024; constexpr size_t KERNEL_RX_BATCH_SIZE = 16; constexpr size_t UMEM_HEADROOM = 16; constexpr size_t UMEM_SIZE = 500'000'000; static const char* XDP_PROGRAM_PATH = "xdp_bpf.o"; struct XskConsumerRing : public xsk_ring_cons { void* mapping; }; struct XskProducerRing : public xsk_ring_prod { void* mapping; }; void ErrorExit(const char* text) { std::cerr << text << ": errno: " << errno << "\n"; exit(1); } size_t ceil2(size_t x) noexcept { constexpr auto N = std::numeric_limits<size_t>::digits; x--; x |= (x >> 1); x |= (x >> 2); x |= (x >> 4); x |= (x >> (N > 8 ? 8 : 0)); x |= (x >> (N > 16 ? 16 : 0)); x |= (x >> (N > 32 ? 32 : 0)); x |= (x >> (N > 64 ? 64 : 0)); return x + 1; } template <typename T> constexpr std::enable_if_t<std::is_integral_v<T> && std::is_unsigned_v<T>, T> CeilDivide(T value, T divisor) { const T divided = value / divisor; return divided + (value % divisor != 0); } constexpr auto AlignSize(std::size_t size, std::size_t align) { return CeilDivide(size, align) * align; } template <typename RingType, typename DescType> RingType InitializeRing(size_t ring_size, const xdp_ring_offset& offsets, int xdp_fd, off_t xdp_pgoff) { void* mapping = mmap(0, offsets.desc + ring_size * sizeof(DescType), PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, xdp_fd, xdp_pgoff); if (mapping == MAP_FAILED) { ErrorExit("mmap"); } char* addr = static_cast<char*>(mapping); RingType ring; ring.cached_prod = 0; ring.cached_cons = 0; ring.mask = ring_size - 1; ring.size = ring_size; ring.producer = reinterpret_cast<uint32_t*>(addr + offsets.producer); ring.consumer = reinterpret_cast<uint32_t*>(addr + offsets.consumer); ring.ring = reinterpret_cast<DescType*>(addr + offsets.desc); ring.mapping = mapping; return ring; } void CreateXsk(uint32_t if_index, uint32_t queue_id, size_t nr_frames, int umem_fd) { const int xsk_fd = socket(AF_XDP, SOCK_RAW, 0); if (xsk_fd < 0) { ErrorExit("socket"); } int rx_size = ceil2(nr_frames); for (;; rx_size /= 2) { int ret = setsockopt(xsk_fd, SOL_XDP, XDP_RX_RING, &rx_size, sizeof(rx_size)); if (ret == 0) { break; } else if (errno != ENOMEM) { ErrorExit("socket"); } } xdp_mmap_offsets offsets{}; socklen_t offsets_len = sizeof(offsets); if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, &offsets, &offsets_len) < 0) { ErrorExit("getsockopt"); } if (offsets_len != sizeof(offsets)) { std::cerr << "XDP_MMAP_OFFSETS length (" << offsets_len << ") not supported\n"; exit(1); } auto rx = InitializeRing<XskConsumerRing, xdp_desc>( rx_size, offsets.rx, xsk_fd, XDP_PGOFF_RX_RING); sockaddr_xdp sxdp{}; sxdp.sxdp_family = AF_XDP; sxdp.sxdp_flags = XDP_SHARED_UMEM; sxdp.sxdp_ifindex = if_index; sxdp.sxdp_queue_id = queue_id; sxdp.sxdp_shared_umem_fd = umem_fd; if (bind(xsk_fd, reinterpret_cast<sockaddr*>(&sxdp), sizeof(sxdp)) < 0) { ErrorExit("bind"); } xdp_options options{}; socklen_t options_len = sizeof(options); if (getsockopt(xsk_fd, SOL_XDP, XDP_OPTIONS, &options, &options_len) < 0) { ErrorExit("getsockopt"); } std::cout << "Created AF_XDP socket (" << rx_size << " rx descs)" << ((options.flags & XDP_OPTIONS_ZEROCOPY) ? " (zc)" : "") << "\n"; } void CreateUmemXsk(uint32_t if_index, uint32_t queue_id, size_t umem_size) { const uint32_t page_size = getpagesize(); const uint32_t frame_size = page_size; const uint32_t frame_mask = frame_size - 1; constexpr size_t MAX_FRAMES = KERNEL_MAX_FRAMES - KERNEL_RX_BATCH_SIZE; const size_t nr_frames = std::min( AlignSize(umem_size, static_cast<size_t>(page_size)) / frame_size, MAX_FRAMES); const size_t nr_frames_ring = ceil2(nr_frames + KERNEL_RX_BATCH_SIZE); umem_size = frame_size * nr_frames; std::cout << "Allocating UMEM buffer of " << umem_size << " bytes\n"; int umem_fd = socket(AF_XDP, SOCK_RAW, 0); if (umem_fd == -1) { ErrorExit("socket"); } void* umem_area = mmap(0, umem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0); if (umem_area == MAP_FAILED) { ErrorExit("mmap"); } xdp_umem_reg umem_reg{}; umem_reg.addr = reinterpret_cast<uint64_t>(umem_area); umem_reg.len = umem_size; umem_reg.chunk_size = frame_size; umem_reg.headroom = UMEM_HEADROOM; if (setsockopt(umem_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, sizeof(umem_reg)) < 0) { ErrorExit("setsockopt"); } const int fr_size = nr_frames_ring; if (setsockopt(umem_fd, SOL_XDP, XDP_UMEM_FILL_RING, &fr_size, sizeof(fr_size)) < 0) { ErrorExit("setsockopt"); } const int cr_size = nr_frames_ring; if (setsockopt(umem_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cr_size, sizeof(cr_size)) < 0) { ErrorExit("setsockopt"); } const int dummy_rx_size = 1; if (setsockopt(umem_fd, SOL_XDP, XDP_RX_RING, &dummy_rx_size, sizeof(dummy_rx_size)) < 0) { ErrorExit("setsockopt"); } xdp_mmap_offsets offsets{}; socklen_t offsets_len = sizeof(offsets); if (getsockopt(umem_fd, SOL_XDP, XDP_MMAP_OFFSETS, &offsets, &offsets_len) < 0) { ErrorExit("getsockopt"); } if (offsets_len != sizeof(offsets)) { std::cerr << "XDP_MMAP_OFFSETS length (" << offsets_len << ") not supported\n"; exit(1); } auto fr = InitializeRing<XskProducerRing, uint64_t>( fr_size, offsets.fr, umem_fd, XDP_UMEM_PGOFF_FILL_RING); uint32_t idx; if (int ret = xsk_ring_prod__reserve(&fr, nr_frames, &idx); ret < 0) { errno = -ret; ErrorExit("xsk_ring_prod__reserve"); } else if (static_cast<size_t>(ret) != nr_frames) { ErrorExit("xsk_ring_prod__reserve"); } for (size_t i = 0; i < nr_frames; ++i) *xsk_ring_prod__fill_addr(&fr, idx++) = i * frame_size; xsk_ring_prod__submit(&fr, nr_frames); sockaddr_xdp sxdp{}; sxdp.sxdp_family = AF_XDP; sxdp.sxdp_flags = XDP_ZEROCOPY; sxdp.sxdp_ifindex = if_index; sxdp.sxdp_queue_id = queue_id; sxdp.sxdp_shared_umem_fd = -1; if (bind(umem_fd, reinterpret_cast<sockaddr*>(&sxdp), sizeof(sxdp)) < 0) { ErrorExit("bind"); } CreateXsk(if_index, queue_id, nr_frames, umem_fd); } int main(int argc, char* argv[]) { if (argc != 3) { std::cerr << "Usage: xdp_bomb <interface_name> <queue_id>\n"; return 1; } const char* if_name = argv[1]; const uint32_t queue_id = std::atoi(argv[2]); const size_t umem_size = UMEM_SIZE; const rlimit rlimit_infinity = {RLIM_INFINITY, RLIM_INFINITY}; if (setrlimit(RLIMIT_MEMLOCK, &rlimit_infinity) != 0) { ErrorExit("setrlimit"); } bpf_prog_load_attr prog_load_attr{}; prog_load_attr.file = XDP_PROGRAM_PATH; prog_load_attr.prog_type = BPF_PROG_TYPE_XDP; bpf_object* prog; int prog_fd; if (bpf_prog_load_xattr(&prog_load_attr, &prog, &prog_fd) != 0) { ErrorExit("bpf_prog_load_xattr"); } std::cout << "Loaded XDP program: `" << prog_load_attr.file << "`\n"; const int if_index = if_nametoindex(if_name); if (if_index == 0) { std::cerr << "Could not get interface index for `" << if_name << "`\n"; } (void)bpf_set_link_xdp_fd(if_index, -1, XDP_FLAGS_SKB_MODE); (void)bpf_set_link_xdp_fd(if_index, -1, XDP_FLAGS_DRV_MODE); (void)bpf_set_link_xdp_fd(if_index, -1, XDP_FLAGS_HW_MODE); if (int ret = bpf_set_link_xdp_fd(if_index, prog_fd, 0); ret < 0) { errno = -ret; ErrorExit("bpf_set_link_xdp_fd"); } uint32_t prog_id; if (int ret = bpf_get_link_xdp_id(if_index, &prog_id, 0); ret < 0) { errno = -ret; ErrorExit("bpf_get_link_xdp_id"); } std::cout << "XDP program attached to interface `" << if_name << "` (id=" << prog_id << ")\n"; for (int i = 0; i < 32; ++i) { pid_t pid = fork(); if (pid == -1) { ErrorExit("fork"); } else if (pid != 0) { wait(nullptr); } else { CreateUmemXsk(if_index, queue_id, umem_size); return 0; } } std::cout << "You win.\n"; return 0; }