Re: i40e: Kernel freezes with XDP_ZEROCOPY

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hello,
I have managed to extract a reproducer that crashes the kernel. We
observe the crash with all drivers that support XDP zerocopy (i40e,
ixgbe, mlx5_core). The program source is attached (C++). Compile it with
a recent clang++ or g++ using the flag -std=c++17, and link it against
libbpf. Use libbpf v0.0.5 and make sure to revert commit
5771dacd3dc2fdd041c51242819a9f212e04af55.

The crash is a kernel NULL pointer dereference in xsk_umem_consume_tx.

Hopefully this is useful,
Kal

On Wed, Aug 7, 2019 at 10:48 PM Kal Cutter Conley
<kal.conley@xxxxxxxxxxx> wrote:
>
> Hello,
> I am trying to get AF_XDP working with the i40e driver (Ethernet
> Controller X710 for 10GbE SFP+). After bind() with XDP_ZEROCOPY the
> kernel (machine) freezes hard. I have reproduced this on various
> kernel versions between 5.1 and 5.3-rc3; the 5.3 kernels also freeze,
> but at a later stage. I tried replacing my XDP program with a
> trivial one that simply returns XDP_PASS but it didn't help. On the
> same system, the xdpsock sample does appear to work with the -z flag,
> however. Are there any current known issues that could be causing
> this? I will try to extract a minimal example that exercises the
> freeze.
>
> Thanks,
> Kal
#include <linux/if_link.h>
#include <net/if.h>
#include <numaif.h>
#include <stddef.h>
#include <stdint.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
#include <algorithm>
#include <cstdlib>
#include <future>
#include <iostream>
#include <limits>
#include <type_traits>

#include "libbpf/include/uapi/linux/if_xdp.h"
#include "libbpf/src/bpf.h"
#include "libbpf/src/libbpf.h"
#include "libbpf/src/xsk.h"

// Fallback definitions, used only when none of the headers included above
// already defines these socket constants.
#if !defined(AF_XDP)
#define AF_XDP 44
#endif

#if !defined(SOL_XDP)
#define SOL_XDP 283
#endif

// Cap used by CreateUmemXsk when deciding how many UMEM frames to register.
// NOTE(review): presumably mirrors a kernel-side limit — confirm.
constexpr size_t KERNEL_MAX_FRAMES = 256 * 1024;
// Subtracted from KERNEL_MAX_FRAMES and added to the frame count when the
// fill/completion ring size is computed in CreateUmemXsk.
constexpr size_t KERNEL_RX_BATCH_SIZE = 16;
// Value stored in xdp_umem_reg::headroom when the UMEM is registered.
constexpr size_t UMEM_HEADROOM = 16;
// Requested UMEM size in bytes; main() passes it to CreateUmemXsk, which
// rounds it to whole frames.
constexpr size_t UMEM_SIZE = 500'000'000;

// Path of the BPF object file that main() loads with bpf_prog_load_xattr.
static const char* XDP_PROGRAM_PATH = "xdp_bpf.o";

// libbpf consumer ring extended with the base address of the mmap()ed region
// backing it (filled in by InitializeRing).
struct XskConsumerRing : xsk_ring_cons {
    void* mapping;
};

// libbpf producer ring extended with the base address of the mmap()ed region
// backing it (filled in by InitializeRing).
struct XskProducerRing : xsk_ring_prod {
    void* mapping;
};

// Reports a fatal error and terminates the process.
//
// Writes "<text>: errno: <value>" to stderr, where <value> is the errno that
// was current when the function was entered, then exits with status 1.
// Never returns.
[[noreturn]] void ErrorExit(const char* text) {
    // Snapshot errno before touching the stream: the insertions below are
    // evaluated left to right and perform I/O, which may overwrite errno
    // before it would otherwise be read.
    const int saved_errno = errno;
    std::cerr << text << ": errno: " << saved_errno << "\n";
    std::exit(1);
}

// Rounds `x` up to the nearest power of two; a value that already is a power
// of two is returned unchanged. An input of 0 wraps around and yields 0.
size_t ceil2(size_t x) noexcept {
    constexpr int kBits = std::numeric_limits<size_t>::digits;

    // Smear the highest set bit of (x - 1) into every lower position by
    // OR-ing with progressively doubled right shifts, then add one.
    size_t smeared = x - 1;
    for (int shift = 1; shift < kBits; shift *= 2) {
        smeared |= smeared >> shift;
    }
    return smeared + 1;
}

// Integer division of `value` by `divisor`, rounded up.
// Only participates in overload resolution for unsigned integral types.
// `divisor` must be non-zero.
template <typename T>
constexpr std::enable_if_t<std::is_integral_v<T> && std::is_unsigned_v<T>, T>
CeilDivide(T value, T divisor) {
    // Add one to the truncated quotient whenever there is a remainder.
    return static_cast<T>(value / divisor + (value % divisor == 0 ? 0 : 1));
}

// Rounds `size` up to the next multiple of `align` (`align` must be non-zero).
constexpr auto AlignSize(std::size_t size, std::size_t align) {
    const std::size_t chunk_count = CeilDivide(size, align);
    return chunk_count * align;
}

// Maps one ring of an AF_XDP socket into this process and returns a ring
// object whose pointers refer into that mapping.
//
//   ring_size  number of descriptors in the ring; the mask is computed as
//              ring_size - 1
//   offsets    offsets of the producer index, consumer index and descriptor
//              array within the mapping
//   xdp_fd     socket file descriptor passed to mmap
//   xdp_pgoff  mmap offset that selects which ring to map
//
// Exits the process via ErrorExit if mmap fails.
template <typename RingType, typename DescType>
RingType InitializeRing(size_t ring_size,
                        const xdp_ring_offset& offsets,
                        int xdp_fd,
                        off_t xdp_pgoff) {
    // The mapping has to cover everything up to the end of the descriptor
    // array, which starts `offsets.desc` bytes into the region.
    const size_t map_length = offsets.desc + ring_size * sizeof(DescType);

    void* const region = mmap(nullptr, map_length, PROT_READ | PROT_WRITE,
                              MAP_SHARED | MAP_POPULATE, xdp_fd, xdp_pgoff);
    if (region == MAP_FAILED) {
        ErrorExit("mmap");
    }

    char* const base = static_cast<char*>(region);

    RingType result;
    result.mapping = region;
    result.size = ring_size;
    result.mask = ring_size - 1;
    result.cached_prod = 0;
    result.cached_cons = 0;
    result.producer = reinterpret_cast<uint32_t*>(base + offsets.producer);
    result.consumer = reinterpret_cast<uint32_t*>(base + offsets.consumer);
    result.ring = reinterpret_cast<DescType*>(base + offsets.desc);
    return result;
}

// Creates an additional AF_XDP socket on (if_index, queue_id) that shares the
// UMEM owned by `umem_fd`.
//
//   if_index   interface index to bind to
//   queue_id   queue on that interface to bind to
//   nr_frames  number of UMEM frames; the RX ring starts at this value rounded
//              up to a power of two
//   umem_fd    socket whose UMEM is shared (bound with XDP_SHARED_UMEM)
//
// Any failing system call terminates the process via ErrorExit / exit(1).
// The socket is intentionally left open; nothing in this function closes it.
void CreateXsk(uint32_t if_index, uint32_t queue_id, size_t nr_frames, int umem_fd) {
    const int xsk_fd = socket(AF_XDP, SOCK_RAW, 0);
    if (xsk_fd < 0) {
        ErrorExit("socket");
    }

    // Start with the largest RX ring and halve it each time the kernel
    // reports ENOMEM; any other error is fatal.
    int rx_size = static_cast<int>(ceil2(nr_frames));
    for (;; rx_size /= 2) {
        int ret = setsockopt(xsk_fd, SOL_XDP, XDP_RX_RING, &rx_size,
                             sizeof(rx_size));
        if (ret == 0) {
            break;
        } else if (errno != ENOMEM) {
            // Report the call that actually failed (was mislabelled "socket").
            ErrorExit("setsockopt");
        }
    }

    xdp_mmap_offsets offsets{};
    socklen_t offsets_len = sizeof(offsets);
    if (getsockopt(xsk_fd, SOL_XDP, XDP_MMAP_OFFSETS, &offsets, &offsets_len) < 0) {
        ErrorExit("getsockopt");
    }
    if (offsets_len != sizeof(offsets)) {
        std::cerr << "XDP_MMAP_OFFSETS length (" << offsets_len
                  << ") not supported\n";
        exit(1);
    }

    // The RX ring is mapped but never read afterwards; the mapping itself is
    // kept so the sequence of system calls stays the same.
    [[maybe_unused]] auto rx = InitializeRing<XskConsumerRing, xdp_desc>(
            rx_size, offsets.rx, xsk_fd, XDP_PGOFF_RX_RING);

    sockaddr_xdp sxdp{};
    sxdp.sxdp_family = AF_XDP;
    sxdp.sxdp_flags = XDP_SHARED_UMEM;
    sxdp.sxdp_ifindex = if_index;
    sxdp.sxdp_queue_id = queue_id;
    sxdp.sxdp_shared_umem_fd = umem_fd;
    if (bind(xsk_fd, reinterpret_cast<sockaddr*>(&sxdp), sizeof(sxdp)) < 0) {
        ErrorExit("bind");
    }

    // Query the socket options only to report whether zero-copy is active.
    xdp_options options{};
    socklen_t options_len = sizeof(options);
    if (getsockopt(xsk_fd, SOL_XDP, XDP_OPTIONS, &options, &options_len) < 0) {
        ErrorExit("getsockopt");
    }

    std::cout << "Created AF_XDP socket (" << rx_size << " rx descs)"
              << ((options.flags & XDP_OPTIONS_ZEROCOPY) ? " (zc)" : "")
              << "\n";
}

// Allocates and registers a UMEM, fills its fill ring with every frame, binds
// the owning socket to (if_index, queue_id) with XDP_ZEROCOPY, and then
// creates a second socket sharing that UMEM via CreateXsk.
//
//   if_index   interface index to bind to
//   queue_id   queue on that interface to bind to
//   umem_size  requested UMEM size in bytes; rounded up to whole frames and
//              capped at KERNEL_MAX_FRAMES - KERNEL_RX_BATCH_SIZE frames
//
// Any failure terminates the process via ErrorExit / exit(1). The socket and
// the UMEM mapping are intentionally never released by this function.
void CreateUmemXsk(uint32_t if_index, uint32_t queue_id, size_t umem_size) {
    // One frame per page.
    const uint32_t page_size = getpagesize();
    const uint32_t frame_size = page_size;

    constexpr size_t MAX_FRAMES = KERNEL_MAX_FRAMES - KERNEL_RX_BATCH_SIZE;
    const size_t nr_frames = std::min(
            AlignSize(umem_size, static_cast<size_t>(page_size)) / frame_size,
            MAX_FRAMES);
    // Fill and completion rings are sized to the next power of two above the
    // frame count plus one RX batch.
    const size_t nr_frames_ring = ceil2(nr_frames + KERNEL_RX_BATCH_SIZE);
    umem_size = frame_size * nr_frames;

    std::cout << "Allocating UMEM buffer of " << umem_size << " bytes\n";

    int umem_fd = socket(AF_XDP, SOCK_RAW, 0);
    if (umem_fd == -1) {
        ErrorExit("socket");
    }

    void* umem_area = mmap(0, umem_size, PROT_READ | PROT_WRITE,
                           MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
    if (umem_area == MAP_FAILED) {
        ErrorExit("mmap");
    }

    xdp_umem_reg umem_reg{};
    umem_reg.addr = reinterpret_cast<uint64_t>(umem_area);
    umem_reg.len = umem_size;
    umem_reg.chunk_size = frame_size;
    umem_reg.headroom = UMEM_HEADROOM;
    if (setsockopt(umem_fd, SOL_XDP, XDP_UMEM_REG, &umem_reg, sizeof(umem_reg)) < 0) {
        ErrorExit("setsockopt");
    }

    const int fr_size = nr_frames_ring;
    if (setsockopt(umem_fd, SOL_XDP, XDP_UMEM_FILL_RING, &fr_size, sizeof(fr_size)) < 0) {
        ErrorExit("setsockopt");
    }

    const int cr_size = nr_frames_ring;
    if (setsockopt(umem_fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, &cr_size, sizeof(cr_size)) < 0) {
        ErrorExit("setsockopt");
    }

    // The UMEM-owning socket gets a single-entry RX ring; it is never mapped
    // or read in this function.
    const int dummy_rx_size = 1;
    if (setsockopt(umem_fd, SOL_XDP, XDP_RX_RING, &dummy_rx_size, sizeof(dummy_rx_size)) < 0) {
        ErrorExit("setsockopt");
    }

    xdp_mmap_offsets offsets{};
    socklen_t offsets_len = sizeof(offsets);
    if (getsockopt(umem_fd, SOL_XDP, XDP_MMAP_OFFSETS, &offsets, &offsets_len) < 0) {
        ErrorExit("getsockopt");
    }
    if (offsets_len != sizeof(offsets)) {
        std::cerr << "XDP_MMAP_OFFSETS length (" << offsets_len
                  << ") not supported\n";
        exit(1);
    }

    auto fr = InitializeRing<XskProducerRing, uint64_t>(
            fr_size, offsets.fr, umem_fd, XDP_UMEM_PGOFF_FILL_RING);

    uint32_t idx;
    if (int ret = xsk_ring_prod__reserve(&fr, nr_frames, &idx); ret < 0) {
        errno = -ret;
        ErrorExit("xsk_ring_prod__reserve");
    } else if (static_cast<size_t>(ret) != nr_frames) {
        // A short reservation does not set errno, so report the counts
        // instead of printing an unrelated errno value.
        std::cerr << "xsk_ring_prod__reserve: reserved " << ret << " of "
                  << nr_frames << " fill ring entries\n";
        exit(1);
    }

    // Hand every frame of the UMEM to the kernel, addressed by byte offset.
    for (size_t i = 0; i < nr_frames; ++i)
        *xsk_ring_prod__fill_addr(&fr, idx++) = i * frame_size;

    xsk_ring_prod__submit(&fr, nr_frames);

    sockaddr_xdp sxdp{};
    sxdp.sxdp_family = AF_XDP;
    sxdp.sxdp_flags = XDP_ZEROCOPY;
    sxdp.sxdp_ifindex = if_index;
    sxdp.sxdp_queue_id = queue_id;
    sxdp.sxdp_shared_umem_fd = -1;
    if (bind(umem_fd, reinterpret_cast<sockaddr*>(&sxdp), sizeof(sxdp)) < 0) {
        ErrorExit("bind");
    }

    CreateXsk(if_index, queue_id, nr_frames, umem_fd);
}

// Entry point of the reproducer.
//
// Usage: xdp_bomb <interface_name> <queue_id>
//
// Loads the XDP program from XDP_PROGRAM_PATH, attaches it to the interface,
// then forks up to 32 children one after another; each child sets up a UMEM
// and two AF_XDP sockets via CreateUmemXsk and exits.
// Returns 0 on completion, 1 on a usage or setup error.
int main(int argc, char* argv[]) {
    if (argc != 3) {
        std::cerr << "Usage: xdp_bomb <interface_name> <queue_id>\n";
        return 1;
    }
    const char* if_name = argv[1];
    const uint32_t queue_id = std::atoi(argv[2]);

    const size_t umem_size = UMEM_SIZE;

    // Lift the locked-memory limit before any UMEM is registered.
    const rlimit rlimit_infinity = {RLIM_INFINITY, RLIM_INFINITY};
    if (setrlimit(RLIMIT_MEMLOCK, &rlimit_infinity) != 0) {
        ErrorExit("setrlimit");
    }

    bpf_prog_load_attr prog_load_attr{};
    prog_load_attr.file = XDP_PROGRAM_PATH;
    prog_load_attr.prog_type = BPF_PROG_TYPE_XDP;
    bpf_object* prog;
    int prog_fd;
    if (bpf_prog_load_xattr(&prog_load_attr, &prog, &prog_fd) != 0) {
        ErrorExit("bpf_prog_load_xattr");
    }

    std::cout << "Loaded XDP program: `" << prog_load_attr.file << "`\n";

    const int if_index = if_nametoindex(if_name);
    if (if_index == 0) {
        std::cerr << "Could not get interface index for `" << if_name << "`\n";
        // Without a valid interface index nothing below can work; previously
        // execution continued with index 0.
        return 1;
    }

    // Best-effort detach of any program already attached in any mode; the
    // results are deliberately ignored.
    (void)bpf_set_link_xdp_fd(if_index, -1, XDP_FLAGS_SKB_MODE);
    (void)bpf_set_link_xdp_fd(if_index, -1, XDP_FLAGS_DRV_MODE);
    (void)bpf_set_link_xdp_fd(if_index, -1, XDP_FLAGS_HW_MODE);

    if (int ret = bpf_set_link_xdp_fd(if_index, prog_fd, 0);
        ret < 0) {
        errno = -ret;
        ErrorExit("bpf_set_link_xdp_fd");
    }

    uint32_t prog_id;
    if (int ret = bpf_get_link_xdp_id(if_index, &prog_id, 0); ret < 0) {
        errno = -ret;
        ErrorExit("bpf_get_link_xdp_id");
    }

    std::cout << "XDP program attached to interface `" << if_name
              << "` (id=" << prog_id << ")\n";

    // Run the socket setup in a fresh child each time; the parent waits for
    // the child to finish before forking the next one.
    for (int i = 0; i < 32; ++i) {
        pid_t pid = fork();
        if (pid == -1) {
            ErrorExit("fork");
        } else if (pid != 0) {
            wait(nullptr);
        } else {
            CreateUmemXsk(if_index, queue_id, umem_size);
            return 0;
        }
    }

    std::cout << "You win.\n";
    return 0;
}

[Index of Archives]     [Linux Networking Development]     [Fedora Linux Users]     [Linux SCTP]     [DCCP]     [Gimp]     [Yosemite Campsites]

  Powered by Linux