Fwd: XDP CPU maps & shared umem

Hi,

Short Version
I’ve encountered an issue when redirecting traffic to another CPU while using shared UMEM: the frame arrives at the target CPU with an invalid receive queue index. On the patch that supports redirecting packets to any queue with the same umem, this caused a kernel crash, because of a test there that is based on the receive queue.

Long Version
I’ve created a test program that forwards traffic between two interfaces (rewriting the MAC addresses); see the code below. It starts two separate processes on two different cores. When redirecting traffic directly to an XSK socket, everything works well. I’m running kernel 5.15.117 with Magnus’s patch for shared umem.
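
For reference, the per-core socket creation on the shared UMEM looks roughly like this (a sketch only, not my actual code; "eth0", the queue id and the libbpf <= 0.7 xsk.h API are assumptions):

#include <stdlib.h>
#include <bpf/xsk.h>          /* AF_XDP helpers (libbpf <= 0.7) */
#include <linux/if_link.h>    /* XDP_FLAGS_DRV_MODE */
#include <linux/if_xdp.h>     /* XDP_USE_NEED_WAKEUP */

/* One of these per process/core; each socket gets its own rings but
 * shares the one umem created earlier with xsk_umem__create (). */
static struct xsk_socket* make_socket (struct xsk_umem* umem)
{
    static struct xsk_ring_cons rx, comp;
    static struct xsk_ring_prod tx, fill;   /* per-socket fill/completion */
    struct xsk_socket* xsk;
    struct xsk_socket_config cfg = {
        .rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
        .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
        .xdp_flags = XDP_FLAGS_DRV_MODE,
        .bind_flags = XDP_USE_NEED_WAKEUP,
    };

    if (xsk_socket__create_shared (&xsk, "eth0", /* queue */ 2, umem,
                                   &rx, &tx, &fill, &comp, &cfg))
    {
      exit (1);
    }
    return (xsk);
}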

The next step was an XDP program that redirects each packet to the other CPU, where a second (cpumap) XDP program redirects the packets to an XDP socket. Initially this resulted in a kernel crash. The reason is that the receive queue index is not preserved correctly once the packet switches CPU.
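
The cpumap entries carry the second program, set up roughly like this (a sketch; the fds, the CPU number and the 2048 queue size are placeholders, not values from my program):

#include <bpf/bpf.h>          /* bpf_map_update_elem */
#include <linux/bpf.h>        /* struct bpf_cpumap_val, BPF_ANY */

/* cpu_map_fd / cpumap_prog_fd stand for the fds of the loaded cpu_map
 * and of rdwr_cpu_prog below. */
static int setup_cpumap_entry (int cpu_map_fd, int cpumap_prog_fd, __u32 cpu)
{
    struct bpf_cpumap_val val = {
        .qsize = 2048,                   /* per-CPU queue size (assumed) */
        .bpf_prog.fd = cpumap_prog_fd,   /* runs on the target CPU */
    };

    return (bpf_map_update_elem (cpu_map_fd, &cpu, &val, BPF_ANY));
}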

I’ve patched the kernel so that it performs a sanity check and, instead of crashing, defaults to queue 0 just for the umem test.
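
The idea of the check is roughly the following (only a sketch of the approach, not my actual diff; the exact spot in the XSK receive path is an approximation):

	/* If the queue index that arrives with the frame is out of range
	 * (as it is after the cpumap redirect), fall back to queue 0
	 * instead of indexing per-queue umem state with it. */
	u32 qid = xdp->rxq->queue_index;

	if (unlikely(qid >= xdp->rxq->dev->real_num_rx_queues))
		qid = 0;	/* invalid queue index, use 0 for the umem test */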

The kernel no longer crashes, but I don’t get the traffic to the XSK sockets. I log the receive queues, and the values do not look random:
On first interface:
0xC1323D50
0xC1343D19
0xC1343D50

On second interface:
0xC134BD50
0xC135BD50

Very few bits differ between these “receive queue” values. I couldn’t find those or similar values anywhere in my program or in the packets, so it seems unlikely they come from something I do in userspace. Could it be some flags field? The values are not identical across reboots, but they stay close.

Am I doing something wrong? It seems pretty basic. Most of the code in the XDP program was added just for debugging. The only oddity, perhaps, is the use of the shared umem.

Thanks,
Yuval.

I’m attaching the program and the values from my “debug map” (the tables were produced by walking the hash map; a sketch of the dump loop follows this key list). In the debug map:
0. Values 0/1 - which variant ran (0 = without, 1 = with CPU redirection).
1. Values 2XY - packet redirected from core X to core Y (1st stage).
2. Values 4XY - redirect to core X returned action Y (1st stage).
(These suggest that the 1st stage worked correctly).
3. Values 1XXYY - packet receive queue XX on core YY (XX = 10 marks an invalid receive queue outside 2..5; YY = 20 marks a core id outside 0..11).
4. Values 2XXXX - packet receive queue on any core (key 24999 counts queues of 5000 and above).
5. Random-looking entries - raw receive queue values, logged verbatim when 5000 or above.
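
The dump loop is roughly this (a sketch, not my actual tool; dbg_fd stands for the fd of xdp_debug_proto):

#include <stdio.h>
#include <bpf/bpf.h>    /* bpf_map_get_next_key, bpf_map_lookup_elem */

static void dump_debug_map (int dbg_fd)
{
    __u32 key, next_key, val;
    __u32* prev = NULL;

    while (bpf_map_get_next_key (dbg_fd, prev, &next_key) == 0)
    {
      if (bpf_map_lookup_elem (dbg_fd, &next_key, &val) == 0)
      {
        printf ("| %10u | %12u |\n", next_key, val);
      }
      key = next_key;
      prev = &key;
    }
}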

Debug map values without CPU redirection:
+------+------+--------------+
| Port | Key  |     Received |
+------+------+--------------+
|    0 | 1032 |          504 |
|    0 | 1024 |     37005992 |
|    0 | 1034 |     37544052 |
|    0 |    0 |     74546875 |
|    1 | 1034 |     35765961 |
|    1 |    0 |     71492773 |
|    1 | 1024 |     35729107 |
|    1 | 1032 |          277 |
+------+------+--------------+

Debug map values with CPU redirection (no other change):
+------+------------+--------------+
| Port |    Key     |     Received |
+------+------------+--------------+
|    0 |          1 |         3400 |
|    0 |        223 |         2947 |
|    0 |        232 |          454 |
|    0 |        424 |          454 |
|    0 |        434 |         2947 |
|    0 |      11002 |          454 |
|    0 |      11003 |         2947 |
|    0 |      20000 |          106 |
|    0 |      20016 |            7 |
|    0 |      20024 |         1058 |
|    0 |      20582 |            2 |
|    0 |      24999 |         2231 |
|    0 | 3241295184 |          339 |
|    0 | 3241426201 |            3 |
|    0 | 3241426256 |         1891 |
|    1 |          1 |         1013 |
|    1 |        223 |          610 |
|    1 |        232 |          404 |
|    1 |        424 |          404 |
|    1 |        434 |          610 |
|    1 |      11002 |          404 |
|    1 |      11003 |          610 |
|    1 |      20000 |           17 |
|    1 |      20024 |          309 |
|    1 |      24999 |          689 |
|    1 | 3241459024 |          309 |
|    1 | 3241524560 |          381 |
+------+------------+--------------+

The XDP code (the #if 0 block is the direct, no-CPU-redirection path):
/* SPDX-License-Identifier: GPL-2.0 */

#include <linux/if_ether.h>
#include <linux/bpf.h>
#include <linux/in.h>

#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>

#include "xdp/parsing_helpers.h"

/* Redirect to SP queues map. */
struct
{
    __uint (type, BPF_MAP_TYPE_XSKMAP);
    __type (key, __u32);
    __type (value, __u32);
    __uint (max_entries, 128);
} xsks_map SEC(".maps");

/* Redirect to CPUs map. */
struct
{
    __uint (type, BPF_MAP_TYPE_CPUMAP);
    __uint (key_size, sizeof (__u32));
    __uint (value_size, sizeof (struct bpf_cpumap_val));
    __uint (max_entries, 32);
} cpu_map SEC(".maps");

/* Statistics and debug maps. */
struct
{
    __uint (type, BPF_MAP_TYPE_PERCPU_ARRAY);
    __type (key, __u32);
    __type (value, __u32);
    __uint (max_entries, 64);
} xdp_stats_map SEC(".maps");

struct
{
    __uint (type, BPF_MAP_TYPE_HASH);
    __type (key, __u32);
    __type (value, __u32);
    __uint (max_entries, 64);
} xdp_debug_proto SEC(".maps");

static __u32 oneval = 1;

/* Parse the Ethernet header, skipping one VLAN tag if present.
 * Returns the EtherType (network byte order) or -1 on a bounds failure. */
static int my_parse_ethhdr (struct hdr_cursor* nh, void* dataend, struct ethhdr** eth)
{
    __u16 ethtype;
    *eth = nh->pos;
    if ((void*) &(*eth) [1] > dataend)
    {
      return (-1);
    }

    ethtype = (*eth)->h_proto;
    nh->pos = &(*eth) [1];
    if (ethtype == bpf_ntohs (ETH_P_8021Q))
    {
      struct vlan_hdr* vlan = (struct vlan_hdr*) &(*eth) [1];
      if ((void*) &vlan [1] > dataend)
      {
        return (-1);
      }
      ethtype = vlan->h_vlan_encapsulated_proto;
      /* inc_debug_map (10000+(bpf_ntohs (vlan->h_vlan_TCI) & 0x0FFF)); */
      /* inc_debug_map (vlan->h_vlan_TCI); */
      nh->pos = &vlan [1];
    }

    return (ethtype);
}

/* Best-effort counter: create the key if missing, then increment.
 * The increment is not atomic, so concurrent CPUs may lose updates;
 * good enough for debugging. */
static void inc_debug_map (__u32 key)
{
#ifdef WITH_STATS
    bpf_map_update_elem (&xdp_debug_proto,
                         &key, (__u32*) &oneval, BPF_NOEXIST);
    __u32* rec = bpf_map_lookup_elem (&xdp_debug_proto, (__u32*) &key);
    if (rec != NULL)
    {
      ++(*rec);
    }
#endif
}



SEC ("xdp")
int rdwr_xsk_prog (struct xdp_md *ctx)
{
    enum xdp_action action;
    int index = ctx->rx_queue_index;
    void* dataend = (void *)(long)ctx->data_end;
    void* data = (void *)(long)ctx->data;
    int stage = 0;

    struct hdr_cursor nh;

    __u32 *pkt_count;
    struct ethhdr* eth;

#ifdef WITH_STATS
    pkt_count = bpf_map_lookup_elem(&xdp_stats_map, &index);
    if (pkt_count != NULL)
    {
      ++*pkt_count;
    }
#endif

    /* Parse Ethernet & VLAN */
    nh.pos = data;
    int ethtype = my_parse_ethhdr (&nh, dataend, &eth);
    if (ethtype != bpf_ntohs (ETH_P_IP))
    {
      return (XDP_PASS);
    }

    /* Direct XSK redirect. */
#if 0
    inc_debug_map (stage);
    action =  bpf_redirect_map (&xsks_map, index, XDP_PASS);
    inc_debug_map (1000 + index*10 + action);
#endif
    /* CPU map redirect. */
    __u32 cpuid = bpf_get_smp_processor_id ();
    __u32 targetcpu = cpuid ^ 0x01;
    ++stage;
    inc_debug_map (stage);
    inc_debug_map (200 + cpuid * 10 + targetcpu);
    action =  bpf_redirect_map (&cpu_map, targetcpu, XDP_PASS);
    inc_debug_map (400 + targetcpu * 10 + action);
    return (action);
}

SEC ("xdp/cpumap")
int rdwr_cpu_prog(struct xdp_md *ctx)
{
    __u64* dataend64 = (__u64*) (long) ctx->data_end;   /* unused; debug leftover */
    __u64* data64 = (__u64*) (long) ctx->data;          /* unused; debug leftover */
    __u32 rxqueue = ctx->rx_queue_index;
    if (rxqueue < 5000)
    {
      inc_debug_map (20000 + rxqueue);
    }
    else
    {
      inc_debug_map (rxqueue);
      inc_debug_map (24999);
    }

    /* Valid queues on this setup are 2..5; anything else is logged as 10. */
    rxqueue = (rxqueue >= 2 && rxqueue < 6) ? rxqueue : 10;
    __u32 cpuid = bpf_get_smp_processor_id ();
    cpuid = (cpuid < 12) ? cpuid : 20;
    inc_debug_map (10000 + 100*rxqueue + cpuid);

    /* After the CPU switch the XSK sockets are keyed by CPU id. */
    return bpf_redirect_map (&xsks_map, bpf_get_smp_processor_id (), 0);
}
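
For completeness, the entry program is attached along these lines (a sketch with the libbpf 0.x API; "rdwr_xsk.o" is a placeholder and error handling is elided):

#include <net/if.h>           /* if_nametoindex */
#include <bpf/libbpf.h>       /* bpf_object__*, bpf_set_link_xdp_fd */
#include <linux/if_link.h>    /* XDP_FLAGS_DRV_MODE */

static void attach_prog (const char* ifname)
{
    struct bpf_object* obj = bpf_object__open_file ("rdwr_xsk.o", NULL);
    struct bpf_program* prog;
    int ifindex = if_nametoindex (ifname);

    bpf_object__load (obj);
    prog = bpf_object__find_program_by_name (obj, "rdwr_xsk_prog");
    bpf_set_link_xdp_fd (ifindex, bpf_program__fd (prog), XDP_FLAGS_DRV_MODE);
}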
