On 9/25/20 12:03 AM, Daniel Borkmann wrote:
> On 9/24/20 8:58 PM, Eric Dumazet wrote:
>> On 9/24/20 8:21 PM, Daniel Borkmann wrote:
> [...]
>>> diff --git a/include/linux/cookie.h b/include/linux/cookie.h
>>> new file mode 100644
>>> index 000000000000..2488203dc004
>>> --- /dev/null
>>> +++ b/include/linux/cookie.h
>>> @@ -0,0 +1,41 @@
>>> +/* SPDX-License-Identifier: GPL-2.0 */
>>> +#ifndef __LINUX_COOKIE_H
>>> +#define __LINUX_COOKIE_H
>>> +
>>> +#include <linux/atomic.h>
>>> +#include <linux/percpu.h>
>>> +
>>> +struct gen_cookie {
>>> +	u64 __percpu	*local_last;
>>> +	atomic64_t	 shared_last ____cacheline_aligned_in_smp;
>>> +};
>>> +
>>> +#define COOKIE_LOCAL_BATCH	4096
>>> +
>>> +#define DEFINE_COOKIE(name)					\
>>> +	static DEFINE_PER_CPU(u64, __##name);			\
>>> +	static struct gen_cookie name = {			\
>>> +		.local_last	= &__##name,			\
>>> +		.shared_last	= ATOMIC64_INIT(0),		\
>>> +	}
>>> +
>>> +static inline u64 gen_cookie_next(struct gen_cookie *gc)
>>> +{
>>> +	u64 *local_last = &get_cpu_var(*gc->local_last);
>>> +	u64 val = *local_last;
>>> +
>>> +	if (__is_defined(CONFIG_SMP) &&
>>> +	    unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) {
>>> +		s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH,
>>> +					       &gc->shared_last);
>>> +		val = next - COOKIE_LOCAL_BATCH;
>>> +	}
>>> +	val++;
>>> +	if (unlikely(!val))
>>> +		val++;
>>> +	*local_last = val;
>>> +	put_cpu_var(local_last);
>>> +	return val;
>>
>> This is not interrupt safe.
>>
>> I think sock_gen_cookie() can be called from interrupt context.
>>
>> get_next_ino() is only called from process context, that is why I used
>> get_cpu_var() and put_cpu_var().
>
> Hmm, agree, good point. Need to experiment a bit more .. initial thinking
> potentially something like the below could do where we fall back to atomic
> counter iff we encounter nesting (which should be an extremely rare case
> normally).
>
> BPF progs where this can be called from are non-preemptible, so we could
> actually move the temp preempt_disable/enable() from get/put_cpu_var() into
> a wrapper func for slow path non-BPF users as well.
>
> static inline u64 gen_cookie_next(struct gen_cookie *gc)
> {
> 	u64 val;
>

I presume you would use a single structure to hold level_nesting and
local_last in the same cache line:

struct pcpu_gen_cookie {
	int level_nesting;
	u64 local_last;
} __aligned(16);

> 	if (likely(this_cpu_inc_return(*gc->level_nesting) == 1)) {
> 		u64 *local_last = this_cpu_ptr(gc->local_last);
>
> 		val = *local_last;
> 		if (__is_defined(CONFIG_SMP) &&
> 		    unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) {
> 			s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH,
> 						       &gc->shared_last);
> 			val = next - COOKIE_LOCAL_BATCH;
> 		}
> 		val++;
> 		if (unlikely(!val))
> 			val++;

Note that we really expect this wrapping will never happen with a 64bit
value. (We had to take care of the wrapping in get_next_ino() as it was
dealing with 32bit values.)

> 		*local_last = val;
> 	} else {
> 		val = atomic64_add_return(COOKIE_LOCAL_BATCH,
> 					  &gc->shared_last);

Or:

	val = atomic64_dec_return(&reverse_counter);

with reverse_counter's initial value set to ATOMIC64_INIT(0)?

This will start handing out 'big' cookies like 0xFFFFFFFFxxxxxxxx right
away, making sure applications are not breaking with them only after a few
months of uptime.

This would also not consume COOKIE_LOCAL_BATCH units per value, but this
seems minor given the available space.

> 	}
> 	this_cpu_dec(*gc->level_nesting);
> 	return val;
> }
>
> Thanks,
> Daniel
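
Putting the two pieces together (the combined per-cpu struct plus a
dedicated reverse counter for the nested fallback), a rough sketch could
look like the below. Field names such as forward_last and reverse_last are
just placeholders, and it assumes the caller is non-preemptible (BPF) or
goes through the preempt_disable() wrapper you mention:

struct pcpu_gen_cookie {
	int level_nesting;
	u64 local_last;
} __aligned(16);

struct gen_cookie {
	struct pcpu_gen_cookie __percpu *local;
	atomic64_t forward_last ____cacheline_aligned_in_smp;
	atomic64_t reverse_last;
};

#define COOKIE_LOCAL_BATCH	4096

#define DEFINE_COOKIE(name)						\
	static DEFINE_PER_CPU(struct pcpu_gen_cookie, __##name);	\
	static struct gen_cookie name = {				\
		.local		= &__##name,				\
		.forward_last	= ATOMIC64_INIT(0),			\
		.reverse_last	= ATOMIC64_INIT(0),			\
	}

static inline u64 gen_cookie_next(struct gen_cookie *gc)
{
	struct pcpu_gen_cookie *local = this_cpu_ptr(gc->local);
	u64 val;

	if (likely(this_cpu_inc_return(gc->local->level_nesting) == 1)) {
		/* Common, non-nested case: allocate from the per-cpu
		 * batch, refilled from forward_last every
		 * COOKIE_LOCAL_BATCH values.
		 */
		val = local->local_last;
		if (__is_defined(CONFIG_SMP) &&
		    unlikely((val & (COOKIE_LOCAL_BATCH - 1)) == 0)) {
			s64 next = atomic64_add_return(COOKIE_LOCAL_BATCH,
						       &gc->forward_last);
			val = next - COOKIE_LOCAL_BATCH;
		}
		local->local_last = ++val;
	} else {
		/* Nested (e.g. irq) case: count down from U64_MAX so
		 * these values cannot collide with the forward space.
		 */
		val = atomic64_dec_return(&gc->reverse_last);
	}
	this_cpu_dec(gc->local->level_nesting);
	return val;
}

With the reverse counter starting at 0, the two spaces grow towards each
other and the explicit zero check can go away, per the 64bit argument above.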