Re: [RFC PATCH bpf-next v2 1/2] bpf, x64: Fix tailcall infinite loop

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 




On 2023/8/18 23:12, Leon Hwang wrote:
> From commit ebf7d1f508a73871 ("bpf, x64: rework pro/epilogue and tailcall
> handling in JIT"), the tailcall on x64 works better than before.
> 
> From commit e411901c0b775a3a ("bpf: allow for tailcalls in BPF subprograms
> for x64 JIT"), tailcall is able to run in BPF subprograms on x64.
> 
> From commit 5b92a28aae4dd0f8 ("bpf: Support attaching tracing BPF program
> to other BPF programs"), BPF program is able to trace other BPF programs.
> 
> How about combining them all together?
> 
> 1. FENTRY/FEXIT on a BPF subprogram.
> 2. A tailcall runs in the BPF subprogram.
> 3. The tailcall calls itself.
> 
> As a result, a tailcall infinite loop comes up. And the loop would halt
> the machine.
> 
> As we know, in tail call context, the tail_call_cnt propagates by stack
> and RAX register between BPF subprograms. So do it in trampolines.
> 
> Signed-off-by: Leon Hwang <hffilwlqm@xxxxxxxxx>
> ---
>  arch/x86/net/bpf_jit_comp.c | 40 +++++++++++++++++++++++++++++--------
>  include/linux/bpf.h         |  5 +++++
>  kernel/bpf/trampoline.c     |  4 ++--
>  kernel/bpf/verifier.c       | 31 +++++++++++++++++++++-------
>  4 files changed, 63 insertions(+), 17 deletions(-)
> 
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index a5930042139d3..1ad17d7de5eee 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -303,8 +303,12 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
>  	prog += X86_PATCH_SIZE;
>  	if (!ebpf_from_cbpf) {
>  		if (tail_call_reachable && !is_subprog)
> +			/* When it's the entry of the whole tailcall context,
> +			 * zeroing rax means initialising tail_call_cnt.
> +			 */
>  			EMIT2(0x31, 0xC0); /* xor eax, eax */
>  		else
> +			// Keep the same instruction layout.
>  			EMIT2(0x66, 0x90); /* nop2 */
>  	}
>  	EMIT1(0x55);             /* push rbp */
> @@ -1018,6 +1022,10 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
>  
>  #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
>  
> +/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
> +#define RESTORE_TAIL_CALL_CNT(stack)				\
> +	EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8)
> +
>  static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image,
>  		  int oldproglen, struct jit_context *ctx, bool jmp_padding)
>  {
> @@ -1623,9 +1631,7 @@ st:			if (is_imm8(insn->off))
>  
>  			func = (u8 *) __bpf_call_base + imm32;
>  			if (tail_call_reachable) {
> -				/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */
> -				EMIT3_off32(0x48, 0x8B, 0x85,
> -					    -round_up(bpf_prog->aux->stack_depth, 8) - 8);
> +				RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth);
>  				if (!imm32)
>  					return -EINVAL;
>  				offs = 7 + x86_call_depth_emit_accounting(&prog, func);
> @@ -2298,7 +2304,9 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
>   * push rbp
>   * mov rbp, rsp
>   * sub rsp, 16                     // space for skb and dev
> - * push rbx                        // temp regs to pass start time
> + * mov qword ptr [rbp - 40], rbx   // temp regs to pass start time
> + * mov rax, 2                      // cache number of argument to rax
> + * mov qword ptr [rbp - 32], rax   // save number of argument to stack
>   * mov qword ptr [rbp - 16], rdi   // save skb pointer to stack
>   * mov qword ptr [rbp - 8], rsi    // save dev pointer to stack
>   * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
> @@ -2323,7 +2331,9 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
>   * push rbp
>   * mov rbp, rsp
>   * sub rsp, 24                     // space for skb, dev, return value
> - * push rbx                        // temp regs to pass start time
> + * mov qword ptr [rbp - 40], rbx   // temp regs to pass start time
> + * mov rax, 2                      // cache number of argument to rax
> + * mov qword ptr [rbp - 32], rax   // save number of argument to stack
>   * mov qword ptr [rbp - 24], rdi   // save skb pointer to stack
>   * mov qword ptr [rbp - 16], rsi   // save dev pointer to stack
>   * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
> @@ -2400,6 +2410,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>  	 *                     [ ...        ]
>  	 *                     [ stack_arg2 ]
>  	 * RBP - arg_stack_off [ stack_arg1 ]
> +	 * RSP                 [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX
>  	 */
>  
>  	/* room for return value of orig_call or fentry prog */
> @@ -2464,6 +2475,8 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>  	else
>  		/* sub rsp, stack_size */
>  		EMIT4(0x48, 0x83, 0xEC, stack_size);
> +	if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
> +		EMIT1(0x50);		/* push rax */
>  	/* mov QWORD PTR [rbp - rbx_off], rbx */
>  	emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off);
>  
> @@ -2516,9 +2529,15 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i
>  		restore_regs(m, &prog, regs_off);
>  		save_args(m, &prog, arg_stack_off, true);
>  
> +		if (flags & BPF_TRAMP_F_TAIL_CALL_CTX)
> +			/* Before calling the original function, restore the
> +			 * tail_call_cnt from stack to rax.
> +			 */
> +			RESTORE_TAIL_CALL_CNT(stack_size);
> +
>  		if (flags & BPF_TRAMP_F_ORIG_STACK) {
> -			emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8);
> -			EMIT2(0xff, 0xd0); /* call *rax */
> +			emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8);
> +			EMIT2(0xff, 0xd3); /* call *rbx */ // FIXME: Confirm 0xd3?

To avoid clobbering rax (which carries tail_call_cnt in the tail-call
context), change the calling register from rax to rbx.

However, I was unable to confirm the opcode.

So I asked ChatGPT to list the `call` register forms and their corresponding opcodes:

Certainly! Here's a table that provides `call` instructions along with
their corresponding opcodes in x86-64 assembly:

| `call` Register | Opcode (Hex) | Opcode (Binary) |
|-----------------|--------------|-----------------|
| `rax`           | `FF D0`      | `11111111 11010000` |
| `rcx`           | `FF D1`      | `11111111 11010001` |
| `rdx`           | `FF D2`      | `11111111 11010010` |
| `rbx`           | `FF D3`      | `11111111 11010011` |
| `rsp`           | `FF D4`      | `11111111 11010100` |
| `rbp`           | `FF D5`      | `11111111 11010101` |
| `rsi`           | `FF D6`      | `11111111 11010110` |
| `rdi`           | `FF D7`      | `11111111 11010111` |
| `r8`            | `41 FF D0`   | `01000001 11111111 11010000` |
| `r9`            | `41 FF D1`   | `01000001 11111111 11010001` |
| `r10`           | `41 FF D2`   | `01000001 11111111 11010010` |
| `r11`           | `41 FF D3`   | `01000001 11111111 11010011` |
| `r12`           | `41 FF D4`   | `01000001 11111111 11010100` |
| `r13`           | `41 FF D5`   | `01000001 11111111 11010101` |
| `r14`           | `41 FF D6`   | `01000001 11111111 11010110` |
| `r15`           | `41 FF D7`   | `01000001 11111111 11010111` |

Is `EMIT2(0xff, 0xd3); /* call *rbx */` correct? (Deriving it from the
encoding rules: `call r/m64` is opcode FF with ModRM reg field /2, so a
direct call through rbx is ModRM = 11 010 011 = 0xD3 — which matches.)

Thanks,
Leon

[...]




[Index of Archives]     [Linux Samsung SoC]     [Linux Rockchip SoC]     [Linux Actions SoC]     [Linux for Synopsys ARC Processors]     [Linux NFS]     [Linux NILFS]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]


  Powered by Linux