Re: [PATCH LLVM 2/3] bpf: enable sub-register code-gen for XADD

Yonghong Song <yhs@xxxxxx> · Thu, 28 Feb 2019 06:51:20 +0000

On 2/27/19 12:50 PM, Jiong Wang wrote:
> Support sub-register code-gen for XADD is like supporting any other Load
> and Store patterns.
> 
> No new instruction is introduced.
> 
>    lock *(u32 *)(r1 + 0) += w2
> 
> has exactly the same underlying insn as:
> 
>    lock *(u32 *)(r1 + 0) += r2
> 
> BPF_W width modifier has guaranteed they behave the same at runtime. This
> patch merely teaches BPF back-end that BPF_W width modifier could work
> GPR32 register class and that's all needed for sub-register code-gen
> support for XADD.
> 
> test/CodeGen/BPF/xadd.ll updated to include sub-register code-gen tests.
> 
> A new testcase test/CodeGen/BPF/xadd_legal.ll is added to make sure the
> legal case could pass on all code-gen modes. It could also test dead Def
> check on GPR32. If there is no proper handling like what has been done
> inside BPFMIChecking.cpp:hasLivingDefs, then this testcase will fail.
> 
> Signed-off-by: Jiong Wang <jiong.wang@xxxxxxxxxxxxx>
> ---
>   lib/Target/BPF/BPFInstrInfo.td   | 28 ++++++++++++++++++++++++----
>   lib/Target/BPF/BPFMIChecking.cpp |  4 +++-
>   test/CodeGen/BPF/xadd.ll         |  2 ++
>   test/CodeGen/BPF/xadd_legal.ll   | 26 ++++++++++++++++++++++++++
>   4 files changed, 55 insertions(+), 5 deletions(-)
>   create mode 100644 test/CodeGen/BPF/xadd_legal.ll
> 
> diff --git a/lib/Target/BPF/BPFInstrInfo.td b/lib/Target/BPF/BPFInstrInfo.td
> index 36724b8..c44702a 100644
> --- a/lib/Target/BPF/BPFInstrInfo.td
> +++ b/lib/Target/BPF/BPFInstrInfo.td
> @@ -614,11 +614,31 @@ class XADD<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
>     let BPFClass = BPF_STX;
>   }
>   
> +class XADD32<BPFWidthModifer SizeOp, string OpcodeStr, PatFrag OpNode>
> +    : TYPE_LD_ST<BPF_XADD.Value, SizeOp.Value,
> +                 (outs GPR32:$dst),
> +                 (ins MEMri:$addr, GPR32:$val),
> +                 "lock *("#OpcodeStr#" *)($addr) += $val",
> +                 [(set GPR32:$dst, (OpNode ADDRri:$addr, GPR32:$val))]> {
> +  bits<4> dst;
> +  bits<20> addr;
> +
> +  let Inst{51-48} = addr{19-16}; // base reg
> +  let Inst{55-52} = dst;
> +  let Inst{47-32} = addr{15-0}; // offset
> +  let BPFClass = BPF_STX;
> +}
> +
>   let Constraints = "$dst = $val" in {
> -def XADD32 : XADD<BPF_W, "u32", atomic_load_add_32>;
> -def XADD64 : XADD<BPF_DW, "u64", atomic_load_add_64>;
> -// undefined def XADD16 : XADD<1, "xadd16", atomic_load_add_16>;
> -// undefined def XADD8  : XADD<2, "xadd8", atomic_load_add_8>;
> +  let Predicates = [BPFNoALU32] in {
> +    def XADDW : XADD<BPF_W, "u32", atomic_load_add_32>;
> +  }
> +
> +  let Predicates = [BPFHasALU32], DecoderNamespace = "BPFALU32" in {
> +    def XADDW32 : XADD32<BPF_W, "u32", atomic_load_add_32>;
> +  }
> +
> +  def XADDD : XADD<BPF_DW, "u64", atomic_load_add_64>;

The XADD64 is changed to XADDD. I guess I am okay with that since
it is consistent with LDD/STD.

>   }
>   
>   // bswap16, bswap32, bswap64
> diff --git a/lib/Target/BPF/BPFMIChecking.cpp b/lib/Target/BPF/BPFMIChecking.cpp
> index 7a4ba96..938e539 100644
> --- a/lib/Target/BPF/BPFMIChecking.cpp
> +++ b/lib/Target/BPF/BPFMIChecking.cpp
> @@ -165,7 +165,9 @@ static bool hasLivingDefs(const MachineInstr &MI,
>   void BPFMIPreEmitChecking::checkingIllegalXADD(void) {
>     for (MachineBasicBlock &MBB : *MF) {
>       for (MachineInstr &MI : MBB) {
> -      if (MI.getOpcode() != BPF::XADD32 && MI.getOpcode() != BPF::XADD64)
> +      if (MI.getOpcode() != BPF::XADDW &&
> +          MI.getOpcode() != BPF::XADDD &&
> +          MI.getOpcode() != BPF::XADDW32)
>           continue;
>   
>         LLVM_DEBUG(MI.dump());
> diff --git a/test/CodeGen/BPF/xadd.ll b/test/CodeGen/BPF/xadd.ll
> index c5f2620..49f0c10 100644
> --- a/test/CodeGen/BPF/xadd.ll
> +++ b/test/CodeGen/BPF/xadd.ll
> @@ -1,5 +1,7 @@
>   ; RUN: not llc -march=bpfel < %s 2>&1 | FileCheck %s
>   ; RUN: not llc -march=bpfeb < %s 2>&1 | FileCheck %s
> +; RUN: not llc -march=bpfel -mattr=+alu32 < %s 2>&1 | FileCheck %s
> +; RUN: not llc -march=bpfeb -mattr=+alu32 < %s 2>&1 | FileCheck %s
>   
>   ; This file is generated with the source command and source
>   ; $ clang -target bpf -O2 -g -S -emit-llvm t.c
> diff --git a/test/CodeGen/BPF/xadd_legal.ll b/test/CodeGen/BPF/xadd_legal.ll
> new file mode 100644
> index 0000000..0d30084
> --- /dev/null
> +++ b/test/CodeGen/BPF/xadd_legal.ll
> @@ -0,0 +1,26 @@
> +; RUN: llc -march=bpfel < %s 2>&1 | FileCheck --check-prefix=CHECK-64 %s
> +; RUN: llc -march=bpfeb < %s 2>&1 | FileCheck --check-prefix=CHECK-64 %s
> +; RUN: llc -march=bpfel -mattr=+alu32 < %s 2>&1 | FileCheck --check-prefix=CHECK-32 %s
> +; RUN: llc -march=bpfeb -mattr=+alu32 < %s 2>&1 | FileCheck --check-prefix=CHECK-32 %s
> +
> +; This file is generated with the source command and source
> +; $ clang -target bpf -O2 -S -emit-llvm t.c
> +; $ cat t.c
> +; int test(int *ptr, unsigned long long a) {
> +;    __sync_fetch_and_add(ptr, a);
> +;    return *ptr;
> +; }
> +;
> +; NOTE: passing unsigned long long as the second operand of __sync_fetch_and_add
> +; could effectively create sub-register reference coming from indexing a full
> +; register which could then exerceise hasLivingDefs inside BPFMIChecker.cpp.
> +
> +define dso_local i32 @test(i32* nocapture %ptr, i64 %a) {
> +entry:
> +  %conv = trunc i64 %a to i32
> +  %0 = atomicrmw add i32* %ptr, i32 %conv seq_cst
> +; CHECK-64: lock *(u32 *)(r1 + 0) += r2
> +; CHECK-32: lock *(u32 *)(r1 + 0) += w2
> +  %1 = load i32, i32* %ptr, align 4
> +  ret i32 %1
> +}
>