From: Yixing Liu <liuyixing1@xxxxxxxxxx>

The newly added SVE Direct WQE function only supports the SVE ldr and
str instructions, so this patch adds ldr and str assembly to implement
it.

Signed-off-by: Yixing Liu <liuyixing1@xxxxxxxxxx>
---
 CMakeLists.txt                   |  2 ++
 buildlib/RDMA_EnableCStd.cmake   |  7 +++++++
 providers/hns/CMakeLists.txt     |  2 ++
 providers/hns/hns_roce_u_hw_v2.c | 10 +++++++++-
 util/mmio.h                      | 17 +++++++++++++++++
 5 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0cb68264..ee1024d5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -417,6 +417,8 @@ endif()
 
 RDMA_Check_SSE(HAVE_TARGET_SSE)
 
+RDMA_Check_SVE(HAVE_TARGET_SVE)
+
 # Enable development support features
 # Prune unneeded shared libraries during linking
 RDMA_AddOptLDFlag(CMAKE_EXE_LINKER_FLAGS SUPPORTS_AS_NEEDED "-Wl,--as-needed")
diff --git a/buildlib/RDMA_EnableCStd.cmake b/buildlib/RDMA_EnableCStd.cmake
index 3c42824f..c6bd6603 100644
--- a/buildlib/RDMA_EnableCStd.cmake
+++ b/buildlib/RDMA_EnableCStd.cmake
@@ -127,3 +127,10 @@ int main(int argc, char *argv[])
   endif()
   set(${TO_VAR} "${HAVE_TARGET_SSE}" PARENT_SCOPE)
 endFunction()
+
+function(RDMA_Check_SVE TO_VAR)
+  RDMA_Check_C_Compiles(HAVE_TARGET_SVE "${SVE_CHECK_PROGRAM}")
+
+  set(SVE_FLAGS "-march=armv8.2-a+sve" PARENT_SCOPE)
+  set(${TO_VAR} "${HAVE_TARGET_SVE}" PARENT_SCOPE)
+endFunction()
diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt
index 7aaca757..5c2bcf3b 100644
--- a/providers/hns/CMakeLists.txt
+++ b/providers/hns/CMakeLists.txt
@@ -5,3 +5,5 @@ rdma_provider(hns
   hns_roce_u_hw_v2.c
   hns_roce_u_verbs.c
 )
+
+set_source_files_properties(hns_roce_u_hw_v2.c PROPERTIES COMPILE_FLAGS "${SVE_FLAGS}")
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 3a294968..bd457217 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -299,6 +299,11 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
 	hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
 }
 
+static void hns_roce_sve_write512(uint64_t *dest, uint64_t *val)
+{
+	mmio_memcpy_x64_sve(dest, val);
+}
+
 static void hns_roce_write512(uint64_t *dest, uint64_t *val)
 {
 	mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe));
@@ -314,7 +319,10 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
 	hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_H, qp->sl >> HNS_ROCE_SL_SHIFT);
 	hr_reg_write(rc_sq_wqe, RCWQE_WQE_IDX, qp->sq.head);
 
-	hns_roce_write512(qp->sq.db_reg, wqe);
+	if (qp->flags & HNS_ROCE_QP_CAP_SVE_DIRECT_WQE)
+		hns_roce_sve_write512(qp->sq.db_reg, wqe);
+	else
+		hns_roce_write512(qp->sq.db_reg, wqe);
 }
 
 static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
diff --git a/util/mmio.h b/util/mmio.h
index b60935c4..13fd2654 100644
--- a/util/mmio.h
+++ b/util/mmio.h
@@ -207,6 +207,23 @@ __le64 mmio_read64_le(const void *addr);
 
 /* This strictly guarantees the order of TLP generation for the memory copy to
    be in ascending address order.
 */
+#if defined(__aarch64__)
+/* Copy one SVE vector register's worth of data from src to dest with a single
+   "ldr z0" / "str z0" pair. The number of bytes moved is the hardware vector
+   length, so this is a 64 byte copy only on 512-bit SVE implementations.
+   Callers must build with SVE enabled (e.g. -march=armv8.2-a+sve).
+*/
+static inline void mmio_memcpy_x64_sve(void *dest, const void *src)
+{
+	asm volatile(
+		"ldr z0, [%0]\n"
+		"str z0, [%1]\n"
+		/* z0 is overwritten, so it must be listed as clobbered */
+		::"r" (src), "r"(dest):"z0", "cc", "memory"
+	);
+}
+#endif
+
 #if defined(__aarch64__) || defined(__arm__)
 #include <arm_neon.h>
 
-- 
2.30.0