[RFC PATCH for-next 3/3] libhns: Add support for SVE Direct WQE function

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Yixing Liu <liuyixing1@xxxxxxxxxx>

The newly added SVE Direct WQE function only supports sve ldr and str
instructions, this patch adds ldr and str assembly to achieve this function.

Signed-off-by: Yixing Liu <liuyixing1@xxxxxxxxxx>
---
 CMakeLists.txt                   |  2 ++
 buildlib/RDMA_EnableCStd.cmake   |  7 +++++++
 providers/hns/CMakeLists.txt     |  2 ++
 providers/hns/hns_roce_u_hw_v2.c | 10 +++++++++-
 util/mmio.h                      | 11 +++++++++++
 5 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0cb68264..ee1024d5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -417,6 +417,8 @@ endif()
 
 RDMA_Check_SSE(HAVE_TARGET_SSE)
 
+RDMA_Check_SVE(HAVE_TARGET_SVE)
+
 # Enable development support features
 # Prune unneeded shared libraries during linking
 RDMA_AddOptLDFlag(CMAKE_EXE_LINKER_FLAGS SUPPORTS_AS_NEEDED "-Wl,--as-needed")
diff --git a/buildlib/RDMA_EnableCStd.cmake b/buildlib/RDMA_EnableCStd.cmake
index 3c42824f..c6bd6603 100644
--- a/buildlib/RDMA_EnableCStd.cmake
+++ b/buildlib/RDMA_EnableCStd.cmake
@@ -127,3 +127,10 @@ int main(int argc, char *argv[])
   endif()
   set(${TO_VAR} "${HAVE_TARGET_SSE}" PARENT_SCOPE)
 endFunction()
+
+function(RDMA_Check_SVE TO_VAR)
+  RDMA_Check_C_Compiles(HAVE_TARGET_SVE "${SVE_CHECK_PROGRAM}")
+
+  set(SVE_FLAGS "-march=armv8.2-a+sve" PARENT_SCOPE)
+  set(${TO_VAR} "${HAVE_TARGET_SVE}" PARENT_SCOPE)
+endFunction()
diff --git a/providers/hns/CMakeLists.txt b/providers/hns/CMakeLists.txt
index 7aaca757..5c2bcf3b 100644
--- a/providers/hns/CMakeLists.txt
+++ b/providers/hns/CMakeLists.txt
@@ -5,3 +5,5 @@ rdma_provider(hns
   hns_roce_u_hw_v2.c
   hns_roce_u_verbs.c
 )
+
+set_source_files_properties(hns_roce_u_hw_v2.c PROPERTIES COMPILE_FLAGS "${SVE_FLAGS}")
diff --git a/providers/hns/hns_roce_u_hw_v2.c b/providers/hns/hns_roce_u_hw_v2.c
index 3a294968..bd457217 100644
--- a/providers/hns/hns_roce_u_hw_v2.c
+++ b/providers/hns/hns_roce_u_hw_v2.c
@@ -299,6 +299,11 @@ static void hns_roce_update_sq_db(struct hns_roce_context *ctx,
 	hns_roce_write64(qp->sq.db_reg, (__le32 *)&sq_db);
 }
 
+static void hns_roce_sve_write512(uint64_t *dest, uint64_t *val)
+{
+	mmio_memcpy_x64_sve(dest, val);
+}
+
 static void hns_roce_write512(uint64_t *dest, uint64_t *val)
 {
 	mmio_memcpy_x64(dest, val, sizeof(struct hns_roce_rc_sq_wqe));
@@ -314,7 +319,10 @@ static void hns_roce_write_dwqe(struct hns_roce_qp *qp, void *wqe)
 	hr_reg_write(rc_sq_wqe, RCWQE_DB_SL_H, qp->sl >> HNS_ROCE_SL_SHIFT);
 	hr_reg_write(rc_sq_wqe, RCWQE_WQE_IDX, qp->sq.head);
 
-	hns_roce_write512(qp->sq.db_reg, wqe);
+	if (qp->flags & HNS_ROCE_QP_CAP_SVE_DIRECT_WQE)
+		hns_roce_sve_write512(qp->sq.db_reg, wqe);
+	else
+		hns_roce_write512(qp->sq.db_reg, wqe);
 }
 
 static void update_cq_db(struct hns_roce_context *ctx, struct hns_roce_cq *cq)
diff --git a/util/mmio.h b/util/mmio.h
index b60935c4..13fd2654 100644
--- a/util/mmio.h
+++ b/util/mmio.h
@@ -207,6 +207,17 @@ __le64 mmio_read64_le(const void *addr);
 /* This strictly guarantees the order of TLP generation for the memory copy to
    be in ascending address order.
 */
+#if defined(__aarch64__) || defined(__arm__)
+static inline void mmio_memcpy_x64_sve(void *dest, const void *src)
+{
+	asm volatile(
+		"ldr z0, [%0]\n"
+		"str z0, [%1]\n"
+		::"r" (val), "r"(dest):"cc", "memory"
+	);
+}
+#endif
+
 #if defined(__aarch64__) || defined(__arm__)
 #include <arm_neon.h>
 
-- 
2.30.0




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Photo]     [Yosemite News]     [Yosemite Photos]     [Linux Kernel]     [Linux SCSI]     [XFree86]

  Powered by Linux