Test the NUMA node id extension rseq field. Compare it against the value returned by the getcpu(2) system call while pinned on a specific core. Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx> --- tools/testing/selftests/rseq/basic_test.c | 6 +++ tools/testing/selftests/rseq/rseq-abi.h | 51 +++++++++++++++++++++-- tools/testing/selftests/rseq/rseq.c | 37 ++++++++++++++-- tools/testing/selftests/rseq/rseq.h | 40 ++++++++++++++++++ 4 files changed, 127 insertions(+), 7 deletions(-) diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c index d8efbfb89193..9601db964b72 100644 --- a/tools/testing/selftests/rseq/basic_test.c +++ b/tools/testing/selftests/rseq/basic_test.c @@ -22,6 +22,8 @@ void test_cpu_pointer(void) CPU_ZERO(&test_affinity); for (i = 0; i < CPU_SETSIZE; i++) { if (CPU_ISSET(i, &affinity)) { + int node; + CPU_SET(i, &test_affinity); sched_setaffinity(0, sizeof(test_affinity), &test_affinity); @@ -29,6 +31,10 @@ void test_cpu_pointer(void) assert(rseq_current_cpu() == i); assert(rseq_current_cpu_raw() == i); assert(rseq_cpu_start() == i); + node = rseq_fallback_current_node(); + assert(rseq_current_node() == node); + assert(rseq_current_node_raw() == node); + assert(rseq_node_start() == node); CPU_CLR(i, &test_affinity); } } diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h index a8c44d9af71f..7aba1cc0990b 100644 --- a/tools/testing/selftests/rseq/rseq-abi.h +++ b/tools/testing/selftests/rseq/rseq-abi.h @@ -13,9 +13,9 @@ #include <linux/types.h> #include <asm/byteorder.h> -enum rseq_abi_cpu_id_state { - RSEQ_ABI_CPU_ID_UNINITIALIZED = -1, - RSEQ_ABI_CPU_ID_REGISTRATION_FAILED = -2, +enum rseq_abi_id_state { + RSEQ_ABI_ID_UNINITIALIZED = -1, + RSEQ_ABI_ID_REGISTRATION_FAILED = -2, }; enum rseq_abi_flags { @@ -146,6 +146,51 @@ struct rseq_abi { * this thread. */ __u32 flags; + __u32 padding1[3]; + + /* + * This is the end of the original rseq ABI. 
+ * This is a valid end of rseq ABI for the purpose of rseq registration + * rseq_len. + * The original rseq ABI uses "sizeof(struct rseq)" on registration, + * thus requiring the padding above. + */ + + /* + * Restartable sequences node_id_start field. Updated by the + * kernel. Read by user-space with single-copy atomicity + * semantics. This field should only be read by the thread which + * registered this data structure. Aligned on 32-bit. Always + * contains a value in the range of possible NUMA node IDs, although the + * value may not be the actual current NUMA node ID (e.g. if rseq is not + * initialized). This NUMA node ID number value should always be compared + * against the value of the node_id field before performing a rseq + * commit or returning a value read from a data structure indexed using + * the node_id_start value. + */ + __u32 node_id_start; + + /* + * Restartable sequences node_id field. Updated by the kernel. + * Read by user-space with single-copy atomicity semantics. This + * field should only be read by the thread which registered this + * data structure. Aligned on 32-bit. Values + * RSEQ_ABI_ID_UNINITIALIZED and RSEQ_ABI_ID_REGISTRATION_FAILED + * have a special semantic: the former means "rseq uninitialized", + * and the latter means "rseq initialization failed". This value is + * meant to be read within rseq critical sections and compared + * with the node_id_start value previously read, before performing + * the commit instruction, or read and compared with the + * node_id_start value before returning a value loaded from a data + * structure indexed using the node_id_start value. + */ + __u32 node_id; + + /* + * This is a valid end of rseq ABI for the purpose of rseq registration + * rseq_len. Use the offset immediately after the node_id field as + * rseq_len. 
+ */ } __attribute__((aligned(4 * sizeof(__u64)))); #endif /* _RSEQ_ABI_H */ diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c index 07ba0d463a96..99b5c3b71ef0 100644 --- a/tools/testing/selftests/rseq/rseq.c +++ b/tools/testing/selftests/rseq/rseq.c @@ -27,10 +27,20 @@ #include <signal.h> #include <limits.h> #include <dlfcn.h> +#include <stddef.h> #include "../kselftest.h" #include "rseq.h" +#ifndef sizeof_field +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +#ifndef offsetofend +#define offsetofend(TYPE, MEMBER) \ + (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER)) +#endif + static const int *libc_rseq_offset_p; static const unsigned int *libc_rseq_size_p; static const unsigned int *libc_rseq_flags_p; @@ -49,7 +59,8 @@ static int rseq_ownership; static __thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"))) = { - .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED, + .cpu_id = RSEQ_ABI_ID_UNINITIALIZED, + .node_id = RSEQ_ABI_ID_UNINITIALIZED, }; static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, @@ -58,6 +69,11 @@ static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len, return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig); } +static int sys_getcpu(unsigned *cpu, unsigned *node) +{ + return syscall(__NR_getcpu, cpu, node, NULL); +} + int rseq_available(void) { int rc; @@ -83,7 +99,7 @@ int rseq_register_current_thread(void) /* Treat libc's ownership as a successful registration. */ return 0; } - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), 0, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, offsetofend(struct rseq_abi, node_id), 0, RSEQ_SIG); if (rc) return -1; assert(rseq_current_cpu_raw() >= 0); @@ -98,7 +114,7 @@ int rseq_unregister_current_thread(void) /* Treat libc's ownership as a successful unregistration. 
*/ return 0; } - rc = sys_rseq(&__rseq_abi, sizeof(struct rseq_abi), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); + rc = sys_rseq(&__rseq_abi, offsetofend(struct rseq_abi, node_id), RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG); if (rc) return -1; return 0; @@ -121,7 +137,7 @@ void rseq_init(void) return; rseq_ownership = 1; rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer(); - rseq_size = sizeof(struct rseq_abi); + rseq_size = offsetofend(struct rseq_abi, node_id); rseq_flags = 0; } @@ -146,3 +162,16 @@ int32_t rseq_fallback_current_cpu(void) } return cpu; } + +int32_t rseq_fallback_current_node(void) +{ + uint32_t cpu_id, node_id; + int ret; + + ret = sys_getcpu(&cpu_id, &node_id); + if (ret) { + perror("sys_getcpu()"); + return ret; + } + return (int32_t) node_id; +} diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h index 6bd0ac466b4a..6fccc87f9025 100644 --- a/tools/testing/selftests/rseq/rseq.h +++ b/tools/testing/selftests/rseq/rseq.h @@ -115,6 +115,11 @@ int rseq_unregister_current_thread(void); */ int32_t rseq_fallback_current_cpu(void); +/* + * Restartable sequence fallback for reading the current node number. + */ +int32_t rseq_fallback_current_node(void); + /* * Values returned can be either the current CPU number, -1 (rseq is * uninitialized), or -2 (rseq initialization has failed). @@ -124,6 +129,15 @@ static inline int32_t rseq_current_cpu_raw(void) return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id); } +/* + * Values returned can be either the current NUMA node number, -1 (rseq is + * uninitialized), or -2 (rseq initialization has failed). + */ +static inline int32_t rseq_current_node_raw(void) +{ + return RSEQ_ACCESS_ONCE(rseq_get_abi()->node_id); +} + /* * Returns a possible CPU number, which is typically the current CPU. 
* The returned CPU number can be used to prepare for an rseq critical @@ -140,6 +154,22 @@ static inline uint32_t rseq_cpu_start(void) return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start); } +/* + * Returns a possible NUMA node number, which is typically the current NUMA + * node. The returned NUMA node number can be used to prepare for an rseq + * critical section, which will confirm whether the NUMA node number is indeed + * the current one, and whether rseq is initialized. + * + * The NUMA node number returned by rseq_node_start should always be validated + * by passing it to a rseq asm sequence, or by comparing it to the return value + * of rseq_current_node_raw() if the rseq asm sequence does not need to be + * invoked. + */ +static inline uint32_t rseq_node_start(void) +{ + return RSEQ_ACCESS_ONCE(rseq_get_abi()->node_id_start); +} + static inline uint32_t rseq_current_cpu(void) { int32_t cpu; @@ -150,6 +180,16 @@ static inline uint32_t rseq_current_cpu(void) return cpu; } +static inline uint32_t rseq_current_node(void) +{ + int32_t node; + + node = rseq_current_node_raw(); + if (rseq_unlikely(node < 0)) + node = rseq_fallback_current_node(); + return node; +} + static inline void rseq_clear_rseq_cs(void) { RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0); -- 2.17.1