[RFC PATCH 2/4] rseq: Allow extending struct rseq

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Add a new __rseq_abi.flags bit, "RSEQ_TLS_FLAG_SIZE", which indicates
support for extending struct rseq. This also adds two new fields to
struct rseq: user_size and kernel_size.

The user_size field allows the size of the __rseq_abi definition (which
can be overridden by symbol interposition either by a preloaded library
or by the application) to be handed over to the kernel at registration.
This registration can be performed by a library, e.g. glibc, which does
not know there is interposition taking place.

When the "RSEQ_TLS_FLAG_SIZE" flag is set in __rseq_abi.flags, the
kernel populates kernel_size on successful registration with the
minimum of user_size and the offset of the "end" field of struct rseq
as known by the kernel. This allows user-space to query which fields
are effectively populated by the kernel.

An rseq_size field is added to the task struct to keep track of the
"kernel_size" effective for each thread (or, when the flag is not set,
the legacy size: the offset of the user_size field).

Signed-off-by: Mathieu Desnoyers <mathieu.desnoyers@xxxxxxxxxxxx>
---
 include/linux/sched.h     |  4 ++++
 include/uapi/linux/rseq.h | 37 ++++++++++++++++++++++++++++++++--
 kernel/rseq.c             | 42 +++++++++++++++++++++++++++++++++------
 3 files changed, 75 insertions(+), 8 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 692e327d7455..5d61a3197987 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1147,6 +1147,7 @@ struct task_struct {
 #ifdef CONFIG_RSEQ
 	struct rseq __user *rseq;
 	u32 rseq_sig;
+	u32 rseq_size;
 	/*
 	 * RmW on rseq_event_mask must be performed atomically
 	 * with respect to preemption.
@@ -1976,10 +1977,12 @@ static inline void rseq_fork(struct task_struct *t, unsigned long clone_flags)
 	if (clone_flags & CLONE_VM) {
 		t->rseq = NULL;
 		t->rseq_sig = 0;
+		t->rseq_size = 0;
 		t->rseq_event_mask = 0;
 	} else {
 		t->rseq = current->rseq;
 		t->rseq_sig = current->rseq_sig;
+		t->rseq_size = current->rseq_size;
 		t->rseq_event_mask = current->rseq_event_mask;
 	}
 }
@@ -1988,6 +1991,7 @@ static inline void rseq_execve(struct task_struct *t)
 {
 	t->rseq = NULL;
 	t->rseq_sig = 0;
+	t->rseq_size = 0;
 	t->rseq_event_mask = 0;
 }
 
diff --git a/include/uapi/linux/rseq.h b/include/uapi/linux/rseq.h
index e11d9df5e564..03c0b5e9a859 100644
--- a/include/uapi/linux/rseq.h
+++ b/include/uapi/linux/rseq.h
@@ -37,6 +37,15 @@ enum rseq_cs_flags {
 		(1U << RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
 };
 
+enum rseq_tls_flags_bit {
+	/* enum rseq_cs_flags reserves bits 0-2. */
+	RSEQ_TLS_FLAG_SIZE_BIT = 3,
+};
+
+enum rseq_tls_flags {
+	RSEQ_TLS_FLAG_SIZE = (1U << RSEQ_TLS_FLAG_SIZE_BIT),
+};
+
 /* The rseq_len expected by rseq registration is always 32 bytes. */
 enum rseq_len_expected {
 	RSEQ_LEN_EXPECTED = 32,
@@ -133,8 +142,9 @@ struct rseq {
 	 *
 	 * This field should only be updated by the thread which
 	 * registered this data structure. Read by the kernel.
-	 * Mainly used for single-stepping through rseq critical sections
-	 * with debuggers.
+	 *
+	 * The RSEQ_CS flags are mainly used for single-stepping through rseq
+	 * critical sections with debuggers.
 	 *
 	 * - RSEQ_CS_FLAG_NO_RESTART_ON_PREEMPT
 	 *     Inhibit instruction sequence block restart on preemption
@@ -145,8 +155,31 @@ struct rseq {
 	 * - RSEQ_CS_FLAG_NO_RESTART_ON_MIGRATE
 	 *     Inhibit instruction sequence block restart on migration for
 	 *     this thread.
+	 *
+	 * - RSEQ_TLS_FLAG_SIZE
+	 *     Extensible struct rseq ABI. This flag should be statically
+	 *     initialized.
 	 */
 	__u32 flags;
+	/*
+	 * With __rseq_abi.flags RSEQ_TLS_FLAG_SIZE set, user_size should be
+	 * statically initialized to offsetof(struct rseq, end).
+	 */
+	__u16 user_size;
+	/*
+	 * With __rseq_abi.flags RSEQ_TLS_FLAG_SIZE set, if the kernel supports
+	 * extensible struct rseq ABI, the kernel_size field is populated by
+	 * the kernel to the minimum between user_size and the offset of the
+	 * "end" field within the struct rseq supported by the kernel on
+	 * successful registration. Should be initialized to 0.
+	 */
+	__u16 kernel_size;
+
+	/*
+	 * Very last field of the structure, to calculate size excluding padding
+	 * with offsetof().
+	 */
+	char end[];
 } __attribute__((aligned(4 * sizeof(__u64))));
 
 #endif /* _UAPI_LINUX_RSEQ_H */
diff --git a/kernel/rseq.c b/kernel/rseq.c
index a4f86a9d6937..bbc57fc18573 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -96,6 +96,7 @@ static int rseq_update_cpu_id(struct task_struct *t)
 static int rseq_reset_rseq_cpu_id(struct task_struct *t)
 {
 	u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED;
+	u16 kernel_size = 0;
 
 	/*
 	 * Reset cpu_id_start to its initial state (0).
@@ -109,6 +110,11 @@ static int rseq_reset_rseq_cpu_id(struct task_struct *t)
 	 */
 	if (put_user(cpu_id, &t->rseq->cpu_id))
 		return -EFAULT;
+	/*
+	 * Reset kernel_size to its initial state (0).
+	 */
+	if (put_user(kernel_size, &t->rseq->kernel_size))
+		return -EFAULT;
 	return 0;
 }
 
@@ -266,7 +272,7 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
 
 	if (unlikely(t->flags & PF_EXITING))
 		return;
-	if (unlikely(!access_ok(t->rseq, sizeof(*t->rseq))))
+	if (unlikely(!access_ok(t->rseq, t->rseq_size)))
 		goto error;
 	ret = rseq_ip_fixup(regs);
 	if (unlikely(ret < 0))
@@ -294,7 +300,7 @@ void rseq_syscall(struct pt_regs *regs)
 
 	if (!t->rseq)
 		return;
-	if (!access_ok(t->rseq, sizeof(*t->rseq)) ||
+	if (!access_ok(t->rseq, t->rseq_size) ||
 	    rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
 		force_sig(SIGSEGV);
 }
@@ -308,6 +314,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
 		int, flags, u32, sig)
 {
 	int ret;
+	u32 tls_flags;
 
 	if (flags & RSEQ_FLAG_UNREGISTER) {
 		if (flags & ~RSEQ_FLAG_UNREGISTER)
@@ -315,7 +322,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
 		/* Unregister rseq for current thread. */
 		if (current->rseq != rseq || !current->rseq)
 			return -EINVAL;
-		if (rseq_len != sizeof(*rseq))
+		if (rseq_len != RSEQ_LEN_EXPECTED)
 			return -EINVAL;
 		if (current->rseq_sig != sig)
 			return -EPERM;
@@ -323,6 +330,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
 		if (ret)
 			return ret;
 		current->rseq = NULL;
+		current->rseq_size = 0;
 		current->rseq_sig = 0;
 		return 0;
 	}
@@ -336,7 +344,7 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
 		 * the provided address differs from the prior
 		 * one.
 		 */
-		if (current->rseq != rseq || rseq_len != sizeof(*rseq))
+		if (current->rseq != rseq || rseq_len != RSEQ_LEN_EXPECTED)
 			return -EINVAL;
 		if (current->rseq_sig != sig)
 			return -EPERM;
@@ -349,10 +357,32 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len,
 	 * ensure the provided rseq is properly aligned and valid.
 	 */
 	if (!IS_ALIGNED((unsigned long)rseq, __alignof__(*rseq)) ||
-	    rseq_len != sizeof(*rseq))
+	    rseq_len != RSEQ_LEN_EXPECTED)
 		return -EINVAL;
-	if (!access_ok(rseq, rseq_len))
+	if (!access_ok(rseq, RSEQ_LEN_EXPECTED))
 		return -EFAULT;
+
+	/* Handle extensible struct rseq ABI. */
+	ret = get_user(tls_flags, &rseq->flags);
+	if (ret)
+		return ret;
+	if (tls_flags & RSEQ_TLS_FLAG_SIZE) {
+		u16 user_size, kernel_size;
+
+		ret = get_user(user_size, &rseq->user_size);
+		if (ret)
+			return ret;
+		if (user_size < offsetof(struct rseq, kernel_size) + sizeof(u16))
+			return -EINVAL;
+		kernel_size = min_t(u16, user_size, offsetof(struct rseq, end));
+		ret = put_user(kernel_size, &rseq->kernel_size);
+		if (ret)
+			return ret;
+		current->rseq_size = kernel_size;
+	} else {
+		current->rseq_size = offsetof(struct rseq, user_size);
+	}
+
 	current->rseq = rseq;
 	current->rseq_sig = sig;
 	/*
-- 
2.17.1




[Index of Archives]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite News]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux