Patch: Enable 'kernel read' on reiser4 sfrn4/5 for Linux 5.10.20-23 (at least)

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Niltze-

The attached patch represents an ephemeral hack which has allowed me to
build functional reiser4-enabled Linux v5.10.20 - v5.10.23 kernels.

The Debian Linux 5.10.15-2 cloud flavour kernel that I am currently
testing has been humming along nicely for thirty days already in a Google
Compute Engine (GCE) / AMD Epyc region, hence the 'reizer4' label.

< https://metztli.it/buster/cloud-5.10.15-2+reizer4.0.2.png >

I will be upgrading that image to Debian Linux v5.10.20-2 soon -- as I
have the non-cloud kernel working locally in Debian Buster backports
GCC8 for AMD64. Additionally, I have the upcoming Debian Bullseye GCC10
with a Linux 5.10.23-2 kernel running without issues.

None of the above would be possible without the attached 'kernel read'
patch; without it I would have fallen far behind in Debian kernel
building -- where thus far I have managed to stay at least one version
ahead of the official releases for Buster backports and upcoming
Bullseye versions.

Attached, please also find a patch to reiser4-enable GNU Parted 3.4
(*probe only*). This parted version is already being used in upcoming
Debian Bullseye and I decided to backport it for Buster -- as can be
discerned in the image referenced above.

Finally, I have also patched reiser4progs (patch attached as well) with
the fix for the null UUID issue which Ed resolved this past weekend. I
made the judgement (aided by autoconf) that the micro number should be
increased by a unit, and that is reflected in the image referenced
above, as well.

Of course, the call to increase -- or not -- the micro number is not
mine to make. However, I wanted to avoid any confusion as I use the
special reiser4progs v1.2.2 UDEBs for my custom Debian Installer (d-i).
And indeed, I had noticed some null UUIDs now and then during my past
Debian reiser4 installations -- when I was struggling to come up with
my custom reiser4 d-i; at the time, however, I was at a loss as to which
variable was at fault.

Personally, it was not too bad, though, because when the UUID string
was null I would use the tiny xvi and/or elvis-tiny vi-clone editors
integrated in my custom d-i to fix the string after re-formatting
the target partition.


Best Professional Regards.
-- 
-- 
Jose R R
http://metztli.it
-----------------------------------------------------------------------
Download Metztli Reiser4: Debian Buster w/ Linux 5.9.16 AMD64
-----------------------------------------------------------------------
feats ZSTD compression https://sf.net/projects/metztli-reiser4/
-----------------------------------------------------------------------
or SFRN 5.1.3, Metztli Reiser5 https://sf.net/projects/debian-reiser4/
-----------------------------------------------------------------------
Official current Reiser4 resources: https://reiser4.wiki.kernel.org/

From f5f30b4772ab0fcd30edb145f3c70262a6454eb6 Mon Sep 17 00:00:00 2001
From: Metztli Information Technology <jose@xxxxxxxxxx>
Date: Fri, 5 Mar 2021 03:17:57 -0800
Subject: [PATCH] Ic ce (First) commit on 5.10.20 manually modifying
 fs/proc/proc_sysctl.c after 'kernel read' patch applied

---
 arch/Kconfig                            |   3 -
 arch/x86/ia32/ia32_aout.c               |   1 +
 arch/x86/include/asm/page_32_types.h    |  11 --
 arch/x86/include/asm/page_64_types.h    |  38 -------
 arch/x86/include/asm/processor.h        |  60 ++++++++++-
 arch/x86/include/asm/thread_info.h      |   2 +
 arch/x86/include/asm/uaccess.h          |  26 ++++-
 arch/x86/kernel/asm-offsets.c           |   3 +
 arch/x86/lib/getuser.S                  |  47 +++++----
 arch/x86/lib/putuser.S                  |  25 +++--
 drivers/misc/lkdtm/bugs.c               |  10 ++
 drivers/misc/lkdtm/core.c               |   2 +
 drivers/misc/lkdtm/lkdtm.h              |   2 +
 drivers/misc/lkdtm/usercopy.c           |  15 +++
 fs/proc/cpuinfo.c                       |   2 +-
 fs/proc/generic.c                       |   4 +-
 fs/proc/inode.c                         | 124 ++++++++--------------
 fs/proc/proc_sysctl.c                   |  48 ++++-----
 fs/proc/stat.c                          |   2 +-
 fs/read_write.c                         |  71 +++++--------
 fs/splice.c                             | 130 +++++++++++++++++++++---
 include/linux/bpf-cgroup.h              |   2 +-
 include/linux/fs.h                      |   2 +
 include/linux/proc_fs.h                 |   1 -
 include/linux/uaccess.h                 |  22 +---
 kernel/bpf/cgroup.c                     |   2 +-
 lib/test_bitmap.c                       |  91 +++++++++++++----
 tools/testing/selftests/lkdtm/tests.txt |   2 +
 28 files changed, 442 insertions(+), 306 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 69fe7133c765..f8769e61757b 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -24,9 +24,6 @@ config KEXEC_ELF
 config HAVE_IMA_KEXEC
 	bool
 
-config SET_FS
-	bool
-
 config HOTPLUG_SMT
 	bool
 
diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c
index a09fc37ead9d..ca8a657edf59 100644
--- a/arch/x86/ia32/ia32_aout.c
+++ b/arch/x86/ia32/ia32_aout.c
@@ -239,6 +239,7 @@ static int load_aout_binary(struct linux_binprm *bprm)
 	(regs)->ss = __USER32_DS;
 	regs->r8 = regs->r9 = regs->r10 = regs->r11 =
 	regs->r12 = regs->r13 = regs->r14 = regs->r15 = 0;
+	set_fs(USER_DS);
 	return 0;
 }
 
diff --git a/arch/x86/include/asm/page_32_types.h b/arch/x86/include/asm/page_32_types.h
index f462895a33e4..565ad755c785 100644
--- a/arch/x86/include/asm/page_32_types.h
+++ b/arch/x86/include/asm/page_32_types.h
@@ -41,17 +41,6 @@
 #define __VIRTUAL_MASK_SHIFT	32
 #endif	/* CONFIG_X86_PAE */
 
-/*
- * User space process size: 3GB (default).
- */
-#define IA32_PAGE_OFFSET	__PAGE_OFFSET
-#define TASK_SIZE		__PAGE_OFFSET
-#define TASK_SIZE_LOW		TASK_SIZE
-#define TASK_SIZE_MAX		TASK_SIZE
-#define DEFAULT_MAP_WINDOW	TASK_SIZE
-#define STACK_TOP		TASK_SIZE
-#define STACK_TOP_MAX		STACK_TOP
-
 /*
  * Kernel image size is limited to 512 MB (see in arch/x86/kernel/head_32.S)
  */
diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h
index 3f49dac03617..d0c6c10c18a0 100644
--- a/arch/x86/include/asm/page_64_types.h
+++ b/arch/x86/include/asm/page_64_types.h
@@ -59,44 +59,6 @@
 #define __VIRTUAL_MASK_SHIFT	47
 #endif
 
-/*
- * User space process size.  This is the first address outside the user range.
- * There are a few constraints that determine this:
- *
- * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
- * address, then that syscall will enter the kernel with a
- * non-canonical return address, and SYSRET will explode dangerously.
- * We avoid this particular problem by preventing anything executable
- * from being mapped at the maximum canonical address.
- *
- * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
- * CPUs malfunction if they execute code from the highest canonical page.
- * They'll speculate right off the end of the canonical space, and
- * bad things happen.  This is worked around in the same way as the
- * Intel problem.
- *
- * With page table isolation enabled, we map the LDT in ... [stay tuned]
- */
-#define TASK_SIZE_MAX	((_AC(1,UL) << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
-
-#define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
-
-/* This decides where the kernel will search for a free chunk of vm
- * space during mmap's.
- */
-#define IA32_PAGE_OFFSET	((current->personality & ADDR_LIMIT_3GB) ? \
-					0xc0000000 : 0xFFFFe000)
-
-#define TASK_SIZE_LOW		(test_thread_flag(TIF_ADDR32) ? \
-					IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
-#define TASK_SIZE		(test_thread_flag(TIF_ADDR32) ? \
-					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-#define TASK_SIZE_OF(child)	((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
-					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
-
-#define STACK_TOP		TASK_SIZE_LOW
-#define STACK_TOP_MAX		TASK_SIZE_MAX
-
 /*
  * Maximum kernel image size is limited to 1 GiB, due to the fixmap living
  * in the next 1 GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S).
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 82a08b585818..5ac507586769 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -482,6 +482,10 @@ extern unsigned int fpu_user_xstate_size;
 
 struct perf_event;
 
+typedef struct {
+	unsigned long		seg;
+} mm_segment_t;
+
 struct thread_struct {
 	/* Cached TLS descriptors: */
 	struct desc_struct	tls_array[GDT_ENTRY_TLS_ENTRIES];
@@ -534,6 +538,8 @@ struct thread_struct {
 	 */
 	unsigned long		iopl_emul;
 
+	mm_segment_t		addr_limit;
+
 	unsigned int		sig_on_uaccess_err:1;
 
 	/* Floating point and extended processor state */
@@ -777,15 +783,67 @@ static inline void spin_lock_prefetch(const void *x)
 })
 
 #ifdef CONFIG_X86_32
+/*
+ * User space process size: 3GB (default).
+ */
+#define IA32_PAGE_OFFSET	PAGE_OFFSET
+#define TASK_SIZE		PAGE_OFFSET
+#define TASK_SIZE_LOW		TASK_SIZE
+#define TASK_SIZE_MAX		TASK_SIZE
+#define DEFAULT_MAP_WINDOW	TASK_SIZE
+#define STACK_TOP		TASK_SIZE
+#define STACK_TOP_MAX		STACK_TOP
+
 #define INIT_THREAD  {							  \
 	.sp0			= TOP_OF_INIT_STACK,			  \
 	.sysenter_cs		= __KERNEL_CS,				  \
+	.addr_limit		= KERNEL_DS,				  \
 }
 
 #define KSTK_ESP(task)		(task_pt_regs(task)->sp)
 
 #else
-#define INIT_THREAD { }
+/*
+ * User space process size.  This is the first address outside the user range.
+ * There are a few constraints that determine this:
+ *
+ * On Intel CPUs, if a SYSCALL instruction is at the highest canonical
+ * address, then that syscall will enter the kernel with a
+ * non-canonical return address, and SYSRET will explode dangerously.
+ * We avoid this particular problem by preventing anything executable
+ * from being mapped at the maximum canonical address.
+ *
+ * On AMD CPUs in the Ryzen family, there's a nasty bug in which the
+ * CPUs malfunction if they execute code from the highest canonical page.
+ * They'll speculate right off the end of the canonical space, and
+ * bad things happen.  This is worked around in the same way as the
+ * Intel problem.
+ *
+ * With page table isolation enabled, we map the LDT in ... [stay tuned]
+ */
+#define TASK_SIZE_MAX	((1UL << __VIRTUAL_MASK_SHIFT) - PAGE_SIZE)
+
+#define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
+
+/* This decides where the kernel will search for a free chunk of vm
+ * space during mmap's.
+ */
+#define IA32_PAGE_OFFSET	((current->personality & ADDR_LIMIT_3GB) ? \
+					0xc0000000 : 0xFFFFe000)
+
+#define TASK_SIZE_LOW		(test_thread_flag(TIF_ADDR32) ? \
+					IA32_PAGE_OFFSET : DEFAULT_MAP_WINDOW)
+#define TASK_SIZE		(test_thread_flag(TIF_ADDR32) ? \
+					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+#define TASK_SIZE_OF(child)	((test_tsk_thread_flag(child, TIF_ADDR32)) ? \
+					IA32_PAGE_OFFSET : TASK_SIZE_MAX)
+
+#define STACK_TOP		TASK_SIZE_LOW
+#define STACK_TOP_MAX		TASK_SIZE_MAX
+
+#define INIT_THREAD  {						\
+	.addr_limit		= KERNEL_DS,			\
+}
 
 extern unsigned long KSTK_ESP(struct task_struct *task);
 
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 44733a4bfc42..267701ae3d86 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -102,6 +102,7 @@ struct thread_info {
 #define TIF_SYSCALL_TRACEPOINT	28	/* syscall tracepoint instrumentation */
 #define TIF_ADDR32		29	/* 32-bit address space on 64 bits */
 #define TIF_X32			30	/* 32-bit native x86-64 binary */
+#define TIF_FSCHECK		31	/* Check FS is USER_DS on return */
 
 #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME	(1 << TIF_NOTIFY_RESUME)
@@ -130,6 +131,7 @@ struct thread_info {
 #define _TIF_SYSCALL_TRACEPOINT	(1 << TIF_SYSCALL_TRACEPOINT)
 #define _TIF_ADDR32		(1 << TIF_ADDR32)
 #define _TIF_X32		(1 << TIF_X32)
+#define _TIF_FSCHECK		(1 << TIF_FSCHECK)
 
 /* flags to check in __switch_to() */
 #define _TIF_WORK_CTXSW_BASE					\
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index c9fa7be3df82..a2c937245753 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -12,6 +12,30 @@
 #include <asm/smap.h>
 #include <asm/extable.h>
 
+/*
+ * The fs value determines whether argument validity checking should be
+ * performed or not.  If get_fs() == USER_DS, checking is performed, with
+ * get_fs() == KERNEL_DS, checking is bypassed.
+ *
+ * For historical reasons, these macros are grossly misnamed.
+ */
+
+#define MAKE_MM_SEG(s)	((mm_segment_t) { (s) })
+
+#define KERNEL_DS	MAKE_MM_SEG(-1UL)
+#define USER_DS 	MAKE_MM_SEG(TASK_SIZE_MAX)
+
+#define get_fs()	(current->thread.addr_limit)
+static inline void set_fs(mm_segment_t fs)
+{
+	current->thread.addr_limit = fs;
+	/* On user-mode return, check fs is correct */
+	set_thread_flag(TIF_FSCHECK);
+}
+
+#define uaccess_kernel() (get_fs().seg == KERNEL_DS.seg)
+#define user_addr_max() (current->thread.addr_limit.seg)
+
 /*
  * Test whether a block of memory is a valid user space address.
  * Returns 0 if the range is valid, nonzero otherwise.
@@ -69,7 +93,7 @@ static inline bool pagefault_disabled(void);
 #define access_ok(addr, size)					\
 ({									\
 	WARN_ON_IN_IRQ();						\
-	likely(!__range_not_ok(addr, size, TASK_SIZE_MAX));		\
+	likely(!__range_not_ok(addr, size, user_addr_max()));		\
 })
 
 extern int __get_user_1(void);
diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c
index 70b7154f4bdd..3ca07ad552ae 100644
--- a/arch/x86/kernel/asm-offsets.c
+++ b/arch/x86/kernel/asm-offsets.c
@@ -37,6 +37,9 @@ static void __used common(void)
 	OFFSET(TASK_stack_canary, task_struct, stack_canary);
 #endif
 
+	BLANK();
+	OFFSET(TASK_addr_limit, task_struct, thread.addr_limit);
+
 	BLANK();
 	OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx);
 
diff --git a/arch/x86/lib/getuser.S b/arch/x86/lib/getuser.S
index fa1bc2104b32..2cd902e06062 100644
--- a/arch/x86/lib/getuser.S
+++ b/arch/x86/lib/getuser.S
@@ -37,19 +37,10 @@
 
 #define ASM_BARRIER_NOSPEC ALTERNATIVE "", "lfence", X86_FEATURE_LFENCE_RDTSC
 
-#ifdef CONFIG_X86_5LEVEL
-#define LOAD_TASK_SIZE_MINUS_N(n) \
-	ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rdx), \
-		    __stringify(mov $((1 << 56) - 4096 - (n)),%rdx), X86_FEATURE_LA57
-#else
-#define LOAD_TASK_SIZE_MINUS_N(n) \
-	mov $(TASK_SIZE_MAX - (n)),%_ASM_DX
-#endif
-
 	.text
 SYM_FUNC_START(__get_user_1)
-	LOAD_TASK_SIZE_MINUS_N(0)
-	cmp %_ASM_DX,%_ASM_AX
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
@@ -62,13 +53,15 @@ SYM_FUNC_END(__get_user_1)
 EXPORT_SYMBOL(__get_user_1)
 
 SYM_FUNC_START(__get_user_2)
-	LOAD_TASK_SIZE_MINUS_N(1)
-	cmp %_ASM_DX,%_ASM_AX
+	add $1,%_ASM_AX
+	jc bad_get_user
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
 	ASM_STAC
-2:	movzwl (%_ASM_AX),%edx
+2:	movzwl -1(%_ASM_AX),%edx
 	xor %eax,%eax
 	ASM_CLAC
 	ret
@@ -76,13 +69,15 @@ SYM_FUNC_END(__get_user_2)
 EXPORT_SYMBOL(__get_user_2)
 
 SYM_FUNC_START(__get_user_4)
-	LOAD_TASK_SIZE_MINUS_N(3)
-	cmp %_ASM_DX,%_ASM_AX
+	add $3,%_ASM_AX
+	jc bad_get_user
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
 	ASM_STAC
-3:	movl (%_ASM_AX),%edx
+3:	movl -3(%_ASM_AX),%edx
 	xor %eax,%eax
 	ASM_CLAC
 	ret
@@ -91,25 +86,29 @@ EXPORT_SYMBOL(__get_user_4)
 
 SYM_FUNC_START(__get_user_8)
 #ifdef CONFIG_X86_64
-	LOAD_TASK_SIZE_MINUS_N(7)
-	cmp %_ASM_DX,%_ASM_AX
+	add $7,%_ASM_AX
+	jc bad_get_user
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
 	ASM_STAC
-4:	movq (%_ASM_AX),%rdx
+4:	movq -7(%_ASM_AX),%rdx
 	xor %eax,%eax
 	ASM_CLAC
 	ret
 #else
-	LOAD_TASK_SIZE_MINUS_N(7)
-	cmp %_ASM_DX,%_ASM_AX
+	add $7,%_ASM_AX
+	jc bad_get_user_8
+	mov PER_CPU_VAR(current_task), %_ASM_DX
+	cmp TASK_addr_limit(%_ASM_DX),%_ASM_AX
 	jae bad_get_user_8
 	sbb %_ASM_DX, %_ASM_DX		/* array_index_mask_nospec() */
 	and %_ASM_DX, %_ASM_AX
 	ASM_STAC
-4:	movl (%_ASM_AX),%edx
-5:	movl 4(%_ASM_AX),%ecx
+4:	movl -7(%_ASM_AX),%edx
+5:	movl -3(%_ASM_AX),%ecx
 	xor %eax,%eax
 	ASM_CLAC
 	ret
diff --git a/arch/x86/lib/putuser.S b/arch/x86/lib/putuser.S
index 0ea344c5ea43..b34a17763f28 100644
--- a/arch/x86/lib/putuser.S
+++ b/arch/x86/lib/putuser.S
@@ -33,19 +33,12 @@
  * as they get called from within inline assembly.
  */
 
-#ifdef CONFIG_X86_5LEVEL
-#define LOAD_TASK_SIZE_MINUS_N(n) \
-	ALTERNATIVE __stringify(mov $((1 << 47) - 4096 - (n)),%rbx), \
-		    __stringify(mov $((1 << 56) - 4096 - (n)),%rbx), X86_FEATURE_LA57
-#else
-#define LOAD_TASK_SIZE_MINUS_N(n) \
-	mov $(TASK_SIZE_MAX - (n)),%_ASM_BX
-#endif
+#define ENTER	mov PER_CPU_VAR(current_task), %_ASM_BX
 
 .text
 SYM_FUNC_START(__put_user_1)
-	LOAD_TASK_SIZE_MINUS_N(0)
-	cmp %_ASM_BX,%_ASM_CX
+	ENTER
+	cmp TASK_addr_limit(%_ASM_BX),%_ASM_CX
 	jae .Lbad_put_user
 SYM_INNER_LABEL(__put_user_nocheck_1, SYM_L_GLOBAL)
 	ASM_STAC
@@ -58,7 +51,9 @@ EXPORT_SYMBOL(__put_user_1)
 EXPORT_SYMBOL(__put_user_nocheck_1)
 
 SYM_FUNC_START(__put_user_2)
-	LOAD_TASK_SIZE_MINUS_N(1)
+	ENTER
+	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
+	sub $1,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
 SYM_INNER_LABEL(__put_user_nocheck_2, SYM_L_GLOBAL)
@@ -72,7 +67,9 @@ EXPORT_SYMBOL(__put_user_2)
 EXPORT_SYMBOL(__put_user_nocheck_2)
 
 SYM_FUNC_START(__put_user_4)
-	LOAD_TASK_SIZE_MINUS_N(3)
+	ENTER
+	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
+	sub $3,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
 SYM_INNER_LABEL(__put_user_nocheck_4, SYM_L_GLOBAL)
@@ -86,7 +83,9 @@ EXPORT_SYMBOL(__put_user_4)
 EXPORT_SYMBOL(__put_user_nocheck_4)
 
 SYM_FUNC_START(__put_user_8)
-	LOAD_TASK_SIZE_MINUS_N(7)
+	ENTER
+	mov TASK_addr_limit(%_ASM_BX),%_ASM_BX
+	sub $7,%_ASM_BX
 	cmp %_ASM_BX,%_ASM_CX
 	jae .Lbad_put_user
 SYM_INNER_LABEL(__put_user_nocheck_8, SYM_L_GLOBAL)
diff --git a/drivers/misc/lkdtm/bugs.c b/drivers/misc/lkdtm/bugs.c
index a0675d4154d2..4dfbfd51bdf7 100644
--- a/drivers/misc/lkdtm/bugs.c
+++ b/drivers/misc/lkdtm/bugs.c
@@ -312,6 +312,16 @@ void lkdtm_CORRUPT_LIST_DEL(void)
 		pr_err("list_del() corruption not detected!\n");
 }
 
+/* Test if unbalanced set_fs(KERNEL_DS)/set_fs(USER_DS) check exists. */
+void lkdtm_CORRUPT_USER_DS(void)
+{
+	pr_info("setting bad task size limit\n");
+	set_fs(KERNEL_DS);
+
+	/* Make sure we do not keep running with a KERNEL_DS! */
+	force_sig(SIGKILL);
+}
+
 /* Test that VMAP_STACK is actually allocating with a leading guard page */
 void lkdtm_STACK_GUARD_PAGE_LEADING(void)
 {
diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index 97803f213d9d..a5e344df9166 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -112,6 +112,7 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(CORRUPT_STACK_STRONG),
 	CRASHTYPE(CORRUPT_LIST_ADD),
 	CRASHTYPE(CORRUPT_LIST_DEL),
+	CRASHTYPE(CORRUPT_USER_DS),
 	CRASHTYPE(STACK_GUARD_PAGE_LEADING),
 	CRASHTYPE(STACK_GUARD_PAGE_TRAILING),
 	CRASHTYPE(UNSET_SMEP),
@@ -171,6 +172,7 @@ static const struct crashtype crashtypes[] = {
 	CRASHTYPE(USERCOPY_STACK_FRAME_FROM),
 	CRASHTYPE(USERCOPY_STACK_BEYOND),
 	CRASHTYPE(USERCOPY_KERNEL),
+	CRASHTYPE(USERCOPY_KERNEL_DS),
 	CRASHTYPE(STACKLEAK_ERASING),
 	CRASHTYPE(CFI_FORWARD_PROTO),
 #ifdef CONFIG_X86_32
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 6dec4c9b442f..8878538b2c13 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -27,6 +27,7 @@ void lkdtm_OVERFLOW_UNSIGNED(void);
 void lkdtm_ARRAY_BOUNDS(void);
 void lkdtm_CORRUPT_LIST_ADD(void);
 void lkdtm_CORRUPT_LIST_DEL(void);
+void lkdtm_CORRUPT_USER_DS(void);
 void lkdtm_STACK_GUARD_PAGE_LEADING(void);
 void lkdtm_STACK_GUARD_PAGE_TRAILING(void);
 void lkdtm_UNSET_SMEP(void);
@@ -95,6 +96,7 @@ void lkdtm_USERCOPY_STACK_FRAME_TO(void);
 void lkdtm_USERCOPY_STACK_FRAME_FROM(void);
 void lkdtm_USERCOPY_STACK_BEYOND(void);
 void lkdtm_USERCOPY_KERNEL(void);
+void lkdtm_USERCOPY_KERNEL_DS(void);
 
 /* lkdtm_stackleak.c */
 void lkdtm_STACKLEAK_ERASING(void);
diff --git a/drivers/misc/lkdtm/usercopy.c b/drivers/misc/lkdtm/usercopy.c
index 109e8d4302c1..b833367a45d0 100644
--- a/drivers/misc/lkdtm/usercopy.c
+++ b/drivers/misc/lkdtm/usercopy.c
@@ -325,6 +325,21 @@ void lkdtm_USERCOPY_KERNEL(void)
 	vm_munmap(user_addr, PAGE_SIZE);
 }
 
+void lkdtm_USERCOPY_KERNEL_DS(void)
+{
+	char __user *user_ptr =
+		(char __user *)(0xFUL << (sizeof(unsigned long) * 8 - 4));
+	mm_segment_t old_fs = get_fs();
+	char buf[10] = {0};
+
+	pr_info("attempting copy_to_user() to noncanonical address: %px\n",
+		user_ptr);
+	set_fs(KERNEL_DS);
+	if (copy_to_user(user_ptr, buf, sizeof(buf)) == 0)
+		pr_err("copy_to_user() to noncanonical address succeeded!?\n");
+	set_fs(old_fs);
+}
+
 void __init lkdtm_usercopy_init(void)
 {
 	/* Prepare cache that lacks SLAB_USERCOPY flag. */
diff --git a/fs/proc/cpuinfo.c b/fs/proc/cpuinfo.c
index 419760fd77bd..d0989a443c77 100644
--- a/fs/proc/cpuinfo.c
+++ b/fs/proc/cpuinfo.c
@@ -19,7 +19,7 @@ static int cpuinfo_open(struct inode *inode, struct file *file)
 static const struct proc_ops cpuinfo_proc_ops = {
 	.proc_flags	= PROC_ENTRY_PERMANENT,
 	.proc_open	= cpuinfo_open,
-	.proc_read_iter	= seq_read_iter,
+	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
 	.proc_release	= seq_release,
 };
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index 6c0a05f55d6b..63fd4a6c2dec 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -610,7 +610,7 @@ static int proc_seq_release(struct inode *inode, struct file *file)
 static const struct proc_ops proc_seq_ops = {
 	/* not permanent -- can call into arbitrary seq_operations */
 	.proc_open	= proc_seq_open,
-	.proc_read_iter	= seq_read_iter,
+	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
 	.proc_release	= proc_seq_release,
 };
@@ -641,7 +641,7 @@ static int proc_single_open(struct inode *inode, struct file *file)
 static const struct proc_ops proc_single_ops = {
 	/* not permanent -- can call into arbitrary ->single_show */
 	.proc_open	= proc_single_open,
-	.proc_read_iter = seq_read_iter,
+	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
 	.proc_release	= single_release,
 };
diff --git a/fs/proc/inode.c b/fs/proc/inode.c
index bde6b6f69852..68e55483d6f3 100644
--- a/fs/proc/inode.c
+++ b/fs/proc/inode.c
@@ -297,21 +297,6 @@ static loff_t proc_reg_llseek(struct file *file, loff_t offset, int whence)
 	return rv;
 }
 
-static ssize_t proc_reg_read_iter(struct kiocb *iocb, struct iov_iter *iter)
-{
-	struct proc_dir_entry *pde = PDE(file_inode(iocb->ki_filp));
-	ssize_t ret;
-
-	if (pde_is_permanent(pde))
-		return pde->proc_ops->proc_read_iter(iocb, iter);
-
-	if (!use_pde(pde))
-		return -EIO;
-	ret = pde->proc_ops->proc_read_iter(iocb, iter);
-	unuse_pde(pde);
-	return ret;
-}
-
 static ssize_t pde_read(struct proc_dir_entry *pde, struct file *file, char __user *buf, size_t count, loff_t *ppos)
 {
 	typeof_member(struct proc_ops, proc_read) read;
@@ -587,52 +572,32 @@ static const struct file_operations proc_reg_file_ops = {
 	.write		= proc_reg_write,
 	.poll		= proc_reg_poll,
 	.unlocked_ioctl	= proc_reg_unlocked_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= proc_reg_compat_ioctl,
+#endif
 	.mmap		= proc_reg_mmap,
 	.get_unmapped_area = proc_reg_get_unmapped_area,
 	.open		= proc_reg_open,
 	.release	= proc_reg_release,
 };
 
-static const struct file_operations proc_iter_file_ops = {
-	.llseek		= proc_reg_llseek,
-	.read_iter	= proc_reg_read_iter,
-	.write		= proc_reg_write,
-	.splice_read	= generic_file_splice_read,
-	.poll		= proc_reg_poll,
-	.unlocked_ioctl	= proc_reg_unlocked_ioctl,
-	.mmap		= proc_reg_mmap,
-	.get_unmapped_area = proc_reg_get_unmapped_area,
-	.open		= proc_reg_open,
-	.release	= proc_reg_release,
-};
-
+	/** .splice_read	= generic_file_splice_read,
+	 */
 #ifdef CONFIG_COMPAT
-static const struct file_operations proc_reg_file_ops_compat = {
+static const struct file_operations proc_reg_file_ops_no_compat = {
 	.llseek		= proc_reg_llseek,
 	.read		= proc_reg_read,
 	.write		= proc_reg_write,
 	.poll		= proc_reg_poll,
 	.unlocked_ioctl	= proc_reg_unlocked_ioctl,
-	.compat_ioctl	= proc_reg_compat_ioctl,
 	.mmap		= proc_reg_mmap,
 	.get_unmapped_area = proc_reg_get_unmapped_area,
 	.open		= proc_reg_open,
 	.release	= proc_reg_release,
 };
 
-static const struct file_operations proc_iter_file_ops_compat = {
-	.llseek		= proc_reg_llseek,
-	.read_iter	= proc_reg_read_iter,
-	.splice_read	= generic_file_splice_read,
-	.write		= proc_reg_write,
-	.poll		= proc_reg_poll,
-	.unlocked_ioctl	= proc_reg_unlocked_ioctl,
-	.compat_ioctl	= proc_reg_compat_ioctl,
-	.mmap		= proc_reg_mmap,
-	.get_unmapped_area = proc_reg_get_unmapped_area,
-	.open		= proc_reg_open,
-	.release	= proc_reg_release,
-};
+	/** .splice_read	= generic_file_splice_read,
+	 */
 #endif
 
 static void proc_put_link(void *p)
@@ -659,51 +624,42 @@ struct inode *proc_get_inode(struct super_block *sb, struct proc_dir_entry *de)
 {
 	struct inode *inode = new_inode(sb);
 
-	if (!inode) {
-		pde_put(de);
-		return NULL;
-	}
+	if (inode) {
+		inode->i_ino = de->low_ino;
+		inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
+		PROC_I(inode)->pde = de;
 
-	inode->i_ino = de->low_ino;
-	inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
-	PROC_I(inode)->pde = de;
-	if (is_empty_pde(de)) {
-		make_empty_dir_inode(inode);
-		return inode;
-	}
+		if (is_empty_pde(de)) {
+			make_empty_dir_inode(inode);
+			return inode;
+		}
+		if (de->mode) {
+			inode->i_mode = de->mode;
+			inode->i_uid = de->uid;
+			inode->i_gid = de->gid;
+		}
+		if (de->size)
+			inode->i_size = de->size;
+		if (de->nlink)
+			set_nlink(inode, de->nlink);
 
-	if (de->mode) {
-		inode->i_mode = de->mode;
-		inode->i_uid = de->uid;
-		inode->i_gid = de->gid;
-	}
-	if (de->size)
-		inode->i_size = de->size;
-	if (de->nlink)
-		set_nlink(inode, de->nlink);
-
-	if (S_ISREG(inode->i_mode)) {
-		inode->i_op = de->proc_iops;
-		if (de->proc_ops->proc_read_iter)
-			inode->i_fop = &proc_iter_file_ops;
-		else
+		if (S_ISREG(inode->i_mode)) {
+			inode->i_op = de->proc_iops;
 			inode->i_fop = &proc_reg_file_ops;
 #ifdef CONFIG_COMPAT
-		if (de->proc_ops->proc_compat_ioctl) {
-			if (de->proc_ops->proc_read_iter)
-				inode->i_fop = &proc_iter_file_ops_compat;
-			else
-				inode->i_fop = &proc_reg_file_ops_compat;
-		}
+			if (!de->proc_ops->proc_compat_ioctl) {
+				inode->i_fop = &proc_reg_file_ops_no_compat;
+			}
 #endif
-	} else if (S_ISDIR(inode->i_mode)) {
-		inode->i_op = de->proc_iops;
-		inode->i_fop = de->proc_dir_ops;
-	} else if (S_ISLNK(inode->i_mode)) {
-		inode->i_op = de->proc_iops;
-		inode->i_fop = NULL;
-	} else {
-		BUG();
-	}
+		} else if (S_ISDIR(inode->i_mode)) {
+			inode->i_op = de->proc_iops;
+			inode->i_fop = de->proc_dir_ops;
+		} else if (S_ISLNK(inode->i_mode)) {
+			inode->i_op = de->proc_iops;
+			inode->i_fop = NULL;
+		} else
+			BUG();
+	} else
+	       pde_put(de);
 	return inode;
 }
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index 070d2df8ab9c..6da439dde528 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -12,7 +12,6 @@
 #include <linux/cred.h>
 #include <linux/namei.h>
 #include <linux/mm.h>
-#include <linux/uio.h>
 #include <linux/module.h>
 #include <linux/bpf-cgroup.h>
 #include <linux/mount.h>
@@ -541,14 +540,13 @@ static struct dentry *proc_sys_lookup(struct inode *dir, struct dentry *dentry,
 	return err;
 }
 
-static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
-		int write)
+static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
+		size_t count, loff_t *ppos, int write)
 {
-	struct inode *inode = file_inode(iocb->ki_filp);
+	struct inode *inode = file_inode(filp);
 	struct ctl_table_header *head = grab_header(inode);
 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
-	size_t count = iov_iter_count(iter);
-	char *kbuf;
+	void *kbuf;
 	ssize_t error;
 
 	if (IS_ERR(head))
@@ -571,30 +569,32 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
 	error = -ENOMEM;
 	if (count >= KMALLOC_MAX_SIZE)
 		goto out;
-	kbuf = kvzalloc(count + 1, GFP_KERNEL);
-	if (!kbuf)
-		goto out;
 
 	if (write) {
-		error = -EFAULT;
-		if (!copy_from_iter_full(kbuf, count, iter))
-			goto out_free_buf;
-		kbuf[count] = '\0';
+		kbuf = memdup_user_nul(ubuf, count);
+		if (IS_ERR(kbuf)) {
+			error = PTR_ERR(kbuf);
+			goto out;
+		}
+	} else {
+		kbuf = kzalloc(count, GFP_KERNEL);
+		if (!kbuf)
+			goto out;
 	}
 
 	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count,
-					   &iocb->ki_pos);
+					   ppos);
 	if (error)
 		goto out_free_buf;
 
 	/* careful: calling conventions are nasty here */
-	error = table->proc_handler(table, write, kbuf, &count, &iocb->ki_pos);
+	error = table->proc_handler(table, write, kbuf, &count, ppos);
 	if (error)
 		goto out_free_buf;
 
 	if (!write) {
 		error = -EFAULT;
-		if (copy_to_iter(kbuf, count, iter) < count)
+		if (copy_to_user(ubuf, kbuf, count))
 			goto out_free_buf;
 	}
 
@@ -607,14 +607,16 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
 	return error;
 }
 
-static ssize_t proc_sys_read(struct kiocb *iocb, struct iov_iter *iter)
+static ssize_t proc_sys_read(struct file *filp, char __user *buf,
+				size_t count, loff_t *ppos)
 {
-	return proc_sys_call_handler(iocb, iter, 0);
+	return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 0);
 }
 
-static ssize_t proc_sys_write(struct kiocb *iocb, struct iov_iter *iter)
+static ssize_t proc_sys_write(struct file *filp, const char __user *buf,
+				size_t count, loff_t *ppos)
 {
-	return proc_sys_call_handler(iocb, iter, 1);
+	return proc_sys_call_handler(filp, (void __user *)buf, count, ppos, 1);
 }
 
 static int proc_sys_open(struct inode *inode, struct file *filp)
@@ -851,10 +853,8 @@ static int proc_sys_getattr(const struct path *path, struct kstat *stat,
 static const struct file_operations proc_sys_file_operations = {
 	.open		= proc_sys_open,
 	.poll		= proc_sys_poll,
-	.read_iter	= proc_sys_read,
-	.write_iter	= proc_sys_write,
-	.splice_read	= generic_file_splice_read,
-	.splice_write	= iter_file_splice_write,
+	.read		= proc_sys_read,
+	.write		= proc_sys_write,
 	.llseek		= default_llseek,
 };
 
diff --git a/fs/proc/stat.c b/fs/proc/stat.c
index 4695b6de3151..46b3293015fe 100644
--- a/fs/proc/stat.c
+++ b/fs/proc/stat.c
@@ -226,7 +226,7 @@ static int stat_open(struct inode *inode, struct file *file)
 static const struct proc_ops stat_proc_ops = {
 	.proc_flags	= PROC_ENTRY_PERMANENT,
 	.proc_open	= stat_open,
-	.proc_read_iter	= seq_read_iter,
+	.proc_read	= seq_read,
 	.proc_lseek	= seq_lseek,
 	.proc_release	= single_release,
 };
diff --git a/fs/read_write.c b/fs/read_write.c
index 75f764b43418..552b993c88ef 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -419,42 +419,27 @@ static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, lo
 	return ret;
 }
 
-static int warn_unsupported(struct file *file, const char *op)
-{
-	pr_warn_ratelimited(
-		"kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
-		op, file, current->pid, current->comm);
-	return -EINVAL;
-}
-
 ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
 {
-	struct kvec iov = {
-		.iov_base	= buf,
-		.iov_len	= min_t(size_t, count, MAX_RW_COUNT),
-	};
-	struct kiocb kiocb;
-	struct iov_iter iter;
+	mm_segment_t old_fs = get_fs();
 	ssize_t ret;
 
 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
 		return -EINVAL;
 	if (!(file->f_mode & FMODE_CAN_READ))
 		return -EINVAL;
-	/*
-	 * Also fail if ->read_iter and ->read are both wired up as that
-	 * implies very convoluted semantics.
-	 */
-	if (unlikely(!file->f_op->read_iter || file->f_op->read))
-		return warn_unsupported(file, "read");
 
-	init_sync_kiocb(&kiocb, file);
-	kiocb.ki_pos = pos ? *pos : 0;
-	iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len);
-	ret = file->f_op->read_iter(&kiocb, &iter);
+	if (count > MAX_RW_COUNT)
+		count =  MAX_RW_COUNT;
+	set_fs(KERNEL_DS);
+	if (file->f_op->read)
+		ret = file->f_op->read(file, (void __user *)buf, count, pos);
+	else if (file->f_op->read_iter)
+		ret = new_sync_read(file, (void __user *)buf, count, pos);
+	else
+		ret = -EINVAL;
+	set_fs(old_fs);
 	if (ret > 0) {
-		if (pos)
-			*pos = kiocb.ki_pos;
 		fsnotify_access(file);
 		add_rchar(current, ret);
 	}
@@ -525,32 +510,28 @@ static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t
 /* caller is responsible for file_start_write/file_end_write */
 ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
 {
-	struct kvec iov = {
-		.iov_base	= (void *)buf,
-		.iov_len	= min_t(size_t, count, MAX_RW_COUNT),
-	};
-	struct kiocb kiocb;
-	struct iov_iter iter;
+	mm_segment_t old_fs;
+	const char __user *p;
 	ssize_t ret;
 
 	if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
 		return -EBADF;
 	if (!(file->f_mode & FMODE_CAN_WRITE))
 		return -EINVAL;
-	/*
-	 * Also fail if ->write_iter and ->write are both wired up as that
-	 * implies very convoluted semantics.
-	 */
-	if (unlikely(!file->f_op->write_iter || file->f_op->write))
-		return warn_unsupported(file, "write");
 
-	init_sync_kiocb(&kiocb, file);
-	kiocb.ki_pos = pos ? *pos : 0;
-	iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
-	ret = file->f_op->write_iter(&kiocb, &iter);
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	p = (__force const char __user *)buf;
+	if (count > MAX_RW_COUNT)
+		count =  MAX_RW_COUNT;
+	if (file->f_op->write)
+		ret = file->f_op->write(file, p, count, pos);
+	else if (file->f_op->write_iter)
+		ret = new_sync_write(file, p, count, pos);
+	else
+		ret = -EINVAL;
+	set_fs(old_fs);
 	if (ret > 0) {
-		if (pos)
-			*pos = kiocb.ki_pos;
 		fsnotify_modify(file);
 		add_wchar(current, ret);
 	}
@@ -908,7 +889,7 @@ ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
 }
 EXPORT_SYMBOL(vfs_iter_write);
 
-static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
+ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
 		  unsigned long vlen, loff_t *pos, rwf_t flags)
 {
 	struct iovec iovstack[UIO_FASTIOV];
diff --git a/fs/splice.c b/fs/splice.c
index 866d5c2367b2..d9305af930d8 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -341,6 +341,89 @@ const struct pipe_buf_operations nosteal_pipe_buf_ops = {
 };
 EXPORT_SYMBOL(nosteal_pipe_buf_ops);
 
+static ssize_t kernel_readv(struct file *file, const struct kvec *vec,
+			    unsigned long vlen, loff_t offset)
+{
+	mm_segment_t old_fs;
+	loff_t pos = offset;	/* passed by pointer to vfs_readv(); @offset itself is by value */
+	ssize_t res;
+	/* Read from @file at @offset into the @vlen kernel buffers listed in @vec. */
+	old_fs = get_fs();
+	set_fs(KERNEL_DS);
+	/* The cast to a user pointer is valid due to the set_fs() */
+	res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
+	set_fs(old_fs);	/* put back the value get_fs() returned above */
+
+	return res;	/* vfs_readv() result, passed through unchanged */
+}
+
+static ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
+				 struct pipe_inode_info *pipe, size_t len,
+				 unsigned int flags)
+{
+	struct kvec *vec, __vec[PIPE_DEF_BUFFERS];
+	struct iov_iter to;
+	struct page **pages;
+	unsigned int nr_pages;
+	unsigned int mask;
+	size_t offset, base, copied = 0;
+	ssize_t res;
+	int i;
+	/* Fallback for files without ->splice_read: fill pipe pages via kernel_readv(). */
+	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
+		return -EAGAIN;
+
+	/*
+	 * Try to keep page boundaries matching to source pagecache ones -
+	 * it probably won't be much help, but...
+	 */
+	offset = *ppos & ~PAGE_MASK;	/* byte offset of *ppos within its page */
+
+	iov_iter_pipe(&to, READ, pipe, len + offset);
+
+	res = iov_iter_get_pages_alloc(&to, &pages, len + offset, &base);
+	if (res <= 0)
+		return -ENOMEM;
+
+	nr_pages = DIV_ROUND_UP(res + base, PAGE_SIZE);
+
+	vec = __vec;	/* on-stack array unless more than PIPE_DEF_BUFFERS pages are needed */
+	if (nr_pages > PIPE_DEF_BUFFERS) {
+		vec = kmalloc_array(nr_pages, sizeof(struct kvec), GFP_KERNEL);
+		if (unlikely(!vec)) {
+			res = -ENOMEM;
+			goto out;
+		}
+	}
+
+	mask = pipe->ring_size - 1;
+	pipe->bufs[to.head & mask].offset = offset;	/* skip the leading in-page offset */
+	pipe->bufs[to.head & mask].len -= offset;
+
+	for (i = 0; i < nr_pages; i++) {	/* one kvec per page; only the first starts at offset */
+		size_t this_len = min_t(size_t, len, PAGE_SIZE - offset);
+		vec[i].iov_base = page_address(pages[i]) + offset;
+		vec[i].iov_len = this_len;
+		len -= this_len;
+		offset = 0;
+	}
+
+	res = kernel_readv(in, vec, nr_pages, *ppos);
+	if (res > 0) {	/* only a positive result moves *ppos and counts as copied */
+		copied = res;
+		*ppos += res;
+	}
+
+	if (vec != __vec)
+		kfree(vec);
+out:
+	for (i = 0; i < nr_pages; i++)
+		put_page(pages[i]);
+	kvfree(pages);
+	iov_iter_advance(&to, copied);	/* truncates and discards */
+	return res;
+}
+
 /*
  * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
  * using sendpage(). Return the number of bytes sent.
@@ -724,6 +807,33 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
 
 EXPORT_SYMBOL(iter_file_splice_write);
 
+static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
+			  struct splice_desc *sd)
+{
+	int ret;
+	void *data;
+	loff_t tmp = sd->pos;	/* scratch position; sd->pos is not written back here */
+	/* Write sd->len bytes from one pipe buffer's page to sd->u.file. */
+	data = kmap(buf->page);
+	ret = __kernel_write(sd->u.file, data + buf->offset, sd->len, &tmp);
+	kunmap(buf->page);
+
+	return ret;	/* __kernel_write() result, unchanged */
+}
+
+static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
+					 struct file *out, loff_t *ppos,
+					 size_t len, unsigned int flags)
+{
+	ssize_t ret;
+	/* Fallback for files without ->splice_write: hand pipe buffers to write_pipe_buf(). */
+	ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
+	if (ret > 0)
+		*ppos += ret;	/* advance the caller's position by the positive result */
+
+	return ret;
+}
+
 /**
  * generic_splice_sendpage - splice data from a pipe to a socket
  * @pipe:	pipe to splice from
@@ -745,23 +855,15 @@ ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
 
 EXPORT_SYMBOL(generic_splice_sendpage);
 
-static int warn_unsupported(struct file *file, const char *op)
-{
-	pr_debug_ratelimited(
-		"splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
-		op, file, current->pid, current->comm);
-	return -EINVAL;
-}
-
 /*
  * Attempt to initiate a splice from pipe to file.
  */
 static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
 			   loff_t *ppos, size_t len, unsigned int flags)
 {
-	if (unlikely(!out->f_op->splice_write))
-		return warn_unsupported(out, "write");
-	return out->f_op->splice_write(pipe, out, ppos, len, flags);
+	if (out->f_op->splice_write)
+		return out->f_op->splice_write(pipe, out, ppos, len, flags);
+	return default_file_splice_write(pipe, out, ppos, len, flags);
 }
 
 /*
@@ -783,9 +885,9 @@ static long do_splice_to(struct file *in, loff_t *ppos,
 	if (unlikely(len > MAX_RW_COUNT))
 		len = MAX_RW_COUNT;
 
-	if (unlikely(!in->f_op->splice_read))
-		return warn_unsupported(in, "read");
-	return in->f_op->splice_read(in, ppos, pipe, len, flags);
+	if (in->f_op->splice_read)
+		return in->f_op->splice_read(in, ppos, pipe, len, flags);
+	return default_file_splice_read(in, ppos, pipe, len, flags);
 }
 
 /**
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index ed71bd1a0825..2f98d2fce62e 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -136,7 +136,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 
 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 				   struct ctl_table *table, int write,
-				   char **buf, size_t *pcount, loff_t *ppos,
+				   void **buf, size_t *pcount, loff_t *ppos,
 				   enum bpf_attach_type type);
 
 int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8bde32cf9711..c2035afe79b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1910,6 +1910,8 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
+extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
+		unsigned long, loff_t *, rwf_t);
 extern ssize_t vfs_copy_file_range(struct file *, loff_t , struct file *,
 				   loff_t, size_t, unsigned int);
 extern ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in,
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 000cc0533c33..7b5b0765ff4c 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -30,7 +30,6 @@ struct proc_ops {
 	unsigned int proc_flags;
 	int	(*proc_open)(struct inode *, struct file *);
 	ssize_t	(*proc_read)(struct file *, char __user *, size_t, loff_t *);
-	ssize_t (*proc_read_iter)(struct kiocb *, struct iov_iter *);
 	ssize_t	(*proc_write)(struct file *, const char __user *, size_t, loff_t *);
 	loff_t	(*proc_lseek)(struct file *, loff_t, int);
 	int	(*proc_release)(struct inode *, struct file *);
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index c7c6e8b8344d..e5da080a7181 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -10,7 +10,6 @@
 
 #include <asm/uaccess.h>
 
-#ifdef CONFIG_SET_FS
 /*
  * Force the uaccess routines to be wired up for actual userspace access,
  * overriding any possible set_fs(KERNEL_DS) still lingering around.  Undone
@@ -28,27 +27,10 @@ static inline void force_uaccess_end(mm_segment_t oldfs)
 {
 	set_fs(oldfs);
 }
-#else /* CONFIG_SET_FS */
-typedef struct {
-	/* empty dummy */
-} mm_segment_t;
-
-#ifndef TASK_SIZE_MAX
+/** #ifndef TASK_SIZE_MAX
 #define TASK_SIZE_MAX			TASK_SIZE
 #endif
-
-#define uaccess_kernel()		(false)
-#define user_addr_max()			(TASK_SIZE_MAX)
-
-static inline mm_segment_t force_uaccess_begin(void)
-{
-	return (mm_segment_t) { };
-}
-
-static inline void force_uaccess_end(mm_segment_t oldfs)
-{
-}
-#endif /* CONFIG_SET_FS */
+*/
 
 /*
  * Architectures should provide two primitives (raw_copy_{to,from}_user())
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 6aa9e10c6335..41612e25c029 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1226,7 +1226,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
  */
 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 				   struct ctl_table *table, int write,
-				   char **buf, size_t *pcount, loff_t *ppos,
+				   void **buf, size_t *pcount, loff_t *ppos,
 				   enum bpf_attach_type type)
 {
 	struct bpf_sysctl_kern ctx = {
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index 4425a1dd4ef1..df903c53952b 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -354,37 +354,50 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
 
 };
 
-static void __init test_bitmap_parselist(void)
+static void __init __test_bitmap_parselist(int is_user)
 {
 	int i;
 	int err;
 	ktime_t time;
 	DECLARE_BITMAP(bmap, 2048);
+	char *mode = is_user ? "_user"  : "";
 
 	for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) {
 #define ptest parselist_tests[i]
 
-		time = ktime_get();
-		err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
-		time = ktime_get() - time;
+		if (is_user) {
+			mm_segment_t orig_fs = get_fs();
+			size_t len = strlen(ptest.in);
+
+			set_fs(KERNEL_DS);
+			time = ktime_get();
+			err = bitmap_parselist_user((__force const char __user *)ptest.in, len,
+						    bmap, ptest.nbits);
+			time = ktime_get() - time;
+			set_fs(orig_fs);
+		} else {
+			time = ktime_get();
+			err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
+			time = ktime_get() - time;
+		}
 
 		if (err != ptest.errno) {
-			pr_err("parselist: %d: input is %s, errno is %d, expected %d\n",
-					i, ptest.in, err, ptest.errno);
+			pr_err("parselist%s: %d: input is %s, errno is %d, expected %d\n",
+					mode, i, ptest.in, err, ptest.errno);
 			continue;
 		}
 
 		if (!err && ptest.expected
 			 && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) {
-			pr_err("parselist: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
-					i, ptest.in, bmap[0],
+			pr_err("parselist%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
+					mode, i, ptest.in, bmap[0],
 					*ptest.expected);
 			continue;
 		}
 
 		if (ptest.flags & PARSE_TIME)
-			pr_err("parselist: %d: input is '%s' OK, Time: %llu\n",
-					i, ptest.in, time);
+			pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n",
+					mode, i, ptest.in, time);
 
 #undef ptest
 	}
@@ -430,41 +443,75 @@ static const struct test_bitmap_parselist parse_tests[] __initconst = {
 #undef step
 };
 
-static void __init test_bitmap_parse(void)
+static void __init __test_bitmap_parse(int is_user)
 {
 	int i;
 	int err;
 	ktime_t time;
 	DECLARE_BITMAP(bmap, 2048);
+	char *mode = is_user ? "_user"  : "";
 
 	for (i = 0; i < ARRAY_SIZE(parse_tests); i++) {
 		struct test_bitmap_parselist test = parse_tests[i];
-		size_t len = test.flags & NO_LEN ? UINT_MAX : strlen(test.in);
 
-		time = ktime_get();
-		err = bitmap_parse(test.in, len, bmap, test.nbits);
-		time = ktime_get() - time;
+		if (is_user) {
+			size_t len = strlen(test.in);
+			mm_segment_t orig_fs = get_fs();
+
+			set_fs(KERNEL_DS);
+			time = ktime_get();
+			err = bitmap_parse_user((__force const char __user *)test.in, len,
+						bmap, test.nbits);
+			time = ktime_get() - time;
+			set_fs(orig_fs);
+		} else {
+			size_t len = test.flags & NO_LEN ?
+				UINT_MAX : strlen(test.in);
+			time = ktime_get();
+			err = bitmap_parse(test.in, len, bmap, test.nbits);
+			time = ktime_get() - time;
+		}
 
 		if (err != test.errno) {
-			pr_err("parse: %d: input is %s, errno is %d, expected %d\n",
-					i, test.in, err, test.errno);
+			pr_err("parse%s: %d: input is %s, errno is %d, expected %d\n",
+					mode, i, test.in, err, test.errno);
 			continue;
 		}
 
 		if (!err && test.expected
 			 && !__bitmap_equal(bmap, test.expected, test.nbits)) {
-			pr_err("parse: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
-					i, test.in, bmap[0],
+			pr_err("parse%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
+					mode, i, test.in, bmap[0],
 					*test.expected);
 			continue;
 		}
 
 		if (test.flags & PARSE_TIME)
-			pr_err("parse: %d: input is '%s' OK, Time: %llu\n",
-					i, test.in, time);
+			pr_err("parse%s: %d: input is '%s' OK, Time: %llu\n",
+					mode, i, test.in, time);
 	}
 }
 
+static void __init test_bitmap_parselist(void)
+{
+	__test_bitmap_parselist(0);	/* is_user = 0: exercises bitmap_parselist() */
+}
+
+static void __init test_bitmap_parselist_user(void)
+{
+	__test_bitmap_parselist(1);	/* is_user = 1: exercises bitmap_parselist_user() under KERNEL_DS */
+}
+
+static void __init test_bitmap_parse(void)
+{
+	__test_bitmap_parse(0);	/* is_user = 0: exercises bitmap_parse() */
+}
+
+static void __init test_bitmap_parse_user(void)
+{
+	__test_bitmap_parse(1);	/* is_user = 1: exercises bitmap_parse_user() under KERNEL_DS */
+}
+
 #define EXP1_IN_BITS	(sizeof(exp1) * 8)
 
 static void __init test_bitmap_arr32(void)
@@ -628,7 +675,9 @@ static void __init selftest(void)
 	test_replace();
 	test_bitmap_arr32();
 	test_bitmap_parse();
+	test_bitmap_parse_user();
 	test_bitmap_parselist();
+	test_bitmap_parselist_user();
 	test_mem_optimisations();
 	test_for_each_set_clump8();
 	test_bitmap_cut();
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 74a8d329a72c..9d266e79c6a2 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -9,6 +9,7 @@ EXCEPTION
 #CORRUPT_STACK_STRONG Crashes entire system on success
 CORRUPT_LIST_ADD list_add corruption
 CORRUPT_LIST_DEL list_del corruption
+CORRUPT_USER_DS Invalid address limit on user-mode return
 STACK_GUARD_PAGE_LEADING
 STACK_GUARD_PAGE_TRAILING
 UNSET_SMEP CR4 bits went missing
@@ -66,5 +67,6 @@ USERCOPY_STACK_FRAME_TO
 USERCOPY_STACK_FRAME_FROM
 USERCOPY_STACK_BEYOND
 USERCOPY_KERNEL
+USERCOPY_KERNEL_DS
 STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
 CFI_FORWARD_PROTO
-- 
2.29.2

From a83033234ee0b2af53b2775b148588aa1cec0892 Mon Sep 17 00:00:00 2001
From: Metztli Information Technology <jose@xxxxxxxxxxxxxxxxxxxxxxx>
Date: Mon, 15 Mar 2021 05:39:55 -0700
Subject: [PATCH] Ic ce (First) commit Reiser4 -enabling GNU Parted 3.4-1

---
 libparted/Makefile.am          |  1 +
 libparted/fs/Makefile.am       |  3 ++
 libparted/fs/reiser4/reiser4.c | 71 ++++++++++++++++++++++++++++++++++
 libparted/libparted.c          |  4 ++
 4 files changed, 79 insertions(+)
 create mode 100644 libparted/fs/reiser4/reiser4.c

diff --git a/libparted/Makefile.am b/libparted/Makefile.am
index ccf8a6a1..aeb1045c 100644
--- a/libparted/Makefile.am
+++ b/libparted/Makefile.am
@@ -4,6 +4,7 @@
 #
 # This file may be modified and/or distributed without restriction.
 
+AUTOMAKE_OPTIONS = subdir-objects
 SUBDIRS_CHECK =
 if HAVE_CHECK
 SUBDIRS_CHECK += tests
diff --git a/libparted/fs/Makefile.am b/libparted/fs/Makefile.am
index 71ddd56b..b3a09b86 100644
--- a/libparted/fs/Makefile.am
+++ b/libparted/fs/Makefile.am
@@ -4,6 +4,8 @@
 #
 # This file may be modified and/or distributed without restriction.
 
+AUTOMAKE_OPTIONS = subdir-objects
+
 partedincludedir = -I$(top_builddir)/include -I$(top_srcdir)/include -I$(top_srcdir)/lib
 
 AM_CFLAGS = $(WARN_CFLAGS)
@@ -47,6 +49,7 @@ libfs_la_SOURCES =		\
   ntfs/ntfs.c			\
   reiserfs/reiserfs.c		\
   reiserfs/reiserfs.h		\
+  reiser4/reiser4.c		\
   udf/udf.c			\
   ufs/ufs.c			\
   xfs/platform_defs.h		\
diff --git a/libparted/fs/reiser4/reiser4.c b/libparted/fs/reiser4/reiser4.c
new file mode 100644
index 00000000..0a86e0cf
--- /dev/null
+++ b/libparted/fs/reiser4/reiser4.c
@@ -0,0 +1,71 @@
+/*
+    libparted - a library for manipulating disk partitions
+    Copyright (C) 2000 Free Software Foundation, Inc.
+
+    This program is free software; you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation; either version 2 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program; if not, write to the Free Software
+    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+*/
+
+#include <config.h>
+
+#include <parted/parted.h>
+#include <parted/endian.h>
+
+#if ENABLE_NLS
+#  include <libintl.h>
+#  define _(String) dgettext (PACKAGE, String)
+#else
+#  define _(String) (String)
+#endif /* ENABLE_NLS */
+
+#include <unistd.h>
+#include <string.h>
+
+static PedGeometry*
+reiser4_probe (PedGeometry* geom)
+{
+	/* Return a copy of GEOM if the "ReIsEr4\0" magic sits 65536 bytes in
+	   (sector 128 at 512 B/sector), else NULL.  Reads move whole sectors,
+	   so offset and buffer bound are derived from the device sector size.  */
+	char		buf[4096];
+	PedSector	ssize = geom->dev->sector_size;
+
+	if (ssize < 1 || ssize > (PedSector) sizeof buf || 65536 % ssize
+	    || !ped_geometry_read (geom, buf, 65536 / ssize, 1))
+		return NULL;
+	return memcmp (buf, "ReIsEr4", 8) ? NULL : ped_geometry_duplicate (geom);
+}
+
+static PedFileSystemOps reiser4_ops = {	/* probe is the only hook supplied */
+	.probe = reiser4_probe,
+};
+
+static PedFileSystemType reiser4_type = {	/* descriptor passed to the register/unregister calls */
+	.next = NULL,
+	.ops = &reiser4_ops,
+	.name = "reiser4",
+};
+
+void
+ped_file_system_reiser4_init (void)	/* registers reiser4_type; called from init_file_system_types () */
+{
+	ped_file_system_type_register (&reiser4_type);
+}
+
+void
+ped_file_system_reiser4_done (void)	/* unregisters reiser4_type; called from done_file_system_types () */
+{
+	ped_file_system_type_unregister (&reiser4_type);
+}
+
diff --git a/libparted/libparted.c b/libparted/libparted.c
index d34e90d5..d8f3b5b3 100644
--- a/libparted/libparted.c
+++ b/libparted/libparted.c
@@ -112,6 +112,7 @@ extern void ped_file_system_hfs_init (void);
 extern void ped_file_system_fat_init (void);
 extern void ped_file_system_f2fs_init (void);
 extern void ped_file_system_ext2_init (void);
+extern void ped_file_system_reiser4_init (void);
 extern void ped_file_system_nilfs2_init (void);
 extern void ped_file_system_btrfs_init (void);
 extern void ped_file_system_udf_init (void);
@@ -131,6 +132,7 @@ init_file_system_types ()
 	ped_file_system_fat_init ();
 	ped_file_system_f2fs_init ();
 	ped_file_system_ext2_init ();
+	ped_file_system_reiser4_init ();
 	ped_file_system_nilfs2_init ();
 	ped_file_system_btrfs_init ();
 	ped_file_system_udf_init ();
@@ -199,6 +201,7 @@ extern void ped_file_system_ntfs_done (void);
 extern void ped_file_system_reiserfs_done (void);
 extern void ped_file_system_ufs_done (void);
 extern void ped_file_system_xfs_done (void);
+extern void ped_file_system_reiser4_done (void);
 extern void ped_file_system_amiga_done (void);
 extern void ped_file_system_btrfs_done (void);
 extern void ped_file_system_udf_done (void);
@@ -219,6 +222,7 @@ done_file_system_types ()
 	ped_file_system_reiserfs_done ();
 	ped_file_system_ufs_done ();
 	ped_file_system_xfs_done ();
+	ped_file_system_reiser4_done ();
 	ped_file_system_amiga_done ();
 	ped_file_system_btrfs_done ();
 	ped_file_system_udf_done ();
-- 
2.30.1

From 31449f2fe71045a53793c952c97a21f7d31221a8 Mon Sep 17 00:00:00 2001
From: Metztli Information Technology <jose@xxxxxxxxxxxxxxxxxxxxxxx>
Date: Sun, 14 Mar 2021 00:08:40 -0800
Subject: [PATCH] Ic ome (Second) commit Reiser4 SFRN 4.0.2 v1.2.1 fix for null
 UUID

---
 librepair/master.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/librepair/master.c b/librepair/master.c
index c7806c5..dadf21a 100644
--- a/librepair/master.c
+++ b/librepair/master.c
@@ -4,6 +4,14 @@
    librepair/master.c - methods are needed for work with broken master 
    super block. */
 
+#ifdef HAVE_CONFIG_H
+#  include <config.h>
+#endif
+
+#if defined(HAVE_LIBUUID) && defined(HAVE_UUID_UUID_H)
+#  include <uuid/uuid.h>
+#endif
+
 #include <repair/librepair.h>
 
 /* Checks the blocksize. */
@@ -347,9 +355,9 @@ void repair_master_print(reiser4_master_t *master,
 			  pid, plug ? plug->label : "absent");
 
 #if defined(HAVE_LIBUUID) && defined(HAVE_UUID_UUID_H)
-	if (*master->ent.ms_uuid != '\0') {
+	if (!uuid_is_null((unsigned char *)master->ent.ms_uuid)) {
 		char uuid[37];
-		
+
 		uuid[36] = '\0';
 		unparse(reiser4_master_get_uuid(master), uuid);
 		aal_stream_format(stream, "uuid:\t\t%s\n", uuid);
-- 
2.30.1


[Index of Archives]     [Linux File System Development]     [Linux BTRFS]     [Linux NFS]     [Linux Filesystems]     [Ext4 Filesystem]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Device Mapper]     [Linux Resources]

  Powered by Linux