[PATCH 08/13] x86, libnvdimm, dax: stop abusing __copy_user_nocache

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The pmem and nd_blk drivers both have need to copy data through the cpu
cache to persistent memory. To date they have been abusing
__copy_user_nocache through the memcpy_to_pmem abstraction, but this has
several problems:

* __copy_user_nocache does not guarantee that it will always avoid the
cache. While we have fixed the cases where the pmem usage might trigger
that behavior it's a fragile assumption and burdens the uaccess.h
implementation with worrying about the distinction between 'nocache' and
the stricter write-through semantic needed by pmem.

* It implements SMAP (supervisor mode access protection) which is only
meant for user copies.

__arch_memcpy_to_pmem() is a copy of __copy_user_nocache() minus SMAP,
unaligned support, and exception handling. The configuration symbol
ARCH_HAS_PMEM_API is also moved local to libnvdimm to be next to the
implementation.

Cc: <x86@xxxxxxxxxx>
Cc: Jan Kara <jack@xxxxxxx>
Cc: Jeff Moyer <jmoyer@xxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Christoph Hellwig <hch@xxxxxx>
Cc: Toshi Kani <toshi.kani@xxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: "H. Peter Anvin" <hpa@xxxxxxxxx>
Cc: Al Viro <viro@xxxxxxxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Brian Boylston <brian.boylston@xxxxxxx>
Cc: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx>
Cc: Ross Zwisler <ross.zwisler@xxxxxxxxxxxxxxx>
Cc: Linus Torvalds <torvalds@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx>
---
 MAINTAINERS                     |    2 -
 arch/x86/Kconfig                |    1 -
 arch/x86/include/asm/pmem.h     |   48 --------------------------
 drivers/acpi/nfit/core.c        |    3 +-
 drivers/nvdimm/Kconfig          |    4 ++
 drivers/nvdimm/Makefile         |    1 +
 drivers/nvdimm/claim.c          |    4 +-
 drivers/nvdimm/namespace_devs.c |    1 -
 drivers/nvdimm/pmem.c           |    4 +-
 drivers/nvdimm/region_devs.c    |    1 -
 drivers/nvdimm/x86-asm.S        |   71 +++++++++++++++++++++++++++++++++++++++
 drivers/nvdimm/x86.c            |   14 ++++++++
 fs/dax.c                        |    1 -
 include/linux/libnvdimm.h       |    9 +++++
 include/linux/pmem.h            |   59 --------------------------------
 lib/Kconfig                     |    3 --
 tools/testing/nvdimm/Kbuild     |    1 +
 17 files changed, 105 insertions(+), 122 deletions(-)
 delete mode 100644 arch/x86/include/asm/pmem.h
 create mode 100644 drivers/nvdimm/x86-asm.S
 delete mode 100644 include/linux/pmem.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 0277df881da4..f5854de3afab 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7368,8 +7368,6 @@ L:	linux-nvdimm@xxxxxxxxxxxx
 Q:	https://patchwork.kernel.org/project/linux-nvdimm/list/
 S:	Supported
 F:	drivers/nvdimm/pmem.c
-F:	include/linux/pmem.h
-F:	arch/*/include/asm/pmem.h
 
 LIGHTNVM PLATFORM SUPPORT
 M:	Matias Bjorling <mb@xxxxxxxxxxx>
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index e487493bbd47..db2d4601a02f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -52,7 +52,6 @@ config X86
 	select ARCH_HAS_GCOV_PROFILE_ALL
 	select ARCH_HAS_KCOV			if X86_64
 	select ARCH_HAS_MMIO_FLUSH
-	select ARCH_HAS_PMEM_API		if X86_64
 	select ARCH_HAS_SG_CHAIN
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
diff --git a/arch/x86/include/asm/pmem.h b/arch/x86/include/asm/pmem.h
deleted file mode 100644
index ded2541a7ba9..000000000000
--- a/arch/x86/include/asm/pmem.h
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Copyright(c) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#ifndef __ASM_X86_PMEM_H__
-#define __ASM_X86_PMEM_H__
-
-#include <linux/uaccess.h>
-#include <asm/cacheflush.h>
-#include <asm/cpufeature.h>
-#include <asm/special_insns.h>
-
-#ifdef CONFIG_ARCH_HAS_PMEM_API
-/**
- * arch_memcpy_to_pmem - copy data to persistent memory
- * @dst: destination buffer for the copy
- * @src: source buffer for the copy
- * @n: length of the copy in bytes
- *
- * Copy data to persistent memory media via non-temporal stores so that
- * a subsequent pmem driver flush operation will drain posted write queues.
- */
-static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
-{
-	int rem;
-
-	/*
-	 * We are copying between two kernel buffers, if
-	 * __copy_from_user_inatomic_nocache() returns an error (page
-	 * fault) we would have already reported a general protection fault
-	 * before the WARN+BUG.
-	 */
-	rem = __copy_from_user_inatomic_nocache(dst, (void __user *) src, n);
-	if (WARN(rem, "%s: fault copying %p <- %p unwritten: %d\n",
-				__func__, dst, src, rem))
-		BUG();
-}
-
-#endif /* CONFIG_ARCH_HAS_PMEM_API */
-#endif /* __ASM_X86_PMEM_H__ */
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index 2019de7b84e5..41fa3f46e6e3 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -20,7 +20,6 @@
 #include <linux/list.h>
 #include <linux/acpi.h>
 #include <linux/sort.h>
-#include <linux/pmem.h>
 #include <linux/io.h>
 #include <linux/nd.h>
 #include <asm/cacheflush.h>
@@ -1758,7 +1757,7 @@ static int acpi_nfit_blk_single_io(struct nfit_blk *nfit_blk,
 		}
 
 		if (rw)
-			memcpy_to_pmem(mmio->addr.aperture + offset,
+			arch_memcpy_to_pmem(mmio->addr.aperture + offset,
 					iobuf + copied, c);
 		else {
 			if (nfit_blk->dimm_flags & NFIT_BLK_READ_FLUSH)
diff --git a/drivers/nvdimm/Kconfig b/drivers/nvdimm/Kconfig
index 59e750183b7f..2b62c122e1e5 100644
--- a/drivers/nvdimm/Kconfig
+++ b/drivers/nvdimm/Kconfig
@@ -35,6 +35,10 @@ config BLK_DEV_PMEM
 
 	  Say Y if you want to use an NVDIMM
 
+config ARCH_HAS_PMEM_API
+	depends on X86_64
+	def_bool y
+
 config ND_BLK
 	tristate "BLK: Block data window (aperture) device support"
 	default LIBNVDIMM
diff --git a/drivers/nvdimm/Makefile b/drivers/nvdimm/Makefile
index 9eafb1dd2876..f7e735f7c330 100644
--- a/drivers/nvdimm/Makefile
+++ b/drivers/nvdimm/Makefile
@@ -25,3 +25,4 @@ libnvdimm-$(CONFIG_BTT) += btt_devs.o
 libnvdimm-$(CONFIG_NVDIMM_PFN) += pfn_devs.o
 libnvdimm-$(CONFIG_NVDIMM_DAX) += dax_devs.o
 libnvdimm-$(CONFIG_X86_64) += x86.o
+libnvdimm-$(CONFIG_X86_64) += x86-asm.o
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index dca2a15dc01d..4f26b3fa8c40 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -10,9 +10,9 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  * General Public License for more details.
  */
+#include <linux/libnvdimm.h>
 #include <linux/device.h>
 #include <linux/sizes.h>
-#include <linux/pmem.h>
 #include "nd-core.h"
 #include "pmem.h"
 #include "pfn.h"
@@ -259,7 +259,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 			rc = -EIO;
 	}
 
-	memcpy_to_pmem(nsio->addr + offset, buf, size);
+	arch_memcpy_to_pmem(nsio->addr + offset, buf, size);
 	nvdimm_flush(to_nd_region(ndns->dev.parent));
 
 	return rc;
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 6307088b375f..eabfc46eb732 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -14,7 +14,6 @@
 #include <linux/device.h>
 #include <linux/sort.h>
 #include <linux/slab.h>
-#include <linux/pmem.h>
 #include <linux/list.h>
 #include <linux/nd.h>
 #include "nd-core.h"
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index d551bd2ef9dd..f971be271eac 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -27,7 +27,7 @@
 #include <linux/vmalloc.h>
 #include <linux/pfn_t.h>
 #include <linux/slab.h>
-#include <linux/pmem.h>
+#include <linux/uio.h>
 #include <linux/nd.h>
 #include "pmem.h"
 #include "pfn.h"
@@ -78,7 +78,7 @@ static void write_pmem(void *pmem_addr, struct page *page,
 {
 	void *mem = kmap_atomic(page);
 
-	memcpy_to_pmem(pmem_addr, mem + off, len);
+	arch_memcpy_to_pmem(pmem_addr, mem + off, len);
 	kunmap_atomic(mem);
 }
 
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index 7cd705f3247c..c47cecc9358b 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -15,7 +15,6 @@
 #include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/hash.h>
-#include <linux/pmem.h>
 #include <linux/sort.h>
 #include <linux/io.h>
 #include <linux/nd.h>
diff --git a/drivers/nvdimm/x86-asm.S b/drivers/nvdimm/x86-asm.S
new file mode 100644
index 000000000000..23c5ec94e896
--- /dev/null
+++ b/drivers/nvdimm/x86-asm.S
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2017, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#include <linux/linkage.h>
+
+/*
+ * __arch_memcpy_to_pmem - non-temporal + unordered memory copy
+ *
+ * 8-byte alignment for destination, source, and len. The results of
+ * this transfer are not persistent or globally visible until a
+ * sub-sequent sfence (REQ_FLUSH) to the pmem driver.
+ *
+ * Derived from __copy_user_nocache.
+ */
+ENTRY(__arch_memcpy_to_pmem)
+	/* Set 4x8-byte copy count and remainder */
+	movl %edx,%ecx
+	andl $63,%edx
+	shrl $6,%ecx
+	jz .L_8b_pmem_copy_entry	/* jump if count is 0 */
+
+	/* Perform 4x8-byte pmem loop-copy */
+.L_4x8b_pmem_copy_loop:
+	movq (%rsi),%r8
+	movq 1*8(%rsi),%r9
+	movq 2*8(%rsi),%r10
+	movq 3*8(%rsi),%r11
+	movnti %r8,(%rdi)
+	movnti %r9,1*8(%rdi)
+	movnti %r10,2*8(%rdi)
+	movnti %r11,3*8(%rdi)
+	movq 4*8(%rsi),%r8
+	movq 5*8(%rsi),%r9
+	movq 6*8(%rsi),%r10
+	movq 7*8(%rsi),%r11
+	movnti %r8,4*8(%rdi)
+	movnti %r9,5*8(%rdi)
+	movnti %r10,6*8(%rdi)
+	movnti %r11,7*8(%rdi)
+	leaq 64(%rsi),%rsi
+	leaq 64(%rdi),%rdi
+	decl %ecx
+	jnz .L_4x8b_pmem_copy_loop
+
+	/* Set 8-byte copy count and remainder */
+.L_8b_pmem_copy_entry:
+	movl %edx,%ecx
+	andl $7,%edx
+	shrl $3,%ecx
+	jnz .L_8b_pmem_copy_loop /* continue if count non-zero */
+	ret
+
+	/* Perform 8-byte pmem loop-copy */
+.L_8b_pmem_copy_loop:
+	movq (%rsi),%r8
+	movnti %r8,(%rdi)
+	leaq 8(%rsi),%rsi
+	leaq 8(%rdi),%rdi
+	decl %ecx
+	jnz .L_8b_pmem_copy_loop
+	ret
+ENDPROC(__arch_memcpy_to_pmem)
diff --git a/drivers/nvdimm/x86.c b/drivers/nvdimm/x86.c
index 07478ed7ce97..0d0e2e5fadae 100644
--- a/drivers/nvdimm/x86.c
+++ b/drivers/nvdimm/x86.c
@@ -40,3 +40,17 @@ void arch_invalidate_pmem(void *addr, size_t size)
 	clflush_cache_range(addr, size);
 }
 EXPORT_SYMBOL_GPL(arch_invalidate_pmem);
+
+void __arch_memcpy_to_pmem(void *dst, void *src, unsigned size);
+
+void arch_memcpy_to_pmem(void *dst, void *src, unsigned size)
+{
+	if (((unsigned long) dst | (unsigned long) src | size) & 7) {
+		/* __arch_memcpy_to_pmem assumes 8-byte alignment */
+		memcpy(dst, src, size);
+		arch_wb_cache_pmem(dst, size);
+		return;
+	}
+	__arch_memcpy_to_pmem(dst, src, size);
+}
+EXPORT_SYMBOL_GPL(arch_memcpy_to_pmem);
diff --git a/fs/dax.c b/fs/dax.c
index 8883ce4d391e..49b81c251763 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -25,7 +25,6 @@
 #include <linux/mm.h>
 #include <linux/mutex.h>
 #include <linux/pagevec.h>
-#include <linux/pmem.h>
 #include <linux/sched.h>
 #include <linux/uio.h>
 #include <linux/vmstat.h>
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 8458c5351e56..bb7a81f469e1 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -160,4 +160,13 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane);
 u64 nd_fletcher64(void *addr, size_t len, bool le);
 void nvdimm_flush(struct nd_region *nd_region);
 int nvdimm_has_flush(struct nd_region *nd_region);
+#ifdef CONFIG_ARCH_HAS_PMEM_API
+void arch_memcpy_to_pmem(void *dst, void *src, unsigned size);
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
+#else
+static inline void arch_memcpy_to_pmem(void *dst, void *src, unsigned size)
+{
+}
+#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
+#endif /* CONFIG_ARCH_HAS_PMEM_API */
 #endif /* __LIBNVDIMM_H__ */
diff --git a/include/linux/pmem.h b/include/linux/pmem.h
deleted file mode 100644
index 559c00848583..000000000000
--- a/include/linux/pmem.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright(c) 2015 Intel Corporation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of version 2 of the GNU General Public License as
- * published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful, but
- * WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * General Public License for more details.
- */
-#ifndef __PMEM_H__
-#define __PMEM_H__
-
-#include <linux/io.h>
-#include <linux/uio.h>
-
-#ifdef CONFIG_ARCH_HAS_PMEM_API
-#define ARCH_MEMREMAP_PMEM MEMREMAP_WB
-#include <asm/pmem.h>
-#else
-#define ARCH_MEMREMAP_PMEM MEMREMAP_WT
-/*
- * These are simply here to enable compilation, all call sites gate
- * calling these symbols with arch_has_pmem_api() and redirect to the
- * implementation in asm/pmem.h.
- */
-static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n)
-{
-	BUG();
-}
-#endif
-
-static inline bool arch_has_pmem_api(void)
-{
-	return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API);
-}
-
-/**
- * memcpy_to_pmem - copy data to persistent memory
- * @dst: destination buffer for the copy
- * @src: source buffer for the copy
- * @n: length of the copy in bytes
- *
- * Perform a memory copy that results in the destination of the copy
- * being effectively evicted from, or never written to, the processor
- * cache hierarchy after the copy completes.  After memcpy_to_pmem()
- * data may still reside in cpu or platform buffers, so this operation
- * must be followed by a blkdev_issue_flush() on the pmem block device.
- */
-static inline void memcpy_to_pmem(void *dst, const void *src, size_t n)
-{
-	if (arch_has_pmem_api())
-		arch_memcpy_to_pmem(dst, src, n);
-	else
-		memcpy(dst, src, n);
-}
-#endif /* __PMEM_H__ */
diff --git a/lib/Kconfig b/lib/Kconfig
index 260a80e313b9..006264ac768a 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -537,9 +537,6 @@ config SG_POOL
 config ARCH_HAS_SG_CHAIN
 	def_bool n
 
-config ARCH_HAS_PMEM_API
-	bool
-
 config ARCH_HAS_MMIO_FLUSH
 	bool
 
diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild
index 7488dfa1309a..a989ded70c18 100644
--- a/tools/testing/nvdimm/Kbuild
+++ b/tools/testing/nvdimm/Kbuild
@@ -67,6 +67,7 @@ libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o
 libnvdimm-$(CONFIG_NVDIMM_PFN) += $(NVDIMM_SRC)/pfn_devs.o
 libnvdimm-$(CONFIG_NVDIMM_DAX) += $(NVDIMM_SRC)/dax_devs.o
 libnvdimm-$(CONFIG_X86_64) += $(NVDIMM_SRC)/x86.o
+libnvdimm-$(CONFIG_X86_64) += $(NVDIMM_SRC)/x86-asm.o
 libnvdimm-y += config_check.o
 
 obj-m += test/

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux