On Wed, May 6, 2015 at 1:05 PM, Dan Williams <dan.j.williams@xxxxxxxxx> wrote: > It would be unfortunate if the kmap infrastructure escaped its current > 32-bit/HIGHMEM bonds and leaked into 64-bit code. Instead, if the user > has enabled CONFIG_PMEM_IO we direct the kmap_atomic_pfn_t() > implementation to scan a list of pre-mapped persistent memory address > ranges inserted by the pmem driver. > > The __pfn_t to resource lookup is indeed inefficient walking of a linked list, > but there are two mitigating factors: > > 1/ The number of persistent memory ranges is bounded by the number of > DIMMs which is on the order of 10s of DIMMs, not hundreds. > > 2/ The lookup yields the entire range, if it becomes inefficient to do a > kmap_atomic_pfn_t() a PAGE_SIZE at a time the caller can take > advantage of the fact that the lookup can be amortized for all kmap > operations it needs to perform in a given range. > > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> > --- > arch/Kconfig | 3 + > arch/x86/Kconfig | 2 + > arch/x86/kernel/Makefile | 1 > arch/x86/kernel/kmap.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++ > drivers/block/pmem.c | 6 +++ > include/linux/highmem.h | 23 +++++++++++ > 6 files changed, 130 insertions(+) > create mode 100644 arch/x86/kernel/kmap.c > > diff --git a/arch/Kconfig b/arch/Kconfig > index f7f800860c00..69d3a3fa21af 100644 > --- a/arch/Kconfig > +++ b/arch/Kconfig > @@ -206,6 +206,9 @@ config HAVE_DMA_CONTIGUOUS > config HAVE_DMA_PFN > bool > > +config HAVE_KMAP_PFN > + bool > + > config GENERIC_SMP_IDLE_THREAD > bool > > diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig > index 1fae5e842423..eddaea839500 100644 > --- a/arch/x86/Kconfig > +++ b/arch/x86/Kconfig > @@ -1434,7 +1434,9 @@ config X86_PMEM_LEGACY > Say Y if unsure. 
> > config X86_PMEM_DMA > + depends on !HIGHMEM > def_bool PMEM_IO > + select HAVE_KMAP_PFN > select HAVE_DMA_PFN > > config HIGHPTE > diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile > index 9bcd0b56ca17..44c323342996 100644 > --- a/arch/x86/kernel/Makefile > +++ b/arch/x86/kernel/Makefile > @@ -96,6 +96,7 @@ obj-$(CONFIG_PARAVIRT) += paravirt.o paravirt_patch_$(BITS).o > obj-$(CONFIG_PARAVIRT_SPINLOCKS)+= paravirt-spinlocks.o > obj-$(CONFIG_PARAVIRT_CLOCK) += pvclock.o > obj-$(CONFIG_X86_PMEM_LEGACY) += pmem.o > +obj-$(CONFIG_X86_PMEM_DMA) += kmap.o > > obj-$(CONFIG_PCSPKR_PLATFORM) += pcspeaker.o > > diff --git a/arch/x86/kernel/kmap.c b/arch/x86/kernel/kmap.c > new file mode 100644 > index 000000000000..d597c475377b > --- /dev/null > +++ b/arch/x86/kernel/kmap.c > @@ -0,0 +1,95 @@ > +/* > + * Copyright(c) 2015 Intel Corporation. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify > + * it under the terms of version 2 of the GNU General Public License as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, but > + * WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * General Public License for more details. 
> + */ > +#include <linux/rcupdate.h> > +#include <linux/rculist.h> > +#include <linux/highmem.h> > +#include <linux/device.h> > +#include <linux/slab.h> > +#include <linux/mm.h> > + > +static LIST_HEAD(ranges); > + > +struct kmap { > + struct list_head list; > + struct resource *res; > + struct device *dev; > + void *base; > +}; > + > +static void teardown_kmap(void *data) > +{ > + struct kmap *kmap = data; > + > + dev_dbg(kmap->dev, "kmap unregister %pr\n", kmap->res); > + list_del_rcu(&kmap->list); > + synchronize_rcu(); > + kfree(kmap); > +} > + > +int devm_register_kmap_pfn_range(struct device *dev, struct resource *res, > + void *base) > +{ > + struct kmap *kmap = kzalloc(sizeof(*kmap), GFP_KERNEL); > + int rc; > + > + if (!kmap) > + return -ENOMEM; > + > + INIT_LIST_HEAD(&kmap->list); > + kmap->res = res; > + kmap->base = base; > + kmap->dev = dev; > + rc = devm_add_action(dev, teardown_kmap, kmap); > + if (rc) { > + kfree(kmap); > + return rc; > + } > + dev_dbg(kmap->dev, "kmap register %pr\n", kmap->res); > + list_add_rcu(&kmap->list, &ranges); > + return 0; > +} > +EXPORT_SYMBOL_GPL(devm_register_kmap_pfn_range); > + > +void *kmap_atomic_pfn_t(__pfn_t pfn) > +{ > + struct page *page = __pfn_t_to_page(pfn); > + resource_size_t addr; > + struct kmap *kmap; > + > + if (page) > + return kmap_atomic(page); > + addr = __pfn_t_to_phys(pfn); > + rcu_read_lock(); > + list_for_each_entry_rcu(kmap, &ranges, list) > + if (addr >= kmap->res->start && addr <= kmap->res->end) > + return kmap->base + addr - kmap->res->start; > + > + /* only unlock in the error case */ > + rcu_read_unlock(); > + return NULL; > +} > +EXPORT_SYMBOL(kmap_atomic_pfn_t); > + > +void kunmap_atomic_pfn_t(void *addr) > +{ > + rcu_read_unlock(); > + > + /* > + * If the original __pfn_t had an entry in the memmap then > + * 'addr' will be outside of vmalloc space i.e. 
it came from > + * page_address() > + */ > + if (!is_vmalloc_addr(addr)) > + kunmap_atomic(addr);

rcu_read_unlock() should move here, i.e. after the kunmap_atomic() call, rather than being the first statement of kunmap_atomic_pfn_t().

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html