Hello, On Fri, Jun 17, 2022 at 10:57 PM Huacai Chen <chenhuacai@xxxxxxxxxxx> wrote: > > We test 20 million times of getcpu(), the real syscall version take 25 > seconds, while the vsyscall version take only 2.4 seconds. > > Signed-off-by: Huacai Chen <chenhuacai@xxxxxxxxxxx> > --- > arch/loongarch/include/asm/vdso.h | 4 +++ > arch/loongarch/include/asm/vdso/vdso.h | 10 +++++- > arch/loongarch/kernel/vdso.c | 23 +++++++++----- > arch/loongarch/vdso/Makefile | 3 +- > arch/loongarch/vdso/vdso.lds.S | 1 + > arch/loongarch/vdso/vgetcpu.c | 43 ++++++++++++++++++++++++++ > 6 files changed, 74 insertions(+), 10 deletions(-) > create mode 100644 arch/loongarch/vdso/vgetcpu.c > > diff --git a/arch/loongarch/include/asm/vdso.h b/arch/loongarch/include/asm/vdso.h > index 8f8a0f9a4953..e76d5e37480d 100644 > --- a/arch/loongarch/include/asm/vdso.h > +++ b/arch/loongarch/include/asm/vdso.h > @@ -12,6 +12,10 @@ > > #include <asm/barrier.h> > > +typedef struct vdso_pcpu_data { > + u32 node; > +} ____cacheline_aligned_in_smp vdso_pcpu_data; > + > /* > * struct loongarch_vdso_info - Details of a VDSO image. > * @vdso: Pointer to VDSO image (page-aligned). > diff --git a/arch/loongarch/include/asm/vdso/vdso.h b/arch/loongarch/include/asm/vdso/vdso.h > index 5a01643a65b3..94055f7c54b7 100644 > --- a/arch/loongarch/include/asm/vdso/vdso.h > +++ b/arch/loongarch/include/asm/vdso/vdso.h > @@ -8,6 +8,13 @@ > > #include <asm/asm.h> > #include <asm/page.h> > +#include <asm/vdso.h> > + > +#if PAGE_SIZE < SZ_16K > +#define VDSO_DATA_SIZE SZ_16K Whether we add members to the vdso data structure or extend SMP_CACHE_BYTES/NR_CPUS, the static VDSO_DATA_SIZE may not match, and there is no assertion checking to help us catch bugs early. So I suggest defining VDSO_DATA_SIZE as ALIGN_UP(sizeof (struct vdso_data), PAGE_SIZE). hev > +#else > +#define VDSO_DATA_SIZE PAGE_SIZE > +#endif > > static inline unsigned long get_vdso_base(void) > { > @@ -24,7 +31,8 @@ static inline unsigned long get_vdso_base(void) > > static inline const struct vdso_data *get_vdso_data(void) > { > - return (const struct vdso_data *)(get_vdso_base() - PAGE_SIZE); > + return (const struct vdso_data *)(get_vdso_base() > + - VDSO_DATA_SIZE + SMP_CACHE_BYTES * NR_CPUS); > } > > #endif /* __ASSEMBLY__ */ > diff --git a/arch/loongarch/kernel/vdso.c b/arch/loongarch/kernel/vdso.c > index e20c8ca87473..6ce322a1bf8b 100644 > --- a/arch/loongarch/kernel/vdso.c > +++ b/arch/loongarch/kernel/vdso.c > @@ -26,11 +26,15 @@ extern char vdso_start[], vdso_end[]; > > /* Kernel-provided data used by the VDSO. */ > static union loongarch_vdso_data { > - u8 page[PAGE_SIZE]; > - struct vdso_data data[CS_BASES]; > + u8 page[VDSO_DATA_SIZE]; > + struct { > + vdso_pcpu_data pdata[NR_CPUS]; > + struct vdso_data data[CS_BASES]; > + }; > } loongarch_vdso_data __page_aligned_data; > -struct vdso_data *vdso_data = loongarch_vdso_data.data; > + > static struct page *vdso_pages[] = { NULL }; > +struct vdso_data *vdso_data = loongarch_vdso_data.data; > > static int vdso_mremap(const struct vm_special_mapping *sm, struct vm_area_struct *new_vma) > { > @@ -55,11 +59,14 @@ struct loongarch_vdso_info vdso_info = { > > static int __init init_vdso(void) > { > - unsigned long i, pfn; > + unsigned long i, cpu, pfn; > > BUG_ON(!PAGE_ALIGNED(vdso_info.vdso)); > BUG_ON(!PAGE_ALIGNED(vdso_info.size)); > > + for_each_possible_cpu(cpu) > + loongarch_vdso_data.pdata[cpu].node = cpu_to_node(cpu); > + > pfn = __phys_to_pfn(__pa_symbol(vdso_info.vdso)); > for (i = 0; i < vdso_info.size / PAGE_SIZE; i++) > vdso_info.code_mapping.pages[i] = pfn_to_page(pfn + i); > @@ -93,9 +100,9 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) > > /* > * Determine total area size. This includes the VDSO data itself > - * and the data page. > + * and the data pages. > */ > - vvar_size = PAGE_SIZE; > + vvar_size = VDSO_DATA_SIZE; > size = vvar_size + info->size; > > data_addr = get_unmapped_area(NULL, vdso_base(), size, 0, 0); > @@ -115,8 +122,8 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) > > /* Map VDSO data page. */ > ret = remap_pfn_range(vma, data_addr, > - virt_to_phys(vdso_data) >> PAGE_SHIFT, > - PAGE_SIZE, PAGE_READONLY); > + virt_to_phys(&loongarch_vdso_data) >> PAGE_SHIFT, > + vvar_size, PAGE_READONLY); > if (ret) > goto out; > > diff --git a/arch/loongarch/vdso/Makefile b/arch/loongarch/vdso/Makefile > index 6b6e16732c60..d89e2ac75f7b 100644 > --- a/arch/loongarch/vdso/Makefile > +++ b/arch/loongarch/vdso/Makefile > @@ -6,7 +6,7 @@ > ARCH_REL_TYPE_ABS := R_LARCH_32|R_LARCH_64|R_LARCH_MARK_LA|R_LARCH_JUMP_SLOT > include $(srctree)/lib/vdso/Makefile > > -obj-vdso-y := elf.o vgettimeofday.o sigreturn.o > +obj-vdso-y := elf.o vgetcpu.o vgettimeofday.o sigreturn.o > > # Common compiler flags between ABIs. > ccflags-vdso := \ > @@ -21,6 +21,7 @@ ccflags-vdso += $(filter --target=%,$(KBUILD_CFLAGS)) > endif > > cflags-vdso := $(ccflags-vdso) \ > + -isystem $(shell $(CC) -print-file-name=include) \ > $(filter -W%,$(filter-out -Wa$(comma)%,$(KBUILD_CFLAGS))) \ > -O2 -g -fno-strict-aliasing -fno-common -fno-builtin -G0 \ > -fno-stack-protector -fno-jump-tables -DDISABLE_BRANCH_PROFILING \ > diff --git a/arch/loongarch/vdso/vdso.lds.S b/arch/loongarch/vdso/vdso.lds.S > index 955f02de4a2d..56ad855896de 100644 > --- a/arch/loongarch/vdso/vdso.lds.S > +++ b/arch/loongarch/vdso/vdso.lds.S > @@ -58,6 +58,7 @@ VERSION > { > LINUX_5.10 { > global: > + __vdso_getcpu; > __vdso_clock_getres; > __vdso_clock_gettime; > __vdso_gettimeofday; > diff --git a/arch/loongarch/vdso/vgetcpu.c b/arch/loongarch/vdso/vgetcpu.c > new file mode 100644 > index 000000000000..23fe2362f4e0 > --- /dev/null > +++ b/arch/loongarch/vdso/vgetcpu.c > @@ -0,0 +1,43 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +/* > + * Fast user context implementation of getcpu() > + */ > + > +#include <asm/vdso.h> > +#include <linux/getcpu.h> > + > +static __always_inline int read_cpu_id(void) > +{ > + int cpu_id; > + > + __asm__ __volatile__( > + " rdtime.d $zero, %0\n" > + : "=r" (cpu_id) > + : > + : "memory"); > + > + return cpu_id; > +} > + > +static __always_inline const vdso_pcpu_data *get_pcpu_data(void) > +{ > + return (vdso_pcpu_data *)(get_vdso_base() - VDSO_DATA_SIZE); > +} > + > +int __vdso_getcpu(unsigned int *cpu, unsigned int *node, struct getcpu_cache *unused) > +{ > + int cpu_id; > + const vdso_pcpu_data *data; > + > + cpu_id = read_cpu_id(); > + > + if (cpu) > + *cpu = cpu_id; > + > + if (node) { > + data = get_pcpu_data(); > + *node = data[cpu_id].node; > + } > + > + return 0; > +} > -- > 2.27.0 > >