To test this functionality, a debugfs interface is added:

  /sys/kernel/debug/x86/split_mapping

There are three test modes.

mode 0: allocate $page_nr pages and set each page's protection first
        to RO and X and then back to RW and NX. This is used to test
        multiple CPUs dealing with different address ranges.
mode 1: allocate several pages and create $nr_cpu kthreads that
        simultaneously change those pages' protection in a fixed
        pattern. This is used to test multiple CPUs dealing with the
        same address range.
mode 2: same as mode 0, except alloc_pages() is used instead of
        vmalloc(), because vmalloc space is too small on x86_32/PAE.

On an x86_64 VM, I started mode0.sh and mode1.sh at the same time:

mode0.sh:
  mode=0
  page_nr=200000
  nr_cpu=16

  function test_one()
  {
          echo $mode $page_nr > /sys/kernel/debug/x86/split_mapping
  }

  while true; do
          for i in `seq $nr_cpu`; do
                  test_one &
          done
          wait
  done

mode1.sh:
  mode=1
  page_nr=1
  echo $mode $page_nr > /sys/kernel/debug/x86/split_mapping

After 5 hours, no problem occurred across some millions of splits and
merges. For x86_32 and x86_PAE, the mode 2 test was used and also
found no problem.
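For reference, a mode 2 run is driven through the same debugfs file; the
snippet below is only an illustrative sketch (the script name and the
page_nr value are arbitrary, not the exact parameters used in the runs
above):

mode2.sh:
  mode=2
  page_nr=200000   # arbitrary page count, chosen for illustration
  echo $mode $page_nr > /sys/kernel/debug/x86/split_mapping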
Signed-off-by: Aaron Lu <aaron.lu@xxxxxxxxx>
---
 arch/x86/mm/pat/set_memory.c | 206 +++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)

diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index 1be9aab42c79..4deea4de73e7 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -20,6 +20,9 @@
 #include <linux/kernel.h>
 #include <linux/cc_platform.h>
 #include <linux/set_memory.h>
+#include <linux/kthread.h>
+#include <linux/delay.h>
+#include <linux/random.h>
 
 #include <asm/e820/api.h>
 #include <asm/processor.h>
@@ -2556,6 +2559,209 @@ int __init kernel_unmap_pages_in_pgd(pgd_t *pgd, unsigned long address,
 	return retval;
 }
 
+static int split_mapping_mode0_test(int page_nr)
+{
+	void **addr_buff;
+	void *addr;
+	int i, j;
+
+	addr_buff = kvmalloc(sizeof(void *) * page_nr, GFP_KERNEL);
+	if (!addr_buff) {
+		pr_err("addr_buff: no memory\n");
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < page_nr; i++) {
+		addr = vmalloc(PAGE_SIZE);
+		if (!addr) {
+			pr_err("no memory\n");
+			break;
+		}
+
+		set_memory_ro((unsigned long)addr, 1);
+		set_memory_x((unsigned long)addr, 1);
+
+		addr_buff[i] = addr;
+	}
+
+	for (j = 0; j < i; j++) {
+		set_memory_nx((unsigned long)addr_buff[j], 1);
+		set_memory_rw((unsigned long)addr_buff[j], 1);
+		vfree(addr_buff[j]);
+	}
+
+	kvfree(addr_buff);
+
+	return 0;
+}
+
+struct split_mapping_mode1_data {
+	unsigned long addr;
+	int page_nr;
+};
+
+static int split_mapping_set_prot(void *data)
+{
+	struct split_mapping_mode1_data *d = data;
+	unsigned long addr = d->addr;
+	int page_nr = d->page_nr;
+	int m;
+
+	m = get_random_int() % 100;
+	msleep(m);
+
+	while (!kthread_should_stop()) {
+		set_memory_ro(addr, page_nr);
+		set_memory_x(addr, page_nr);
+		set_memory_rw(addr, page_nr);
+		set_memory_nx(addr, page_nr);
+		cond_resched();
+	}
+
+	return 0;
+}
+
+static int split_mapping_mode1_test(int page_nr)
+{
+	int nr_kthreads = num_online_cpus();
+	struct split_mapping_mode1_data d;
+	struct task_struct **kthreads;
+	int i, j, ret;
+	void *addr;
+
+	addr = vmalloc(PAGE_SIZE * page_nr);
+	if (!addr)
+		return -ENOMEM;
+
+	kthreads = kmalloc(nr_kthreads * sizeof(struct task_struct *), GFP_KERNEL);
+	if (!kthreads) {
+		vfree(addr);
+		return -ENOMEM;
+	}
+
+	d.addr = (unsigned long)addr;
+	d.page_nr = page_nr;
+	for (i = 0; i < nr_kthreads; i++) {
+		kthreads[i] = kthread_run(split_mapping_set_prot, &d, "split_mappingd%d", i);
+		if (IS_ERR(kthreads[i])) {
+			for (j = 0; j < i; j++)
+				kthread_stop(kthreads[j]);
+			ret = PTR_ERR(kthreads[i]);
+			goto out;
+		}
+	}
+
+	while (1) {
+		if (signal_pending(current)) {
+			for (i = 0; i < nr_kthreads; i++)
+				kthread_stop(kthreads[i]);
+			ret = 0;
+			break;
+		}
+		msleep(1000);
+	}
+
+out:
+	kfree(kthreads);
+	vfree(addr);
+	return ret;
+}
+
+static int split_mapping_mode2_test(int page_nr)
+{
+	struct page *p, *t;
+	unsigned long addr;
+	int i;
+
+	LIST_HEAD(head);
+
+	for (i = 0; i < page_nr; i++) {
+		p = alloc_pages(GFP_KERNEL | GFP_DMA32, 0);
+		if (!p) {
+			pr_err("no memory\n");
+			break;
+		}
+
+		addr = (unsigned long)page_address(p);
+		BUG_ON(!addr);
+
+		set_memory_ro(addr, 1);
+		set_memory_x(addr, 1);
+
+		list_add(&p->lru, &head);
+	}
+
+	list_for_each_entry_safe(p, t, &head, lru) {
+		addr = (unsigned long)page_address(p);
+		set_memory_nx(addr, 1);
+		set_memory_rw(addr, 1);
+
+		list_del(&p->lru);
+		__free_page(p);
+	}
+
+	return 0;
+}
+static ssize_t split_mapping_write_file(struct file *file, const char __user *buf,
+					size_t count, loff_t *ppos)
+{
+	unsigned int mode = 0, page_nr = 0;
+	char buffer[64] = {0};	/* keep the buffer NUL-terminated for sscanf() */
+	int ret;
+
+	if (count >= sizeof(buffer))
+		return -EINVAL;
+
+	if (copy_from_user(buffer, buf, count))
+		return -EFAULT;
+	sscanf(buffer, "%u %u", &mode, &page_nr);
+
+	/*
+	 * There are 3 test modes.
+	 * mode 0: each thread allocates $page_nr pages and sets each page's
+	 *         protection first to RO and X and then back to RW and NX.
+	 *         This is used to test multiple CPUs dealing with different
+	 *         pages.
+	 * mode 1: allocate several pages and create $nr_cpu kthreads to
+	 *         simultaneously change those pages' protection in a fixed
+	 *         pattern. This is used to test multiple CPUs dealing with
+	 *         the same pages' protection.
+	 * mode 2: like mode 0 but uses alloc_pages() directly because the
+	 *         vmalloc area on x86_32 is too small, only 128M.
+	 */
+	if (mode > 2)
+		return -EINVAL;
+
+	if (page_nr == 0)
+		return -EINVAL;
+
+	if (mode == 0)
+		ret = split_mapping_mode0_test(page_nr);
+	else if (mode == 1)
+		ret = split_mapping_mode1_test(page_nr);
+	else
+		ret = split_mapping_mode2_test(page_nr);
+
+	return ret ? ret : count;
+}
+
+static const struct file_operations split_mapping_fops = {
+	.write = split_mapping_write_file,
+};
+
+static int __init split_mapping_init(void)
+{
+	struct dentry *d = debugfs_create_file("split_mapping", S_IWUSR, arch_debugfs_dir, NULL,
+					       &split_mapping_fops);
+	if (IS_ERR(d)) {
+		pr_err("create split_mapping failed: %ld\n", PTR_ERR(d));
+		return PTR_ERR(d);
+	}
+
+	return 0;
+}
+late_initcall(split_mapping_init);
+
 /*
  * The testcases use internal knowledge of the implementation that shouldn't
  * be exposed to the rest of the kernel. Include these directly here.
-- 
2.37.1