----- On Nov 6, 2017, at 9:07 PM, Boqun Feng boqun.feng@xxxxxxxxx wrote: > On Mon, Nov 06, 2017 at 03:56:38PM -0500, Mathieu Desnoyers wrote: > [...] >> +static int cpu_op_pin_pages(unsigned long addr, unsigned long len, >> + struct page ***pinned_pages_ptr, size_t *nr_pinned, >> + int write) >> +{ >> + struct page *pages[2]; >> + int ret, nr_pages; >> + >> + if (!len) >> + return 0; >> + nr_pages = cpu_op_range_nr_pages(addr, len); >> + BUG_ON(nr_pages > 2); >> + if (*nr_pinned + nr_pages > NR_PINNED_PAGES_ON_STACK) { > > Is this a bug? Seems you will kzalloc() every time if *nr_pinned is > bigger than NR_PINNED_PAGES_ON_STACK, which will result in memory > leaking. > > I think the logic here is complex enough for us to introduce a > structure, like: > > struct cpu_opv_page_pinner { > int nr_pinned; > bool is_kmalloc; > struct page **pinned_pages; > }; > > Thoughts? Good catch ! How about the attached diff ? I'll fold it into the rseq/dev tree. Thanks, Mathieu -- Mathieu Desnoyers EfficiOS Inc. http://www.efficios.com
diff --git a/kernel/cpu_opv.c b/kernel/cpu_opv.c index 09754bbe6a4f..3d8fd66416a0 100644 --- a/kernel/cpu_opv.c +++ b/kernel/cpu_opv.c @@ -46,6 +46,12 @@ union op_fn_data { #endif }; +struct cpu_opv_pinned_pages { + struct page **pages; + size_t nr; + bool is_kmalloc; +}; + typedef int (*op_fn_t)(union op_fn_data *data, uint64_t v, uint32_t len); static DEFINE_MUTEX(cpu_opv_offline_lock); @@ -217,8 +223,7 @@ static int cpu_op_check_pages(struct page **pages, } static int cpu_op_pin_pages(unsigned long addr, unsigned long len, - struct page ***pinned_pages_ptr, size_t *nr_pinned, - int write) + struct cpu_opv_pinned_pages *pin_pages, int write) { struct page *pages[2]; int ret, nr_pages; @@ -227,15 +232,17 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len, return 0; nr_pages = cpu_op_range_nr_pages(addr, len); BUG_ON(nr_pages > 2); - if (*nr_pinned + nr_pages > NR_PINNED_PAGES_ON_STACK) { + if (!pin_pages->is_kmalloc && pin_pages->nr + nr_pages + > NR_PINNED_PAGES_ON_STACK) { struct page **pinned_pages = kzalloc(CPU_OP_VEC_LEN_MAX * CPU_OP_MAX_PAGES * sizeof(struct page *), GFP_KERNEL); if (!pinned_pages) return -ENOMEM; - memcpy(pinned_pages, *pinned_pages_ptr, - *nr_pinned * sizeof(struct page *)); - *pinned_pages_ptr = pinned_pages; + memcpy(pinned_pages, pin_pages->pages, + pin_pages->nr * sizeof(struct page *)); + pin_pages->pages = pinned_pages; + pin_pages->is_kmalloc = true; } again: ret = get_user_pages_fast(addr, nr_pages, write, pages); @@ -257,9 +264,9 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len, } if (ret) goto error; - (*pinned_pages_ptr)[(*nr_pinned)++] = pages[0]; + pin_pages->pages[pin_pages->nr++] = pages[0]; if (nr_pages > 1) - (*pinned_pages_ptr)[(*nr_pinned)++] = pages[1]; + pin_pages->pages[pin_pages->nr++] = pages[1]; return 0; error: @@ -270,7 +277,7 @@ static int cpu_op_pin_pages(unsigned long addr, unsigned long len, } static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt, - struct page ***pinned_pages_ptr, size_t *nr_pinned) + struct cpu_opv_pinned_pages *pin_pages) { int ret, i; bool expect_fault = false; @@ -289,7 +296,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt, goto error; ret = cpu_op_pin_pages( (unsigned long)op->u.compare_op.a, - op->len, pinned_pages_ptr, nr_pinned, 0); + op->len, pin_pages, 0); if (ret) goto error; ret = -EFAULT; @@ -299,7 +306,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt, goto error; ret = cpu_op_pin_pages( (unsigned long)op->u.compare_op.b, - op->len, pinned_pages_ptr, nr_pinned, 0); + op->len, pin_pages, 0); if (ret) goto error; break; @@ -311,7 +318,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt, goto error; ret = cpu_op_pin_pages( (unsigned long)op->u.memcpy_op.dst, - op->len, pinned_pages_ptr, nr_pinned, 1); + op->len, pin_pages, 1); if (ret) goto error; ret = -EFAULT; @@ -321,7 +328,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt, goto error; ret = cpu_op_pin_pages( (unsigned long)op->u.memcpy_op.src, - op->len, pinned_pages_ptr, nr_pinned, 0); + op->len, pin_pages, 0); if (ret) goto error; break; @@ -333,7 +340,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt, goto error; ret = cpu_op_pin_pages( (unsigned long)op->u.arithmetic_op.p, - op->len, pinned_pages_ptr, nr_pinned, 1); + op->len, pin_pages, 1); if (ret) goto error; break; @@ -347,7 +354,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt, goto error; ret = cpu_op_pin_pages( (unsigned long)op->u.bitwise_op.p, - op->len, pinned_pages_ptr, nr_pinned, 1); + op->len, pin_pages, 1); if (ret) goto error; break; @@ -360,7 +367,7 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt, goto error; ret = cpu_op_pin_pages( (unsigned long)op->u.shift_op.p, - op->len, pinned_pages_ptr, nr_pinned, 1); + op->len, pin_pages, 1); if (ret) goto error; break; @@ -373,9 +380,9 @@ static int cpu_opv_pin_pages(struct cpu_op *cpuop, int cpuopcnt, return 0; error: - for (i = 0; i < *nr_pinned; i++) - put_page((*pinned_pages_ptr)[i]); - *nr_pinned = 0; + for (i = 0; i < pin_pages->nr; i++) + put_page(pin_pages->pages[i]); + pin_pages->nr = 0; /* * If faulting access is expected, return EAGAIN to user-space. * It allows user-space to distinguish between a fault caused by @@ -923,9 +930,12 @@ SYSCALL_DEFINE4(cpu_opv, struct cpu_op __user *, ucpuopv, int, cpuopcnt, { struct cpu_op cpuopv[CPU_OP_VEC_LEN_MAX]; struct page *pinned_pages_on_stack[NR_PINNED_PAGES_ON_STACK]; - struct page **pinned_pages = pinned_pages_on_stack; + struct cpu_opv_pinned_pages pin_pages = { + .pages = pinned_pages_on_stack, + .nr = 0, + .is_kmalloc = false, + }; int ret, i; - size_t nr_pinned = 0; if (unlikely(flags)) return -EINVAL; @@ -938,15 +948,14 @@ SYSCALL_DEFINE4(cpu_opv, struct cpu_op __user *, ucpuopv, int, cpuopcnt, ret = cpu_opv_check(cpuopv, cpuopcnt); if (ret) return ret; - ret = cpu_opv_pin_pages(cpuopv, cpuopcnt, - &pinned_pages, &nr_pinned); + ret = cpu_opv_pin_pages(cpuopv, cpuopcnt, &pin_pages); if (ret) goto end; ret = do_cpu_opv(cpuopv, cpuopcnt, cpu); - for (i = 0; i < nr_pinned; i++) - put_page(pinned_pages[i]); + for (i = 0; i < pin_pages.nr; i++) + put_page(pin_pages.pages[i]); end: - if (pinned_pages != pinned_pages_on_stack) - kfree(pinned_pages); + if (pin_pages.is_kmalloc) + kfree(pin_pages.pages); return ret; }