On Thu, 2016-09-22 at 17:28 +0200, Vlastimil Babka wrote: > The select(2) syscall performs a kmalloc(size, GFP_KERNEL) where size grows > with the number of fds passed. We had a customer report page allocation > failures of order-4 for this allocation. This is a costly order, so it might > easily fail, as the VM expects such allocation to have a lower-order fallback. > > Such trivial fallback is vmalloc(), as the memory doesn't have to be > physically contiguous. Also the allocation is temporary for the duration of the > syscall, so it's unlikely to stress vmalloc too much. > > Note that the poll(2) syscall seems to use a linked list of order-0 pages, so > it doesn't need this kind of fallback. > > Signed-off-by: Vlastimil Babka <vbabka@xxxxxxx> > --- > fs/select.c | 15 +++++++++++---- > 1 file changed, 11 insertions(+), 4 deletions(-) > > diff --git a/fs/select.c b/fs/select.c > index 8ed9da50896a..8fe5bddbe99b 100644 > --- a/fs/select.c > +++ b/fs/select.c > @@ -29,6 +29,7 @@ > #include <linux/sched/rt.h> > #include <linux/freezer.h> > #include <net/busy_poll.h> > +#include <linux/vmalloc.h> > > #include <asm/uaccess.h> > > @@ -558,6 +559,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, > struct fdtable *fdt; > /* Allocate small arguments on the stack to save memory and be faster */ > long stack_fds[SELECT_STACK_ALLOC/sizeof(long)]; > + unsigned long alloc_size; > > ret = -EINVAL; > if (n < 0) > @@ -580,10 +582,15 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, > bits = stack_fds; > if (size > sizeof(stack_fds) / 6) { > /* Not enough space in on-stack array; must use kmalloc */ > + alloc_size = 6 * size; > ret = -ENOMEM; > - bits = kmalloc(6 * size, GFP_KERNEL); > - if (!bits) > - goto out_nofds; > + bits = kmalloc(alloc_size, GFP_KERNEL|__GFP_NOWARN); > + if (!bits && alloc_size > PAGE_SIZE) { > + bits = vmalloc(alloc_size); > + > + if (!bits) > + goto out_nofds; The !bits test should also happen if alloc_size <= PAGE_SIZE; otherwise a failed kmalloc() of a small allocation is left unchecked: > + } 
if (!bits && alloc_size > PAGE_SIZE) bits = vmalloc(alloc_size); if (!bits) goto out_nofds; > } > fds.in = bits; > fds.out = bits + size; > @@ -618,7 +625,7 @@ int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, > > out: > if (bits != stack_fds) > - kfree(bits); > + kvfree(bits); > out_nofds: > return ret; > } -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html