On Tue, Oct 28, 2014 at 07:38:30PM +0000, Eddie Chapman wrote: > On 12/10/14 10:30, Michael S. Tsirkin wrote: > > On Thu, Oct 09, 2014 at 08:41:23AM +0400, Dmitry Petuhov wrote: > >> From: Michael S. Tsirkin <mst@xxxxxxxxxx> > >> > >> upstream commit 23cc5a991c7a9fb7e6d6550e65cee4f4173111c5 > >> > >> Michael Mueller provided a patch to reduce the size of > >> vhost-net structure as some allocations could fail under > >> memory pressure/fragmentation. We are still left with > >> high order allocations though. > >> > >> This patch is handling the problem at the core level, allowing > >> vhost structures to use vmalloc() if kmalloc() failed. > >> > >> As vmalloc() adds overhead on a critical network path, add __GFP_REPEAT > >> to kzalloc() flags to do this fallback only when really needed. > >> > >> People are still looking at cleaner ways to handle the problem > >> at the API level, probably passing in multiple iovecs. > >> This hack seems consistent with approaches > >> taken since then by drivers/vhost/scsi.c and net/core/dev.c > >> > >> Based on patch by Romain Francoise. > >> > >> Cc: Michael Mueller <mimu@xxxxxxxxxxxxxxxxxx> > >> Signed-off-by: Romain Francoise <romain@xxxxxxxxxxxxx> > >> Acked-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > >> [mityapetuhov: backport to v3.10: vhost_net_free() in one more place] > >> Signed-off-by: Dmitry Petuhov <mityapetuhov@xxxxxxxxx> > > > > Sounds reasonable. > > > > Acked-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > > > >> --- > >> diff -uprN a/drivers/vhost/net.c b/drivers/vhost/net.c > >> --- a/drivers/vhost/net.c 2014-10-09 06:45:08.336283258 +0400 > >> +++ b/drivers/vhost/net.c 2014-10-09 06:51:21.796266607 +0400 > >> @@ -18,6 +18,7 @@ > >> #include <linux/rcupdate.h> > >> #include <linux/file.h> > >> #include <linux/slab.h> > >> +#include <linux/vmalloc.h> > >> > >> #include <linux/net.h> > >> #include <linux/if_packet.h> > >> @@ -707,18 +708,30 @@ static void handle_rx_net(struct vhost_w > >> handle_rx(net); > >> } > >> > >> +static void vhost_net_free(void *addr) > >> +{ > >> + if (is_vmalloc_addr(addr)) > >> + vfree(addr); > >> + else > >> + kfree(addr); > >> +} > >> + > >> static int vhost_net_open(struct inode *inode, struct file *f) > >> { > >> - struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); > >> + struct vhost_net *n; > >> struct vhost_dev *dev; > >> struct vhost_virtqueue **vqs; > >> int r, i; > >> > >> - if (!n) > >> - return -ENOMEM; > >> + n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); > >> + if (!n) { > >> + n = vmalloc(sizeof *n); > >> + if (!n) > >> + return -ENOMEM; > >> + } > >> vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL); > >> if (!vqs) { > >> - kfree(n); > >> + vhost_net_free(n); > >> return -ENOMEM; > >> } > >> > >> @@ -737,7 +750,7 @@ static int vhost_net_open(struct inode * > >> } > >> r = vhost_dev_init(dev, vqs, VHOST_NET_VQ_MAX); > >> if (r < 0) { > >> - kfree(n); > >> + vhost_net_free(n); > >> kfree(vqs); > >> return r; > >> } > >> @@ -840,7 +853,7 @@ static int vhost_net_release(struct inod > >> * since jobs can re-queue themselves. */ > >> vhost_net_flush(n); > >> kfree(n->dev.vqs); > >> - kfree(n); > >> + vhost_net_free(n); > >> return 0; > >> } > >> > > > > Hi Michael, Dmitry, > > I needed to apply this to a server using vanilla 3.14, so I reformatted > Dmitry's 3.10 patch to make it apply cleanly to 3.14.23-rc1. It built > fine without warnings, and the kernel has been running now all day with > the patch applied without any problems, no errors in dmesg, module > loaded fine, and qemu VMs running without any network problems, with > vhost-net enabled. > > Since I don't really know what I'm doing (I just fixed up white space to > make it apply, and removed the extra vhost_net_free() added by Dmitry > for the 3.10 version as there was no such bit of code in 3.14) I just > wanted to paste what I came up with below just to ask for your eyeballs. > If you think I haven't missed anything obvious that is needed extra in > 3.14, then I'll send it as a properly formatted patch for 3.14 according > to submission rules, since it appears to be working (unless Dmitry > prefers to submit it). > > thanks, > Eddie Looks reasonable. Acked-by: Michael S. Tsirkin <mst@xxxxxxxxxx> > --- a/drivers/vhost/net.c 2014-03-31 04:40:15.000000000 +0100 > +++ b/drivers/vhost/net.c 2014-10-28 19:05:21.242141453 +0000 > @@ -17,6 +17,7 @@ > #include <linux/workqueue.h> > #include <linux/file.h> > #include <linux/slab.h> > +#include <linux/vmalloc.h> > > #include <linux/net.h> > #include <linux/if_packet.h> > @@ -699,18 +700,31 @@ > handle_rx(net); > } > > +static void vhost_net_free(void *addr) > +{ > + if (is_vmalloc_addr(addr)) > + vfree(addr); > + else > + kfree(addr); > +} > + > static int vhost_net_open(struct inode *inode, struct file *f) > { > - struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); > + struct vhost_net *n; > struct vhost_dev *dev; > struct vhost_virtqueue **vqs; > int i; > > - if (!n) > - return -ENOMEM; > + n = kmalloc(sizeof *n, GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT); > + if (!n) { > + n = vmalloc(sizeof *n); > + if (!n) > + return -ENOMEM; > + } > + > vqs = kmalloc(VHOST_NET_VQ_MAX * sizeof(*vqs), GFP_KERNEL); > if (!vqs) { > - kfree(n); > + vhost_net_free(n); > return -ENOMEM; > } > > @@ -827,7 +841,7 @@ > * since jobs can re-queue themselves. */ > vhost_net_flush(n); > kfree(n->dev.vqs); > - kfree(n); > + vhost_net_free(n); > return 0; > } -- To unsubscribe from this list: send the line "unsubscribe kvm" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html