On Mon, Mar 5, 2012 at 4:49 PM, Santosh Jodh <Santosh.Jodh@xxxxxxxxxx> wrote: > From: Santosh Jodh <santosh.jodh@xxxxxxxxxx> > > Add support for multi page ring for block devices. > The number of pages is configurable for blkback via module parameter. > blkback reports max-ring-page-order to blkfront via xenstore. > blkfront reports its supported ring-page-order to blkback via xenstore. > blkfront reports multi page ring references via ring-refNN in xenstore. > The change allows newer blkfront to work with older blkback and > vice-versa. > Based on original patch by Paul Durrant. You should include his SoB in this patch. The patch overall looks Ok, though I do have some comments: -> the call to "xenbus_ring_ops_init();" looks like a bug-fix? If so, it should be a separate patch. -> the usage of XenbusStateInitWait? Why do we introduce that? Looks like a fix to something. -> XENBUS_MAX_RING_PAGES - why 2? Why not 4? What is the optimal default size for SSD usage? 16? -> don't do sprintf, use snprintf -> don't use printk(KERN_..), use pr_info or one of its variants (pr_err, pr_debug, etc.) -> don't split the printk contents. It is Ok for them to be more than 80 characters. -> check that xen_blkif_ring_order is under XENBUS_MAX_RING_PAGES. Otherwise a joker could do = 9999999999999999999 for ring size and we would try to use that. -> Separate out the patch that introduces the changes to the XenBus infrastructure; the changes to net* and blk* to use the extra arguments would be folded into that patch. Then the patch that implements the multi-page ring for blkback is a patch that depends on the XenBus modifications patch. Also make sure you CC David Miller and Jens Axboe on the XenBus patch as it modifies the net-* side which requires Ian's and David's Ack. -> Have you done a sanity/test check where the backend and frontend have different size rings? Just to make sure nothing explodes. 
> > Signed-off-by: Santosh Jodh <santosh.jodh@xxxxxxxxxx> > --- > diff --git a/drivers/block/xen-blkback/blkback.c b/drivers/block/xen-blkback/blkback.c > index 0088bf6..72f2e18 100644 > --- a/drivers/block/xen-blkback/blkback.c > +++ b/drivers/block/xen-blkback/blkback.c > @@ -60,6 +60,39 @@ static int xen_blkif_reqs = 64; > module_param_named(reqs, xen_blkif_reqs, int, 0); > MODULE_PARM_DESC(reqs, "Number of blkback requests to allocate"); > > +/* Order of maximum shared ring size advertised to the front end. */ > +int xen_blkif_max_ring_order = XENBUS_MAX_RING_ORDER; > + > +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) > + > +static int set_max_ring_order(const char *buf, struct kernel_param *kp) > +{ > + int err; > + unsigned long order; > + > + err = kstrtol(buf, 0, &order); > + if (err || > + order < 0 || > + order > XENBUS_MAX_RING_ORDER) > + return -EINVAL; > + > + if (xen_blkif_reqs < BLK_RING_SIZE(order)) > + printk(KERN_WARNING "WARNING: " > + "I/O request space (%d reqs) < ring order %ld, " > + "consider increasing %s.reqs to >= %ld.", > + xen_blkif_reqs, order, KBUILD_MODNAME, > + roundup_pow_of_two(BLK_RING_SIZE(order))); > + > + xen_blkif_max_ring_order = order; > + > + return 0; > +} > + > +module_param_call(max_ring_order, > + set_max_ring_order, param_get_int, > + &xen_blkif_max_ring_order, 0644); > +MODULE_PARM_DESC(max_ring_order, "log2 of maximum ring size, in pages."); > + > /* Run-time switchable: /sys/module/blkback/parameters/ */ > static unsigned int log_stats; > module_param(log_stats, int, 0644); > diff --git a/drivers/block/xen-blkback/common.h b/drivers/block/xen-blkback/common.h > index d0ee7ed..5f33a1a 100644 > --- a/drivers/block/xen-blkback/common.h > +++ b/drivers/block/xen-blkback/common.h > @@ -126,6 +126,8 @@ struct blkif_x86_64_response { > int16_t status; /* BLKIF_RSP_??? 
*/ > }; > > +extern int xen_blkif_max_ring_order; > + > DEFINE_RING_TYPES(blkif_common, struct blkif_common_request, > struct blkif_common_response); > DEFINE_RING_TYPES(blkif_x86_32, struct blkif_x86_32_request, > diff --git a/drivers/block/xen-blkback/xenbus.c b/drivers/block/xen-blkback/xenbus.c > index 24a2fb5..7a9d71d 100644 > --- a/drivers/block/xen-blkback/xenbus.c > +++ b/drivers/block/xen-blkback/xenbus.c > @@ -122,8 +122,8 @@ static struct xen_blkif *xen_blkif_alloc(domid_t domid) > return blkif; > } > > -static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > - unsigned int evtchn) > +static int xen_blkif_map(struct xen_blkif *blkif, int ring_ref[], > + unsigned int ring_order, unsigned int evtchn) > { > int err; > > @@ -131,7 +131,8 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > if (blkif->irq) > return 0; > > - err = xenbus_map_ring_valloc(blkif->be->dev, shared_page, &blkif->blk_ring); > + err = xenbus_map_ring_valloc(blkif->be->dev, ring_ref, 1 << ring_order, > + &blkif->blk_ring); > if (err < 0) > return err; > > @@ -140,21 +141,24 @@ static int xen_blkif_map(struct xen_blkif *blkif, unsigned long shared_page, > { > struct blkif_sring *sring; > sring = (struct blkif_sring *)blkif->blk_ring; > - BACK_RING_INIT(&blkif->blk_rings.native, sring, PAGE_SIZE); > + BACK_RING_INIT(&blkif->blk_rings.native, sring, > + PAGE_SIZE << ring_order); > break; > } > case BLKIF_PROTOCOL_X86_32: > { > struct blkif_x86_32_sring *sring_x86_32; > sring_x86_32 = (struct blkif_x86_32_sring *)blkif->blk_ring; > - BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, PAGE_SIZE); > + BACK_RING_INIT(&blkif->blk_rings.x86_32, sring_x86_32, > + PAGE_SIZE << ring_order); > break; > } > case BLKIF_PROTOCOL_X86_64: > { > struct blkif_x86_64_sring *sring_x86_64; > sring_x86_64 = (struct blkif_x86_64_sring *)blkif->blk_ring; > - BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, PAGE_SIZE); > + 
BACK_RING_INIT(&blkif->blk_rings.x86_64, sring_x86_64, > + PAGE_SIZE << ring_order); > break; > } > default: > @@ -497,6 +501,11 @@ static int xen_blkbk_probe(struct xenbus_device *dev, > if (err) > goto fail; > > + err = xenbus_printf(XBT_NIL, dev->nodename, "max-ring-page-order", > + "%u", xen_blkif_max_ring_order); > + if (err) > + goto fail; > + > err = xenbus_switch_state(dev, XenbusStateInitWait); > if (err) > goto fail; > @@ -744,22 +753,80 @@ again: > static int connect_ring(struct backend_info *be) > { > struct xenbus_device *dev = be->dev; > - unsigned long ring_ref; > + int ring_ref[XENBUS_MAX_RING_PAGES]; > + unsigned int ring_order; > unsigned int evtchn; > char protocol[64] = ""; > int err; > > DPRINTK("%s", dev->otherend); > > - err = xenbus_gather(XBT_NIL, dev->otherend, "ring-ref", "%lu", > - &ring_ref, "event-channel", "%u", &evtchn, NULL); > - if (err) { > - xenbus_dev_fatal(dev, err, > - "reading %s/ring-ref and event-channel", > + err = xenbus_scanf(XBT_NIL, dev->otherend, "event-channel", "%u", > + &evtchn); > + if (err != 1) { > + err = -EINVAL; > + > + xenbus_dev_fatal(dev, err, "reading %s/event-channel", > dev->otherend); > return err; > } > > + printk(KERN_INFO "blkback: event-channel %u\n", evtchn); > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-page-order", "%u", > + &ring_order); > + if (err != 1) { > + DPRINTK("%s: using single page handshake", dev->otherend); > + > + ring_order = 0; > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, "ring-ref", > + "%d", &ring_ref[0]); > + if (err != 1) { > + err = -EINVAL; > + > + xenbus_dev_fatal(dev, err, "reading %s/ring-ref", > + dev->otherend); > + return err; > + } > + > + printk(KERN_INFO "blkback: ring-ref %d\n", ring_ref[0]); > + } else { > + unsigned int i; > + > + if (ring_order > xen_blkif_max_ring_order) { > + err = -EINVAL; > + > + xenbus_dev_fatal(dev, err, > + "%s/ring-page-order too big", > + dev->otherend); > + return err; > + } > + > + for (i = 0; i < (1u << ring_order); 
i++) { > + char ring_ref_name[10]; > + > + snprintf(ring_ref_name, sizeof(ring_ref_name), > + "ring-ref%u", i); > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, > + ring_ref_name, "%d", > + &ring_ref[i]); > + if (err != 1) { > + err = -EINVAL; > + > + xenbus_dev_fatal(dev, err, > + "reading %s/%s", > + dev->otherend, > + ring_ref_name); > + return err; > + } > + > + printk(KERN_INFO "blkback: ring-ref%u %d\n", i, > + ring_ref[i]); > + } > + } > + > be->blkif->blk_protocol = BLKIF_PROTOCOL_NATIVE; > err = xenbus_gather(XBT_NIL, dev->otherend, "protocol", > "%63s", protocol, NULL); > @@ -775,14 +842,11 @@ static int connect_ring(struct backend_info *be) > xenbus_dev_fatal(dev, err, "unknown fe protocol %s", protocol); > return -1; > } > - pr_info(DRV_PFX "ring-ref %ld, event-channel %d, protocol %d (%s)\n", > - ring_ref, evtchn, be->blkif->blk_protocol, protocol); > > /* Map the shared frame, irq etc. */ > - err = xen_blkif_map(be->blkif, ring_ref, evtchn); > + err = xen_blkif_map(be->blkif, ring_ref, ring_order, evtchn); > if (err) { > - xenbus_dev_fatal(dev, err, "mapping ring-ref %lu port %u", > - ring_ref, evtchn); > + xenbus_dev_fatal(dev, err, "mapping ring-refs and evtchn"); > return err; > } > > diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c > index 2f22874..485813a 100644 > --- a/drivers/block/xen-blkfront.c > +++ b/drivers/block/xen-blkfront.c > @@ -57,6 +57,10 @@ > > #include <asm/xen/hypervisor.h> > > +static int xen_blkif_ring_order; > +module_param_named(reqs, xen_blkif_ring_order, int, 0); > +MODULE_PARM_DESC(reqs, "log2 of requested ring size, in pages."); > + > enum blkif_state { > BLKIF_STATE_DISCONNECTED, > BLKIF_STATE_CONNECTED, > @@ -72,7 +76,8 @@ struct blk_shadow { > static DEFINE_MUTEX(blkfront_mutex); > static const struct block_device_operations xlvbd_block_fops; > > -#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, PAGE_SIZE) > +#define BLK_RING_SIZE(_order) __CONST_RING_SIZE(blkif, PAGE_SIZE << (_order)) > 
+#define BLK_MAX_RING_SIZE BLK_RING_SIZE(XENBUS_MAX_RING_ORDER) > > /* > * We have one of these per vbd, whether ide, scsi or 'other'. They > @@ -87,14 +92,15 @@ struct blkfront_info > int vdevice; > blkif_vdev_t handle; > enum blkif_state connected; > - int ring_ref; > + int ring_ref[XENBUS_MAX_RING_PAGES]; > + int ring_order; > struct blkif_front_ring ring; > struct scatterlist sg[BLKIF_MAX_SEGMENTS_PER_REQUEST]; > unsigned int evtchn, irq; > struct request_queue *rq; > struct work_struct work; > struct gnttab_free_callback callback; > - struct blk_shadow shadow[BLK_RING_SIZE]; > + struct blk_shadow shadow[BLK_MAX_RING_SIZE]; > unsigned long shadow_free; > unsigned int feature_flush; > unsigned int flush_op; > @@ -111,9 +117,7 @@ static unsigned int nr_minors; > static unsigned long *minors; > static DEFINE_SPINLOCK(minor_lock); > > -#define MAXIMUM_OUTSTANDING_BLOCK_REQS \ > - (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLK_RING_SIZE) > -#define GRANT_INVALID_REF 0 > +#define GRANT_INVALID_REF 0 > > #define PARTS_PER_DISK 16 > #define PARTS_PER_EXT_DISK 256 > @@ -135,7 +139,7 @@ static DEFINE_SPINLOCK(minor_lock); > static int get_id_from_freelist(struct blkfront_info *info) > { > unsigned long free = info->shadow_free; > - BUG_ON(free >= BLK_RING_SIZE); > + BUG_ON(free >= BLK_MAX_RING_SIZE); > info->shadow_free = info->shadow[free].req.u.rw.id; > info->shadow[free].req.u.rw.id = 0x0fffffee; /* debug */ > return free; > @@ -683,6 +687,8 @@ static void blkif_restart_queue(struct work_struct *work) > > static void blkif_free(struct blkfront_info *info, int suspend) > { > + int i; > + > /* Prevent new requests being issued until we fix things up. */ > spin_lock_irq(&blkif_io_lock); > info->connected = suspend ? > @@ -698,16 +704,19 @@ static void blkif_free(struct blkfront_info *info, int suspend) > flush_work_sync(&info->work); > > /* Free resources associated with old device channel. 
*/ > - if (info->ring_ref != GRANT_INVALID_REF) { > - gnttab_end_foreign_access(info->ring_ref, 0, > - (unsigned long)info->ring.sring); > - info->ring_ref = GRANT_INVALID_REF; > - info->ring.sring = NULL; > + for (i = 0; i < (1 << info->ring_order); i++) { > + if (info->ring_ref[i] != GRANT_INVALID_REF) { > + gnttab_end_foreign_access(info->ring_ref[i], 0, 0); > + info->ring_ref[i] = GRANT_INVALID_REF; > + } > } > + > + free_pages((unsigned long)info->ring.sring, info->ring_order); > + info->ring.sring = NULL; > + > if (info->irq) > unbind_from_irqhandler(info->irq, info); > info->evtchn = info->irq = 0; > - > } > > static void blkif_completion(struct blk_shadow *s) > @@ -828,25 +837,24 @@ static int setup_blkring(struct xenbus_device *dev, > struct blkif_sring *sring; > int err; > > - info->ring_ref = GRANT_INVALID_REF; > - > - sring = (struct blkif_sring *)__get_free_page(GFP_NOIO | __GFP_HIGH); > + sring = (struct blkif_sring *)__get_free_pages(GFP_NOIO | __GFP_HIGH, > + info->ring_order); > if (!sring) { > xenbus_dev_fatal(dev, -ENOMEM, "allocating shared ring"); > return -ENOMEM; > } > SHARED_RING_INIT(sring); > - FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE); > + FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE << info->ring_order); > > sg_init_table(info->sg, BLKIF_MAX_SEGMENTS_PER_REQUEST); > > - err = xenbus_grant_ring(dev, virt_to_mfn(info->ring.sring)); > + err = xenbus_grant_ring(dev, info->ring.sring, 1 << info->ring_order, > + info->ring_ref); > if (err < 0) { > - free_page((unsigned long)sring); > + free_pages((unsigned long)sring, info->ring_order); > info->ring.sring = NULL; > goto fail; > } > - info->ring_ref = err; > > err = xenbus_alloc_evtchn(dev, &info->evtchn); > if (err) > @@ -875,8 +883,27 @@ static int talk_to_blkback(struct xenbus_device *dev, > { > const char *message = NULL; > struct xenbus_transaction xbt; > + unsigned int ring_order; > + int legacy_backend; > + int i; > int err; > > + for (i = 0; i < (1 << info->ring_order); i++) > + 
info->ring_ref[i] = GRANT_INVALID_REF; > + > + err = xenbus_scanf(XBT_NIL, dev->otherend, "max-ring-page-order", "%u", > + &ring_order); > + > + legacy_backend = !(err == 1); > + > + if (legacy_backend) { > + info->ring_order = 0; > + } else { > + info->ring_order = (ring_order <= xen_blkif_ring_order) ? > + ring_order : > + xen_blkif_ring_order; > + } > + > /* Create shared ring, alloc event channel. */ > err = setup_blkring(dev, info); > if (err) > @@ -889,12 +916,35 @@ again: > goto destroy_blkring; > } > > - err = xenbus_printf(xbt, dev->nodename, > - "ring-ref", "%u", info->ring_ref); > - if (err) { > - message = "writing ring-ref"; > - goto abort_transaction; > + if (legacy_backend) { > + err = xenbus_printf(xbt, dev->nodename, > + "ring-ref", "%d", info->ring_ref[0]); > + if (err) { > + message = "writing ring-ref"; > + goto abort_transaction; > + } > + } else { > + for (i = 0; i < (1 << info->ring_order); i++) { > + char key[sizeof("ring-ref") + 2]; > + > + sprintf(key, "ring-ref%d", i); > + > + err = xenbus_printf(xbt, dev->nodename, > + key, "%d", info->ring_ref[i]); > + if (err) { > + message = "writing ring-ref"; > + goto abort_transaction; > + } > + } > + > + err = xenbus_printf(xbt, dev->nodename, > + "ring-page-order", "%u", info->ring_order); > + if (err) { > + message = "writing ring-order"; > + goto abort_transaction; > + } > } > + > err = xenbus_printf(xbt, dev->nodename, > "event-channel", "%u", info->evtchn); > if (err) { > @@ -996,21 +1046,14 @@ static int blkfront_probe(struct xenbus_device *dev, > info->connected = BLKIF_STATE_DISCONNECTED; > INIT_WORK(&info->work, blkif_restart_queue); > > - for (i = 0; i < BLK_RING_SIZE; i++) > + for (i = 0; i < BLK_MAX_RING_SIZE; i++) > info->shadow[i].req.u.rw.id = i+1; > - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > > /* Front end dir is a number, which is used as the id. 
*/ > info->handle = simple_strtoul(strrchr(dev->nodename, '/')+1, NULL, 0); > dev_set_drvdata(&dev->dev, info); > > - err = talk_to_blkback(dev, info); > - if (err) { > - kfree(info); > - dev_set_drvdata(&dev->dev, NULL); > - return err; > - } > - > return 0; > } > > @@ -1031,13 +1074,13 @@ static int blkif_recover(struct blkfront_info *info) > > /* Stage 2: Set up free list. */ > memset(&info->shadow, 0, sizeof(info->shadow)); > - for (i = 0; i < BLK_RING_SIZE; i++) > + for (i = 0; i < BLK_MAX_RING_SIZE; i++) > info->shadow[i].req.u.rw.id = i+1; > info->shadow_free = info->ring.req_prod_pvt; > - info->shadow[BLK_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > + info->shadow[BLK_MAX_RING_SIZE-1].req.u.rw.id = 0x0fffffff; > > /* Stage 3: Find pending requests and requeue them. */ > - for (i = 0; i < BLK_RING_SIZE; i++) { > + for (i = 0; i < BLK_RING_SIZE(info->ring_order); i++) { > /* Not in use? */ > if (!copy[i].request) > continue; > @@ -1299,7 +1342,6 @@ static void blkback_changed(struct xenbus_device *dev, > > switch (backend_state) { > case XenbusStateInitialising: > - case XenbusStateInitWait: > case XenbusStateInitialised: > case XenbusStateReconfiguring: > case XenbusStateReconfigured: > @@ -1307,6 +1349,10 @@ static void blkback_changed(struct xenbus_device *dev, > case XenbusStateClosed: > break; > > + case XenbusStateInitWait: > + talk_to_blkback(dev, info); > + break; > + > case XenbusStateConnected: > blkfront_connect(info); > break; > diff --git a/drivers/net/xen-netback/common.h b/drivers/net/xen-netback/common.h > index 94b79c3..f93b59a 100644 > --- a/drivers/net/xen-netback/common.h > +++ b/drivers/net/xen-netback/common.h > @@ -130,8 +130,8 @@ int xen_netbk_must_stop_queue(struct xenvif *vif); > /* (Un)Map communication rings. 
*/ > void xen_netbk_unmap_frontend_rings(struct xenvif *vif); > int xen_netbk_map_frontend_rings(struct xenvif *vif, > - grant_ref_t tx_ring_ref, > - grant_ref_t rx_ring_ref); > + int tx_ring_ref, > + int rx_ring_ref); > > /* (De)Register a xenvif with the netback backend. */ > void xen_netbk_add_xenvif(struct xenvif *vif); > diff --git a/drivers/net/xen-netback/netback.c b/drivers/net/xen-netback/netback.c > index 59effac..0b014cf 100644 > --- a/drivers/net/xen-netback/netback.c > +++ b/drivers/net/xen-netback/netback.c > @@ -1594,8 +1594,8 @@ void xen_netbk_unmap_frontend_rings(struct xenvif *vif) > } > > int xen_netbk_map_frontend_rings(struct xenvif *vif, > - grant_ref_t tx_ring_ref, > - grant_ref_t rx_ring_ref) > + int tx_ring_ref, > + int rx_ring_ref) > { > void *addr; > struct xen_netif_tx_sring *txs; > @@ -1604,7 +1604,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, > int err = -ENOMEM; > > err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), > - tx_ring_ref, &addr); > + &tx_ring_ref, 1, &addr); > if (err) > goto err; > > @@ -1612,7 +1612,7 @@ int xen_netbk_map_frontend_rings(struct xenvif *vif, > BACK_RING_INIT(&vif->tx, txs, PAGE_SIZE); > > err = xenbus_map_ring_valloc(xenvif_to_xenbus_device(vif), > - rx_ring_ref, &addr); > + &rx_ring_ref, 1, &addr); > if (err) > goto err; > > diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c > index 698b905..521a595 100644 > --- a/drivers/net/xen-netfront.c > +++ b/drivers/net/xen-netfront.c > @@ -1496,13 +1496,12 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) > SHARED_RING_INIT(txs); > FRONT_RING_INIT(&info->tx, txs, PAGE_SIZE); > > - err = xenbus_grant_ring(dev, virt_to_mfn(txs)); > + err = xenbus_grant_ring(dev, txs, 1, &info->tx_ring_ref); > if (err < 0) { > free_page((unsigned long)txs); > goto fail; > } > > - info->tx_ring_ref = err; > rxs = (struct xen_netif_rx_sring *)get_zeroed_page(GFP_NOIO | __GFP_HIGH); > if (!rxs) { > err = -ENOMEM; > 
@@ -1512,12 +1511,11 @@ static int setup_netfront(struct xenbus_device *dev, struct netfront_info *info) > SHARED_RING_INIT(rxs); > FRONT_RING_INIT(&info->rx, rxs, PAGE_SIZE); > > - err = xenbus_grant_ring(dev, virt_to_mfn(rxs)); > + err = xenbus_grant_ring(dev, rxs, 1, &info->rx_ring_ref); > if (err < 0) { > free_page((unsigned long)rxs); > goto fail; > } > - info->rx_ring_ref = err; > > err = xenbus_alloc_evtchn(dev, &info->evtchn); > if (err) > diff --git a/drivers/pci/xen-pcifront.c b/drivers/pci/xen-pcifront.c > index 1620088..95109d8 100644 > --- a/drivers/pci/xen-pcifront.c > +++ b/drivers/pci/xen-pcifront.c > @@ -768,12 +768,10 @@ static int pcifront_publish_info(struct pcifront_device *pdev) > int err = 0; > struct xenbus_transaction trans; > > - err = xenbus_grant_ring(pdev->xdev, virt_to_mfn(pdev->sh_info)); > + err = xenbus_grant_ring(pdev->xdev, pdev->sh_info, 1, &pdev->gnt_ref); > if (err < 0) > goto out; > > - pdev->gnt_ref = err; > - > err = xenbus_alloc_evtchn(pdev->xdev, &pdev->evtchn); > if (err) > goto out; > diff --git a/drivers/xen/xen-pciback/xenbus.c b/drivers/xen/xen-pciback/xenbus.c > index 64b11f9..e0834cd 100644 > --- a/drivers/xen/xen-pciback/xenbus.c > +++ b/drivers/xen/xen-pciback/xenbus.c > @@ -108,7 +108,7 @@ static int xen_pcibk_do_attach(struct xen_pcibk_device *pdev, int gnt_ref, > "Attaching to frontend resources - gnt_ref=%d evtchn=%d\n", > gnt_ref, remote_evtchn); > > - err = xenbus_map_ring_valloc(pdev->xdev, gnt_ref, &vaddr); > + err = xenbus_map_ring_valloc(pdev->xdev, &gnt_ref, 1, &vaddr); > if (err < 0) { > xenbus_dev_fatal(pdev->xdev, err, > "Error mapping other domain page in ours."); > diff --git a/drivers/xen/xenbus/xenbus_client.c b/drivers/xen/xenbus/xenbus_client.c > index 566d2ad..3a14524 100644 > --- a/drivers/xen/xenbus/xenbus_client.c > +++ b/drivers/xen/xenbus/xenbus_client.c > @@ -53,14 +53,16 @@ struct xenbus_map_node { > struct vm_struct *area; /* PV */ > struct page *page; /* HVM */ > }; > - grant_handle_t 
handle; > + grant_handle_t handle[XENBUS_MAX_RING_PAGES]; > + unsigned int nr_handles; > }; > > static DEFINE_SPINLOCK(xenbus_valloc_lock); > static LIST_HEAD(xenbus_valloc_pages); > > struct xenbus_ring_ops { > - int (*map)(struct xenbus_device *dev, int gnt, void **vaddr); > + int (*map)(struct xenbus_device *dev, int gnt[], int nr_gnts, > + void **vaddr); > int (*unmap)(struct xenbus_device *dev, void *vaddr); > }; > > @@ -356,17 +358,38 @@ static void xenbus_switch_fatal(struct xenbus_device *dev, int depth, int err, > /** > * xenbus_grant_ring > * @dev: xenbus device > - * @ring_mfn: mfn of ring to grant > - > - * Grant access to the given @ring_mfn to the peer of the given device. Return > - * 0 on success, or -errno on error. On error, the device will switch to > - * XenbusStateClosing, and the error will be saved in the store. > + * @vaddr: starting virtual address of the ring > + * @nr_pages: number of page to be granted > + * @grefs: grant reference array to be filled in > + * Grant access to the given @vaddr to the peer of the given device. > + * Then fill in @grefs with grant references. Return 0 on success, or > + * -errno on error. On error, the device will switch to > + * XenbusStateClosing, and the first error will be saved in the store. 
> */ > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn) > +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, > + int nr_pages, int grefs[]) > { > - int err = gnttab_grant_foreign_access(dev->otherend_id, ring_mfn, 0); > - if (err < 0) > - xenbus_dev_fatal(dev, err, "granting access to ring page"); > + int i; > + int err; > + > + for (i = 0; i < nr_pages; i++) { > + unsigned long addr = (unsigned long)vaddr + > + (PAGE_SIZE * i); > + err = gnttab_grant_foreign_access(dev->otherend_id, > + virt_to_mfn(addr), 0); > + if (err < 0) { > + xenbus_dev_fatal(dev, err, > + "granting access to ring page"); > + goto fail; > + } > + grefs[i] = err; > + } > + > + return 0; > + > +fail: > + for ( ; i >= 0; i--) > + gnttab_end_foreign_access_ref(grefs[i], 0); > return err; > } > EXPORT_SYMBOL_GPL(xenbus_grant_ring); > @@ -447,7 +470,8 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); > /** > * xenbus_map_ring_valloc > * @dev: xenbus device > - * @gnt_ref: grant reference > + * @gnt_ref: grant reference array > + * @nr_grefs: number of grant reference > * @vaddr: pointer to address to be filled out by mapping > * > * Based on Rusty Russell's skeleton driver's map_page. > @@ -458,23 +482,28 @@ EXPORT_SYMBOL_GPL(xenbus_free_evtchn); > * or -ENOMEM on error. If an error is returned, device will switch to > * XenbusStateClosing and the error message will be saved in XenStore. 
> */ > -int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref, void **vaddr) > +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], > + int nr_grefs, void **vaddr) > { > - return ring_ops->map(dev, gnt_ref, vaddr); > + return ring_ops->map(dev, gnt_ref, nr_grefs, vaddr); > } > EXPORT_SYMBOL_GPL(xenbus_map_ring_valloc); > > +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, > + struct xenbus_map_node *node); > + > static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, > - int gnt_ref, void **vaddr) > + int gnt_ref[], int nr_grefs, void **vaddr) > { > - struct gnttab_map_grant_ref op = { > - .flags = GNTMAP_host_map | GNTMAP_contains_pte, > - .ref = gnt_ref, > - .dom = dev->otherend_id, > - }; > + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; > struct xenbus_map_node *node; > struct vm_struct *area; > - pte_t *pte; > + pte_t *pte[XENBUS_MAX_RING_PAGES]; > + int i; > + int err = 0; > + > + if (nr_grefs > XENBUS_MAX_RING_PAGES) > + return -EINVAL; > > *vaddr = NULL; > > @@ -482,28 +511,44 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, > if (!node) > return -ENOMEM; > > - area = alloc_vm_area(PAGE_SIZE, &pte); > + area = alloc_vm_area(PAGE_SIZE * nr_grefs, pte); > if (!area) { > kfree(node); > return -ENOMEM; > } > > - op.host_addr = arbitrary_virt_to_machine(pte).maddr; > + for (i = 0; i < nr_grefs; i++) { > + op[i].flags = GNTMAP_host_map | GNTMAP_contains_pte, > + op[i].ref = gnt_ref[i], > + op[i].dom = dev->otherend_id, > + op[i].host_addr = arbitrary_virt_to_machine(pte[i]).maddr; > + }; > > if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) > BUG(); > > - if (op.status != GNTST_okay) { > - free_vm_area(area); > - kfree(node); > - xenbus_dev_fatal(dev, op.status, > - "mapping in shared page %d from domain %d", > - gnt_ref, dev->otherend_id); > - return op.status; > + node->nr_handles = nr_grefs; > + node->area = area; > + > + for (i = 0; i < nr_grefs; i++) { > + if 
(op[i].status != GNTST_okay) { > + err = op[i].status; > + node->handle[i] = INVALID_GRANT_HANDLE; > + continue; > + } > + node->handle[i] = op[i].handle; > } > > - node->handle = op.handle; > - node->area = area; > + if (err != 0) { > + for (i = 0; i < nr_grefs; i++) > + xenbus_dev_fatal(dev, op[i].status, > + "mapping in shared page %d from domain %d", > + gnt_ref[i], dev->otherend_id); > + > + __xenbus_unmap_ring_vfree_pv(dev, node); > + > + return err; > + } > > spin_lock(&xenbus_valloc_lock); > list_add(&node->next, &xenbus_valloc_pages); > @@ -514,25 +559,29 @@ static int xenbus_map_ring_valloc_pv(struct xenbus_device *dev, > } > > static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, > - int gnt_ref, void **vaddr) > + int gnt_ref[], int nr_grefs, void **vaddr) > { > struct xenbus_map_node *node; > int err; > void *addr; > > + if (nr_grefs > XENBUS_MAX_RING_PAGES) > + return -EINVAL; > + > *vaddr = NULL; > > node = kzalloc(sizeof(*node), GFP_KERNEL); > if (!node) > return -ENOMEM; > > - err = alloc_xenballooned_pages(1, &node->page, false /* lowmem */); > + err = alloc_xenballooned_pages(nr_grefs, &node->page, > + false /* lowmem */); > if (err) > goto out_err; > > addr = pfn_to_kaddr(page_to_pfn(node->page)); > > - err = xenbus_map_ring(dev, gnt_ref, &node->handle, addr); > + err = xenbus_map_ring(dev, gnt_ref, nr_grefs, node->handle, addr); > if (err) > goto out_err; > > @@ -544,7 +593,7 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, > return 0; > > out_err: > - free_xenballooned_pages(1, &node->page); > + free_xenballooned_pages(nr_grefs, &node->page); > kfree(node); > return err; > } > @@ -553,36 +602,51 @@ static int xenbus_map_ring_valloc_hvm(struct xenbus_device *dev, > /** > * xenbus_map_ring > * @dev: xenbus device > - * @gnt_ref: grant reference > - * @handle: pointer to grant handle to be filled > + * @gnt_ref: grant reference array > + * @nr_grefs: number of grant references > + * @handle: pointer to grant handle 
array to be filled, mind the size > * @vaddr: address to be mapped to > * > - * Map a page of memory into this domain from another domain's grant table. > + * Map pages of memory into this domain from another domain's grant table. > * xenbus_map_ring does not allocate the virtual address space (you must do > - * this yourself!). It only maps in the page to the specified address. > + * this yourself!). It only maps in the pages to the specified address. > * Returns 0 on success, and GNTST_* (see xen/include/interface/grant_table.h) > * or -ENOMEM on error. If an error is returned, device will switch to > - * XenbusStateClosing and the error message will be saved in XenStore. > + * XenbusStateClosing and the last error message will be saved in XenStore. > */ > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, > - grant_handle_t *handle, void *vaddr) > +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, > + grant_handle_t handle[], void *vaddr) > { > - struct gnttab_map_grant_ref op; > - > - gnttab_set_map_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, gnt_ref, > - dev->otherend_id); > + struct gnttab_map_grant_ref op[XENBUS_MAX_RING_PAGES]; > + int i; > + int err = GNTST_okay; /* 0 */ > + > + for (i = 0; i < nr_grefs; i++) { > + unsigned long addr = (unsigned long)vaddr + > + (PAGE_SIZE * i); > + gnttab_set_map_op(&op[i], (phys_addr_t)addr, > + GNTMAP_host_map, gnt_ref[i], > + dev->otherend_id); > + } > > - if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1)) > + if (HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, op, nr_grefs)) > BUG(); > > - if (op.status != GNTST_okay) { > - xenbus_dev_fatal(dev, op.status, > - "mapping in shared page %d from domain %d", > - gnt_ref, dev->otherend_id); > - } else > - *handle = op.handle; > + for (i = 0; i < nr_grefs; i++) { > + if (op[i].status != GNTST_okay) { > + err = op[i].status; > + xenbus_dev_fatal(dev, err, > + "mapping in shared page %d from domain %d", > + gnt_ref[i], 
dev->otherend_id); > + handle[i] = INVALID_GRANT_HANDLE; > + } else > + handle[i] = op[i].handle; > + } > > - return op.status; > + if (err != GNTST_okay) > + xenbus_unmap_ring(dev, handle, nr_grefs, vaddr); > + > + return err; > } > EXPORT_SYMBOL_GPL(xenbus_map_ring); > > @@ -605,13 +669,53 @@ int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr) > } > EXPORT_SYMBOL_GPL(xenbus_unmap_ring_vfree); > > +static int __xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, > + struct xenbus_map_node *node) > +{ > + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; > + unsigned int level; > + int i, j; > + int err = GNTST_okay; > + > + j = 0; > + for (i = 0; i < node->nr_handles; i++) { > + unsigned long vaddr = (unsigned long)node->area->addr + > + (PAGE_SIZE * i); > + if (node->handle[i] != INVALID_GRANT_HANDLE) { > + memset(&op[j], 0, sizeof(op[0])); > + op[j].host_addr = arbitrary_virt_to_machine( > + lookup_address(vaddr, &level)).maddr; > + op[j].handle = node->handle[i]; > + j++; > + node->handle[i] = INVALID_GRANT_HANDLE; > + } > + } > + > + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) > + BUG(); > + > + node->nr_handles = 0; > + > + for (i = 0; i < j; i++) { > + if (op[i].status != GNTST_okay) { > + err = op[i].status; > + xenbus_dev_error(dev, err, > + "unmapping page %d at handle %d error %d", > + i, op[i].handle, err); > + } > + } > + > + if (err == GNTST_okay) > + free_vm_area(node->area); > + > + kfree(node); > + > + return err; > +} > + > static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) > { > struct xenbus_map_node *node; > - struct gnttab_unmap_grant_ref op = { > - .host_addr = (unsigned long)vaddr, > - }; > - unsigned int level; > > spin_lock(&xenbus_valloc_lock); > list_for_each_entry(node, &xenbus_valloc_pages, next) { > @@ -626,33 +730,18 @@ static int xenbus_unmap_ring_vfree_pv(struct xenbus_device *dev, void *vaddr) > > if (!node) { > xenbus_dev_error(dev, -ENOENT, > - "can't find 
mapped virtual address %p", vaddr); > + "can't find mapped virtual address %p", vaddr); > return GNTST_bad_virt_addr; > } > > - op.handle = node->handle; > - op.host_addr = arbitrary_virt_to_machine( > - lookup_address((unsigned long)vaddr, &level)).maddr; > - > - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) > - BUG(); > - > - if (op.status == GNTST_okay) > - free_vm_area(node->area); > - else > - xenbus_dev_error(dev, op.status, > - "unmapping page at handle %d error %d", > - node->handle, op.status); > - > - kfree(node); > - return op.status; > + return __xenbus_unmap_ring_vfree_pv(dev, node); > } > > static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > { > int rv; > struct xenbus_map_node *node; > - void *addr; > + void *addr = NULL; > > spin_lock(&xenbus_valloc_lock); > list_for_each_entry(node, &xenbus_valloc_pages, next) { > @@ -668,14 +757,14 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > > if (!node) { > xenbus_dev_error(dev, -ENOENT, > - "can't find mapped virtual address %p", vaddr); > + "can't find mapped virtual address %p", vaddr); > return GNTST_bad_virt_addr; > } > > - rv = xenbus_unmap_ring(dev, node->handle, addr); > + rv = xenbus_unmap_ring(dev, node->handle, node->nr_handles, addr); > > if (!rv) > - free_xenballooned_pages(1, &node->page); > + free_xenballooned_pages(node->nr_handles, &node->page); > else > WARN(1, "Leaking %p\n", vaddr); > > @@ -687,6 +776,7 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > * xenbus_unmap_ring > * @dev: xenbus device > * @handle: grant handle > + * @nr_handles: number of grant handle > * @vaddr: addr to unmap > * > * Unmap a page of memory in this domain that was imported from another domain. > @@ -694,21 +784,37 @@ static int xenbus_unmap_ring_vfree_hvm(struct xenbus_device *dev, void *vaddr) > * (see xen/include/interface/grant_table.h). 
> */ > int xenbus_unmap_ring(struct xenbus_device *dev, > - grant_handle_t handle, void *vaddr) > + grant_handle_t handle[], int nr_handles, > + void *vaddr) > { > - struct gnttab_unmap_grant_ref op; > - > - gnttab_set_unmap_op(&op, (phys_addr_t)vaddr, GNTMAP_host_map, handle); > + struct gnttab_unmap_grant_ref op[XENBUS_MAX_RING_PAGES]; > + int i, j; > + int err = GNTST_okay; > + > + j = 0; > + for (i = 0; i < nr_handles; i++) { > + unsigned long addr = (unsigned long)vaddr + > + (PAGE_SIZE * i); > + if (handle[i] != INVALID_GRANT_HANDLE) { > + gnttab_set_unmap_op(&op[j++], (phys_addr_t)addr, > + GNTMAP_host_map, handle[i]); > + handle[i] = INVALID_GRANT_HANDLE; > + } > + } > > - if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1)) > + if (HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, op, j)) > BUG(); > > - if (op.status != GNTST_okay) > - xenbus_dev_error(dev, op.status, > - "unmapping page at handle %d error %d", > - handle, op.status); > + for (i = 0; i < j; i++) { > + if (op[i].status != GNTST_okay) { > + err = op[i].status; > + xenbus_dev_error(dev, err, > + "unmapping page at handle %d error %d", > + handle[i], err); > + } > + } > > - return op.status; > + return err; > } > EXPORT_SYMBOL_GPL(xenbus_unmap_ring); > > diff --git a/drivers/xen/xenbus/xenbus_probe.c b/drivers/xen/xenbus/xenbus_probe.c > index 3864967..62b92d2 100644 > --- a/drivers/xen/xenbus/xenbus_probe.c > +++ b/drivers/xen/xenbus/xenbus_probe.c > @@ -718,6 +718,7 @@ static int __init xenstored_local_init(void) > return err; > } > > +extern void xenbus_ring_ops_init(void); > static int __init xenbus_init(void) > { > int err = 0; > @@ -767,6 +768,8 @@ static int __init xenbus_init(void) > proc_mkdir("xen", NULL); > #endif > > + xenbus_ring_ops_init(); > + > out_error: > return err; > } > diff --git a/include/xen/xenbus.h b/include/xen/xenbus.h > index e8c599b..cdbd948 100644 > --- a/include/xen/xenbus.h > +++ b/include/xen/xenbus.h > @@ -195,15 +195,23 @@ int 
xenbus_watch_pathfmt(struct xenbus_device *dev, struct xenbus_watch *watch, > const char *pathfmt, ...); > > int xenbus_switch_state(struct xenbus_device *dev, enum xenbus_state new_state); > -int xenbus_grant_ring(struct xenbus_device *dev, unsigned long ring_mfn); > -int xenbus_map_ring_valloc(struct xenbus_device *dev, > - int gnt_ref, void **vaddr); > -int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref, > - grant_handle_t *handle, void *vaddr); > + > +#define XENBUS_MAX_RING_ORDER 2 > +#define XENBUS_MAX_RING_PAGES (1 << XENBUS_MAX_RING_ORDER) > + > +#define INVALID_GRANT_HANDLE (~0U) > + > +int xenbus_grant_ring(struct xenbus_device *dev, void *vaddr, > + int nr_pages, int grefs[]); > +int xenbus_map_ring_valloc(struct xenbus_device *dev, int gnt_ref[], > + int nr_grefs, void **vaddr); > +int xenbus_map_ring(struct xenbus_device *dev, int gnt_ref[], int nr_grefs, > + grant_handle_t handle[], void *vaddr); > > int xenbus_unmap_ring_vfree(struct xenbus_device *dev, void *vaddr); > int xenbus_unmap_ring(struct xenbus_device *dev, > - grant_handle_t handle, void *vaddr); > + grant_handle_t handle[], int nr_handles, > + void *vaddr); > > int xenbus_alloc_evtchn(struct xenbus_device *dev, int *port); > int xenbus_bind_evtchn(struct xenbus_device *dev, int remote_port, int *port); > > _______________________________________________ > Xen-devel mailing list > Xen-devel@xxxxxxxxxxxxx > http://lists.xen.org/xen-devel > _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization