FT> I can't apply both cleanly.

Hmm, really?  The kernel patch is against 2.6.20-rc6 and the library
patch is against device-mapper CVS from January 29th.

FT> Can you resend them as an attachment (though I don't like
FT> attachments).

Attached.

Signed-off-by: Dan Smith <danms@xxxxxxxxxx>
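For anyone who wants a feel for the userspace side before digging into the
diffs, below is a minimal, hypothetical sketch of a daemon driving the library
interface added by the second patch.  The device name ("mydev") and the
destination device numbers are made up, the handler return conventions follow
the comments added to libdevmapper.h (1 = map now, -1 = fail, 0 = delay), and
the polling loop is only an assumption about how dmu_events_pending() and
dmu_process_events() are meant to be combined; it is not part of the patch.

/*
 * Hypothetical consumer of the dm-userspace library interface.
 * Assumes a "userspace" target named "mydev" already exists and that
 * the library from the second patch is installed.
 */
#include <stdio.h>
#include <sys/types.h>
#include <sys/sysmacros.h>
#include <libdevmapper.h>

static int map_handler(void *data, struct dmu_map_data *map_data)
{
	/* Identity-map each requested block onto the destination device */
	dmu_map_set_block(map_data, dmu_map_get_block(map_data));
	dmu_map_set_offset(map_data, 0);
	dmu_map_set_dest_dev(map_data, *(dev_t *) data);

	return 1;	/* map the I/O immediately */
}

int main(void)
{
	dev_t dest = makedev(8, 16);	/* example destination device */
	struct dmu_context *ctx;

	ctx = dmu_ctl_open("mydev", 0);
	if (!ctx) {
		fprintf(stderr, "failed to open dm-userspace control channel\n");
		return 1;
	}

	dmu_register_map_handler(ctx, map_handler, &dest);

	/* Assumed event loop: wait for requests, handle them, flush replies */
	while (dmu_events_pending(ctx, 1000) >= 0) {
		dmu_process_events(ctx);
		dmu_ctl_send_queue(ctx);
	}

	dmu_ctl_close(ctx);
	return 0;
}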
diff -r 50f87a6ffd94 drivers/md/Kconfig --- a/drivers/md/Kconfig Thu Jan 25 17:50:37 2007 -0800 +++ b/drivers/md/Kconfig Mon Jan 29 14:28:05 2007 -0800 @@ -236,6 +236,12 @@ config DM_SNAPSHOT ---help--- Allow volume managers to take writable snapshots of a device. +config DM_USERSPACE + tristate "Userspace target (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + A target that provides a userspace interface to device-mapper + config DM_MIRROR tristate "Mirror target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL diff -r 50f87a6ffd94 drivers/md/Makefile --- a/drivers/md/Makefile Thu Jan 25 17:50:37 2007 -0800 +++ b/drivers/md/Makefile Mon Jan 29 14:28:05 2007 -0800 @@ -14,6 +14,8 @@ raid456-objs := raid5.o raid6algos.o rai raid6altivec1.o raid6altivec2.o raid6altivec4.o \ raid6altivec8.o \ raid6mmx.o raid6sse1.o raid6sse2.o +dm-user-objs := dm-userspace.o dm-userspace-chardev.o \ + dm-userspace-cache.o hostprogs-y := mktables # Note: link order is important. All raid personalities @@ -36,6 +38,7 @@ obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o obj-$(CONFIG_DM_ZERO) += dm-zero.o +obj-$(CONFIG_DM_USERSPACE) += dm-user.o quiet_cmd_unroll = UNROLL $@ cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \ diff -r 50f87a6ffd94 drivers/md/dm-user.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/md/dm-user.h Mon Jan 29 14:28:05 2007 -0800 @@ -0,0 +1,176 @@ +/* + * Copyright IBM Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __DM_USER_H +#define __DM_USER_H + +#include <linux/dm-userspace.h> + +#include <linux/hardirq.h> +#include <linux/slab.h> + +#define DMU_KEY_LEN 256 + +extern struct target_type userspace_target; +extern mempool_t *request_pool; +extern dev_t dmu_dev; +extern spinlock_t devices_lock; +extern struct list_head devices; + +struct dmu_mappings; + +#define DMU_CP_HASH 1024 + +/* + * A block device that we can send bios to + */ +struct target_device { + struct list_head list; /* Our place in the targets list */ + struct block_device *bdev; /* The target block_device */ + struct kref users; /* Self-destructing reference count */ +}; + +/* + * A dm-userspace device, which consists of multiple targets sharing a + * common key + */ +struct dmu_device { + struct list_head list; /* Our place in the devices list */ + + spinlock_t lock; /* Protects all the fields below */ + + /* We need to protect the TX/RX lists with a separate lock that is + * always used with IRQs disabled because it is locked from + * inside the endio function + */ + spinlock_t xmit_lock; + struct list_head tx_requests; /* Requests to send to userspace */ + struct list_head *rx_requests; /* Requests waiting for reply */ + + struct dmu_mappings *mappings; + + /* Accounting */ + atomic_t t_reqs; /* Waiting to be sent to userspace */ + atomic_t r_reqs; /* Waiting for a response from uspace*/ + atomic_t f_reqs; /* Submitted, waiting for endio */ + atomic_t total; /* Total requests allocated */ + + atomic_t idcounter; /* Counter for making request IDs */ + + struct list_head target_devs; /* List of devices we can target */ + + void *transport_private; /* Private data for userspace comms */ + + char key[DMU_KEY_LEN]; /* Unique name string for device */ + struct kref users; /* Self-destructing reference count */ + + wait_queue_head_t lowmem; /* To block while waiting for memory */ + + uint64_t block_size; /* Block size for this device */ + uint64_t block_mask; /* Mask for offset in block */ + unsigned int block_shift; /* Shift to convert to/from block */ + + struct kcopyd_client *kcopy; /* Interface to kcopyd */ + + unsigned int request_slots; /* Max number of reqs we will queue */ +}; + +struct dmu_request { + struct list_head list; /* Our place on the request queue */ + struct list_head copy; /* Our place on the copy list */ + struct dmu_device *dev; /* The DMU device that owns us */ + + struct block_device *target_dev; + + int type; /* Type of request */ + uint32_t flags; /* Attribute flags */ + uint64_t id; /* Unique ID for sync with userspace */ + union { + uint64_t block; /* The block in question */ + } u; + + struct list_head deps; /* Requests depending on this one */ + struct bio *bio; /* The bio this request represents */ + + struct work_struct task; /* Async task to run for this req */ + + struct dmu_msg_map_response response; /* FIXME: Clean this up */ +}; + + +extern void add_tx_request(struct dmu_device *dev, struct dmu_request *req); +extern void endio_worker(struct work_struct *work); + +/* Find and grab a reference to a target device */ +struct target_device *find_target(struct dmu_device *dev, + dev_t devno); +/* Character device transport functions */ +int register_chardev_transport(struct dmu_device *dev); +void unregister_chardev_transport(struct dmu_device *dev); +int 
init_chardev_transport(void); +void cleanup_chardev_transport(void); +void write_chardev_transport_info(struct dmu_device *dev, + char *buf, unsigned int maxlen); + +/* Return the block number for @sector */ +static inline u64 dmu_block(struct dmu_device *dev, + sector_t sector) +{ + return sector >> dev->block_shift; +} + +/* Return the sector offset in a block for @sector */ +static inline u64 dmu_sector_offset(struct dmu_device *dev, + sector_t sector) +{ + return sector & dev->block_mask; +} + +/* Return the starting sector for @block */ +static inline u64 dmu_sector(struct dmu_device *dev, + uint64_t block) +{ + return block << dev->block_shift; +} + +/* Increase the usage count for @dev */ +static inline void get_dev(struct dmu_device *dev) +{ + kref_get(&dev->users); +} + +/* Decrease the usage count for @dev */ +void destroy_dmu_device(struct kref *ref); +static inline void put_dev(struct dmu_device *dev) +{ + kref_put(&dev->users, destroy_dmu_device); +} + +int dmu_init_mappings(void); +void dmu_cleanup_mappings(void); +int dmu_make_mapping(struct dmu_device *dev, + uint64_t org, uint64_t new, int64_t offset, + struct block_device *dest, int rw); +int dmu_map_from_mappings(struct dmu_device *dev, + struct bio *bio); +int dmu_alloc_mappings(struct dmu_mappings **m, uint32_t size); +int dmu_remove_mapping(struct dmu_device *dev, uint64_t org); +unsigned int dmu_remove_all_mappings(struct dmu_device *dev); + +#endif diff -r 50f87a6ffd94 drivers/md/dm-userspace-cache.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/md/dm-userspace-cache.c Mon Jan 29 14:28:05 2007 -0800 @@ -0,0 +1,256 @@ +/* + * Copyright IBM Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/blkdev.h> +#include <linux/bio.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/types.h> +#include <linux/poll.h> + +#include "dm.h" + +#include <linux/dm-userspace.h> + +#include "dm-user.h" + +#define DM_MSG_PREFIX "dm-userspace-cache" + +static struct kmem_cache *map_cache; + +struct dmu_mappings { + struct list_head *table; + uint32_t size; + uint32_t count; + struct semaphore sem; +}; + +struct dmu_map { + struct list_head list; + uint64_t org_block; + uint64_t new_block; + int64_t offset; + struct block_device *dest_dev; + int rw; +}; + +int dmu_alloc_mappings(struct dmu_mappings **mp, uint32_t size) +{ + struct dmu_mappings *m; + int i; + + (*mp) = kmalloc(sizeof(*m), GFP_KERNEL); + if (!(*mp)) { + DMERR("Failed to alloc mappings"); + return 0; + } + + m = *mp; + + m->table = kmalloc(sizeof(struct list_head) * size, GFP_KERNEL); + m->size = size; + m->count = 0; + + for (i = 0; i < m->size; i++) { + INIT_LIST_HEAD(&m->table[i]); + } + + init_MUTEX(&m->sem); + + return 1; +} + +int dmu_destroy_mappings(struct dmu_mappings *m) +{ + if (m->table) + kfree(m->table); + + return 1; +} + +static struct dmu_map *__dmu_find_mapping(struct dmu_mappings *m, + uint64_t block) +{ + uint32_t bucket; + struct dmu_map *map; + + bucket = ((uint32_t)block) % m->size; + + list_for_each_entry(map, &m->table[bucket], list) { + if (map->org_block == block) + return map; + } + + return NULL; +} + +static void __dmu_delete_mapping(struct dmu_mappings *m, + struct dmu_map *map) +{ + m->count--; + list_del(&map->list); + kmem_cache_free(map_cache, map); +} + +static int dmu_add_mapping(struct dmu_mappings *m, + struct dmu_map *map) +{ + uint32_t bucket; + struct dmu_map *old; + + down(&m->sem); + + old = __dmu_find_mapping(m, map->org_block); + if (old) + __dmu_delete_mapping(m, old); + + bucket = ((uint32_t)map->org_block) % m->size; + + list_add(&map->list, &m->table[bucket]); + m->count++; + + up(&m->sem); + + return 1; +} + +int dmu_map_from_mappings(struct dmu_device *dev, + struct bio *bio) +{ + struct dmu_map *map; + int ret = 0; + + down(&dev->mappings->sem); + + map = __dmu_find_mapping(dev->mappings, + dmu_block(dev, bio->bi_sector)); + + if (map && (bio_rw(bio) == map->rw)) { + + bio->bi_sector = dmu_sector(dev, map->new_block) + + dmu_sector_offset(dev, bio->bi_sector) + + map->offset; + bio->bi_bdev = map->dest_dev; + ret = 1; + } + + up(&dev->mappings->sem); + + return ret; +} + +int dmu_make_mapping(struct dmu_device *dev, + uint64_t org, uint64_t new, int64_t offset, + struct block_device *dest, int rw) +{ + struct dmu_map *map; + + /* FIXME */ + map = kmem_cache_alloc(map_cache, GFP_NOIO); + if (!map) { + DMERR("Failed to alloc mapping"); + return 0; + } + + INIT_LIST_HEAD(&map->list); + + map->org_block = org; + map->new_block = new; + map->dest_dev = dest; + map->offset = offset; + map->rw = rw; + + return dmu_add_mapping(dev->mappings, map); +} + +int dmu_remove_mapping(struct dmu_device *dev, + uint64_t org) +{ + struct dmu_map *map; + int ret = 0; + + down(&dev->mappings->sem); + + map = __dmu_find_mapping(dev->mappings, org); + if (map) { + __dmu_delete_mapping(dev->mappings, map); + ret = 1; + } + + 
up(&dev->mappings->sem); + + return ret; +} + +static unsigned int __destroy_bucket(struct dmu_mappings *m, + unsigned int index) +{ + struct dmu_map *map, *next; + unsigned int count = 0; + + list_for_each_entry_safe(map, next, &m->table[index], list) { + __dmu_delete_mapping(m, map); + count++; + } + + return count; +} + +unsigned int dmu_remove_all_mappings(struct dmu_device *dev) +{ + int i; + unsigned int count = 0; + + down(&dev->mappings->sem); + + for (i = 0; i < dev->mappings->size; i++) { + count += __destroy_bucket(dev->mappings, i); + } + + up(&dev->mappings->sem); + + return count; +} + +int dmu_init_mappings(void) +{ + map_cache = + kmem_cache_create("dm-userspace-mappings", + sizeof(struct dmu_map), + __alignof__ (struct dmu_map), + 0, NULL, NULL); + if (!map_cache) { + DMERR("Failed to allocate map cache"); + return 0; + } + + return 1; +} + +void dmu_cleanup_mappings(void) +{ + kmem_cache_destroy(map_cache); +} + + diff -r 50f87a6ffd94 drivers/md/dm-userspace-chardev.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/md/dm-userspace-chardev.c Mon Jan 29 14:28:05 2007 -0800 @@ -0,0 +1,765 @@ +/* + * Copyright IBM Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * (C) 2006 FUJITA Tomonori <tomof@xxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/spinlock.h> +#include <linux/blkdev.h> +#include <linux/mempool.h> +#include <linux/dm-userspace.h> +#include <linux/list.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/poll.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <asm/uaccess.h> + +#include "dm.h" +#include "dm-bio-list.h" +#include "kcopyd.h" +#include "dm-user.h" + +#define DM_MSG_PREFIX "dm-userspace" + +/* This allows for a cleaner separation between the dm-userspace + * device-mapper target, and the userspace transport used. 
Right now, + * only a chardev transport exists, but it's possible that there could + * be more in the future + */ +struct dmu_ring { + u32 r_idx; + unsigned long r_pages[DMU_RING_PAGES]; + spinlock_t r_lock; +}; + +struct chardev_transport { + struct cdev cdev; + dev_t ctl_dev; + struct dmu_device *parent; + + struct dmu_ring tx; + struct dmu_ring rx; + + struct task_struct *tx_task; + struct task_struct *rx_task; + + wait_queue_head_t tx_wqueue; + wait_queue_head_t rx_wqueue; + wait_queue_head_t poll_wait; +}; + +static inline void dmu_ring_idx_inc(struct dmu_ring *r) +{ + if (r->r_idx == DMU_MAX_EVENTS - 1) + r->r_idx = 0; + else + r->r_idx++; +} + +static struct dmu_msg *dmu_head_msg(struct dmu_ring *r, u32 idx) +{ + u32 pidx, off; + + pidx = idx / DMU_EVENT_PER_PAGE; + off = idx % DMU_EVENT_PER_PAGE; + + return (struct dmu_msg *) + (r->r_pages[pidx] + sizeof(struct dmu_msg) * off); +} + +static struct dmu_request *find_rx_request(struct dmu_device *dev, + uint64_t id) +{ + struct dmu_request *req, *next, *match = NULL; + int count = 0; + struct list_head *list = &dev->rx_requests[id % DMU_CP_HASH]; + unsigned long flags; + + spin_lock_irqsave(&dev->xmit_lock, flags); + list_for_each_entry_safe(req, next, list, list) { + count++; + if (req->id == id) { + list_del_init(&req->list); + match = req; + atomic_dec(&dev->r_reqs); + break; + } + } + spin_unlock_irqrestore(&dev->xmit_lock, flags); + + return match; +} + +static int have_pending_requests(struct dmu_device *dev) +{ + return atomic_read(&dev->t_reqs) != 0; +} + +static void send_userspace_message(struct dmu_msg *msg, + struct dmu_request *req) +{ + memset(msg, 0, sizeof(*msg)); + + msg->hdr.id = req->id; + + switch (req->type) { + case DM_USERSPACE_MAP_BLOCK_REQ: + msg->hdr.msg_type = req->type; + msg->payload.map_req.org_block = req->u.block; + dmu_cpy_flag(&msg->payload.map_req.flags, + req->flags, DMU_FLAG_WR); + break; + + case DM_USERSPACE_MAP_DONE: + msg->hdr.msg_type = DM_USERSPACE_MAP_DONE; + msg->payload.map_done.id_of_op = req->id; + msg->payload.map_done.org_block = req->u.block; + dmu_cpy_flag(&msg->payload.map_done.flags, + req->flags, DMU_FLAG_WR); + break; + + default: + DMWARN("Unknown outgoing message type %i", req->type); + } + + /* If this request is not on a list (the rx_requests list), + * then it needs to be freed after sending + */ + if (list_empty(&req->list)) { + INIT_WORK(&req->task, endio_worker); + schedule_work(&req->task); + } +} + +static void add_rx_request(struct dmu_request *req) +{ + unsigned long flags; + + spin_lock_irqsave(&req->dev->xmit_lock, flags); + list_add_tail(&req->list, + &req->dev->rx_requests[req->id % DMU_CP_HASH]); + atomic_inc(&req->dev->r_reqs); + spin_unlock_irqrestore(&req->dev->xmit_lock, flags); +} + +struct dmu_request *pluck_next_request(struct dmu_device *dev) +{ + struct dmu_request *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev->xmit_lock, flags); + if (!list_empty(&dev->tx_requests)) { + req = list_entry(dev->tx_requests.next, + struct dmu_request, list); + list_del_init(&req->list); + + atomic_dec(&dev->t_reqs); + } + spin_unlock_irqrestore(&dev->xmit_lock, flags); + + if (req && ((req->type == DM_USERSPACE_MAP_BLOCK_REQ) || + (req->type == DM_USERSPACE_MAP_DONE))) + add_rx_request(req); + + return req; +} + +static struct dmu_msg *get_tx_msg(struct dmu_ring *ring) +{ + struct dmu_msg *msg; + unsigned long flags; + + spin_lock_irqsave(&ring->r_lock, flags); + msg = dmu_head_msg(ring, ring->r_idx); + if (msg->hdr.status) + msg = NULL; + else + 
dmu_ring_idx_inc(ring); + spin_unlock_irqrestore(&ring->r_lock, flags); + + return msg; +} + +static void send_tx_request(struct dmu_msg *msg, struct dmu_request *req) +{ + struct chardev_transport *t = req->dev->transport_private; + + send_userspace_message(msg, req); + msg->hdr.status = 1; + mb(); + flush_dcache_page(virt_to_page(msg)); + wake_up_interruptible(&t->poll_wait); +} + +/* Add a request to a device's request queue */ +void add_tx_request(struct dmu_device *dev, struct dmu_request *req) +{ + unsigned long flags; + struct chardev_transport *t = dev->transport_private; + struct dmu_ring *ring = &t->tx; + struct dmu_msg *msg; + + BUG_ON(!list_empty(&req->list)); + + msg = get_tx_msg(ring); + + if (msg) { + add_rx_request(req); + send_tx_request(msg, req); + } else { + spin_lock_irqsave(&dev->xmit_lock, flags); + list_add_tail(&req->list, &dev->tx_requests); + atomic_inc(&dev->t_reqs); + spin_unlock_irqrestore(&dev->xmit_lock, flags); + + wake_up_interruptible(&t->tx_wqueue); + } +} + +static int dmu_txd(void *data) +{ + + struct dmu_device *dev = data; + struct chardev_transport *t = dev->transport_private; + struct dmu_ring *ring = &t->tx; + struct dmu_request *req = NULL; + struct dmu_msg *msg; + + while (!kthread_should_stop()) { + msg = dmu_head_msg(ring, ring->r_idx); + + wait_event_interruptible(t->tx_wqueue, + (!msg->hdr.status && + have_pending_requests(dev)) || + kthread_should_stop()); + + if (kthread_should_stop()) + break; + + msg = get_tx_msg(ring); + if (!msg) + continue; + + req = pluck_next_request(dev); + BUG_ON(!req); + + send_tx_request(msg, req); + } + + return 0; +} + +static void flush_block(int read_err, unsigned int write_err, void *data) +{ + struct dmu_request *req = data; + + if (read_err || write_err) { + DMERR("Failed to copy block!"); + bio_io_error(req->bio, req->bio->bi_size); + return; + } + + atomic_inc(&req->dev->f_reqs); + generic_make_request(req->bio); +} + +static void copy_block(struct dmu_device *dev, + struct block_device *src_dev, + struct block_device *dst_dev, + struct dmu_request *req, + uint64_t org_block, + uint64_t new_block, + int64_t offset) +{ + struct io_region src, dst; + + src.bdev = src_dev; + src.sector = dmu_sector(dev, org_block); + src.count = dev->block_size; + + dst.bdev = dst_dev; + dst.sector = dmu_sector(dev, new_block); + dst.sector += offset; + dst.count = dev->block_size; + + kcopyd_copy(dev->kcopy, &src, 1, &dst, 0, flush_block, req); +} + +static void map_worker(struct work_struct *work) +{ + struct dmu_request *req; + struct dmu_msg_map_response *msg; + struct dmu_device *dev; + struct target_device *src_dev, *dst_dev; + + req = container_of(work, struct dmu_request, task); + msg = &req->response; + dev = req->dev; + + if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) { + src_dev = find_target(dev, MKDEV(msg->src_maj, msg->src_min)); + if (!src_dev) { + DMERR("Failed to find src device %i:%i\n", + msg->src_maj, msg->src_min); + goto fail; + } + } else + src_dev = NULL; + + dst_dev = find_target(dev, MKDEV(msg->dst_maj, msg->dst_min)); + if (!dst_dev) { + DMERR("Failed to find dest device %i:%i\n", + msg->dst_maj, msg->dst_min); + goto fail; + } + + req->target_dev = dst_dev->bdev; + + /* Remap the bio */ + req->bio->bi_sector = dmu_sector(dev, msg->new_block) + + dmu_sector_offset(dev, req->bio->bi_sector) + + msg->offset; + req->bio->bi_bdev = dst_dev->bdev; + + dmu_cpy_flag(&req->flags, msg->flags, DMU_FLAG_SYNC); + + if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) + copy_block(dev, src_dev->bdev, 
dst_dev->bdev, req, + req->u.block, msg->new_block, + msg->offset); + else + flush_block(0, 0, req); + + return; + + fail: + bio_io_error(req->bio, req->bio->bi_size); +} + +static void do_make_mapping(struct dmu_device *dev, + struct dmu_msg_make_mapping *msg) +{ + struct target_device *target; + + target = find_target(dev, MKDEV(msg->dev_maj, msg->dev_min)); + if (!target) { + DMERR("Failed to find target device %i:%i\n", + msg->dev_maj, msg->dev_min); + return; + } + + dmu_make_mapping(dev, + msg->org_block, msg->new_block, msg->offset, + target->bdev, dmu_get_flag(&msg->flags, DMU_FLAG_WR)); + +} + +static void do_kill_mapping(struct dmu_device *dev, + struct dmu_msg_make_mapping *msg) +{ + if (!dmu_remove_mapping(dev, msg->org_block)) + DMERR("Tried to remove non-existent mapping for %llu", + msg->org_block); +} + +static void do_map_bio(struct dmu_device *dev, + struct dmu_msg_map_response *msg) +{ + struct dmu_request *req; + + req = find_rx_request(dev, msg->id_of_req); + if (!req) { + DMERR("Unable to complete unknown map: %llu\n", + (unsigned long long) msg->id_of_req); + return; + } + + memcpy(&req->response, msg, sizeof(req->response)); + + INIT_WORK(&req->task, map_worker); + schedule_work(&req->task); +} + +static void do_map_done(struct dmu_device *dev, uint64_t id_of_op, int fail) +{ + struct dmu_request *req; + + req = find_rx_request(dev, id_of_op); + if (!req) { + DMERR("Unable to complete unknown request: %llu\n", + (unsigned long long) id_of_op); + return; + } + + dmu_clr_flag(&req->flags, DMU_FLAG_SYNC); + + req->bio->bi_end_io(req->bio, req->bio->bi_size, fail); +} + +static void do_map_failed(struct dmu_device *dev, uint64_t id_of_op) +{ + struct dmu_request *req; + + req = find_rx_request(dev, id_of_op); + if (!req) { + DMERR("Unable to fail unknown request: %llu\n", + (unsigned long long) id_of_op); + return; + } + + DMERR("Userspace failed to map id %llu (sector %llu)", + (unsigned long long) id_of_op, + (unsigned long long) req->bio->bi_sector); + + bio_io_error(req->bio, req->bio->bi_size); + + mempool_free(req, request_pool); +} + +static int dmu_rxd(void *data) +{ + struct dmu_device *dev = (struct dmu_device *) data; + struct chardev_transport *t = dev->transport_private; + struct dmu_ring *ring = &t->rx; + struct dmu_msg *msg; + + while (!kthread_should_stop()) { + msg = dmu_head_msg(ring, ring->r_idx); + /* do we need this? 
*/ + flush_dcache_page(virt_to_page(msg)); + + wait_event_interruptible(t->rx_wqueue, msg->hdr.status || + kthread_should_stop()); + + if (kthread_should_stop()) + break; + + switch (msg->hdr.msg_type) { + case DM_USERSPACE_MAP_BLOCK_RESP: + do_map_bio(dev, &msg->payload.map_rsp); + break; + + case DM_USERSPACE_MAP_FAILED: + do_map_failed(dev, msg->payload.map_rsp.id_of_req); + break; + + case DM_USERSPACE_MAP_DONE: + do_map_done(dev, msg->payload.map_done.id_of_op, 0); + break; + + case DM_USERSPACE_MAP_DONE_FAILED: + do_map_done(dev, msg->payload.map_done.id_of_op, 1); + break; + + case DM_USERSPACE_MAKE_MAPPING: + do_make_mapping(dev, &msg->payload.make_mapping); + break; + + case DM_USERSPACE_KILL_MAPPING: + do_kill_mapping(dev, &msg->payload.make_mapping); + break; + + default: + DMWARN("Unknown incoming request type: %i", + msg->hdr.msg_type); + } + + msg->hdr.status = 0; + dmu_ring_idx_inc(ring); + } + + return 0; +} + +ssize_t dmu_ctl_write(struct file *file, const char __user *buffer, + size_t size, loff_t *offset) +{ + struct dmu_device *dev = (struct dmu_device *)file->private_data; + struct chardev_transport *t = dev->transport_private; + + wake_up(&t->tx_wqueue); + wake_up(&t->rx_wqueue); + return size; +} + +static void dmu_ring_free(struct dmu_ring *r) +{ + int i; + for (i = 0; i < DMU_RING_PAGES; i++) { + if (!r->r_pages[i]) + break; + free_page(r->r_pages[i]); + r->r_pages[i] = 0; + } +} + +static int dmu_ring_alloc(struct dmu_ring *r) +{ + int i; + + r->r_idx = 0; + spin_lock_init(&r->r_lock); + + for (i = 0; i < DMU_RING_PAGES; i++) { + r->r_pages[i] = get_zeroed_page(GFP_KERNEL); + if (!r->r_pages[i]) + return -ENOMEM; + } + return 0; +} + +int dmu_ctl_open(struct inode *inode, struct file *file) +{ + int ret; + struct chardev_transport *t; + struct dmu_device *dev; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + t = container_of(inode->i_cdev, struct chardev_transport, cdev); + dev = t->parent; + + init_waitqueue_head(&t->poll_wait); + init_waitqueue_head(&t->tx_wqueue); + init_waitqueue_head(&t->rx_wqueue); + + ret = dmu_ring_alloc(&t->tx); + if (ret) + return -ENOMEM; + + ret = dmu_ring_alloc(&t->rx); + if (ret) + goto free_tx; + + t->tx_task = kthread_run(dmu_txd, dev, "%s_tx", DM_MSG_PREFIX); + if (!t->tx_task) + goto free_rx; + + t->rx_task = kthread_run(dmu_rxd, dev, "%s_rx", DM_MSG_PREFIX); + if (!t->rx_task) { + ret = -ENOMEM; + goto destroy_tx_task; + } + + get_dev(dev); + + file->private_data = dev; + + return 0; +destroy_tx_task: + kthread_stop(t->tx_task); +free_rx: + dmu_ring_free(&t->rx); +free_tx: + dmu_ring_free(&t->tx); + return ret; +} + +int dmu_ctl_release(struct inode *inode, struct file *file) +{ + struct dmu_device *dev = (struct dmu_device *)file->private_data; + struct chardev_transport *t = dev->transport_private; + + kthread_stop(t->rx_task); + kthread_stop(t->tx_task); + + dmu_ring_free(&t->rx); + dmu_ring_free(&t->tx); + + put_dev(dev); + + /* Stop taking requests when there is no userspace to service them */ + dev->request_slots = 0; + + return 0; +} + +unsigned dmu_ctl_poll(struct file *file, poll_table *wait) +{ + struct dmu_device *dev = (struct dmu_device *)file->private_data; + struct chardev_transport *t = dev->transport_private; + struct dmu_ring *ring = &t->tx; + struct dmu_msg *msg; + unsigned mask = 0; + u32 idx; + unsigned long flags; + + poll_wait(file, &t->poll_wait, wait); + + spin_lock_irqsave(&ring->r_lock, flags); + + idx = ring->r_idx ? 
ring->r_idx - 1 : DMU_MAX_EVENTS - 1; + msg = dmu_head_msg(ring, idx); + if (msg->hdr.status) + mask |= POLLIN | POLLRDNORM; + + spin_unlock_irqrestore(&ring->r_lock, flags); + + return mask; +} + +static int dmu_ring_map(struct vm_area_struct *vma, unsigned long addr, + struct dmu_ring *ring) +{ + int i, err; + + for (i = 0; i < DMU_RING_PAGES; i++) { + struct page *page = virt_to_page(ring->r_pages[i]); + err = vm_insert_page(vma, addr, page); + if (err) + return err; + addr += PAGE_SIZE; + } + + return 0; +} + +static int dmu_ctl_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct dmu_device *dev = (struct dmu_device *)file->private_data; + struct chardev_transport *t = dev->transport_private; + unsigned long addr; + int err; + + if (vma->vm_pgoff) + return -EINVAL; + + if (vma->vm_end - vma->vm_start != DMU_RING_SIZE * 2) { + DMERR("mmap size must be %lu, not %lu \n", + DMU_RING_SIZE * 2, vma->vm_end - vma->vm_start); + return -EINVAL; + } + + addr = vma->vm_start; + err = dmu_ring_map(vma, addr, &t->tx); + if (err) + return err; + err = dmu_ring_map(vma, addr + DMU_RING_SIZE, &t->rx); + + /* Open the gates and wake anyone waiting */ + /* FIXME: Magic number */ + dev->request_slots = 20000; + wake_up_interruptible(&dev->lowmem); + + return err; +} + +static struct file_operations ctl_fops = { + .open = dmu_ctl_open, + .release = dmu_ctl_release, + .write = dmu_ctl_write, + .mmap = dmu_ctl_mmap, + .poll = dmu_ctl_poll, + .owner = THIS_MODULE, +}; + +static int get_free_minor(void) +{ + struct dmu_device *dev; + int minor = 0; + + spin_lock(&devices_lock); + + while (1) { + list_for_each_entry(dev, &devices, list) { + struct chardev_transport *t = dev->transport_private; + if (MINOR(t->ctl_dev) == minor) + goto dupe; + } + break; + dupe: + minor++; + } + + spin_unlock(&devices_lock); + + return minor; +} + +int register_chardev_transport(struct dmu_device *dev) +{ + struct chardev_transport *t; + int ret; + + dev->transport_private = kmalloc(sizeof(struct chardev_transport), + GFP_KERNEL); + t = dev->transport_private; + + if (!t) { + DMERR("Failed to allocate chardev transport"); + goto bad; + } + + t->ctl_dev = MKDEV(MAJOR(dmu_dev), get_free_minor()); + t->parent = dev; + + cdev_init(&t->cdev, &ctl_fops); + t->cdev.owner = THIS_MODULE; + t->cdev.ops = &ctl_fops; + + ret = cdev_add(&t->cdev, t->ctl_dev, 1); + if (ret < 0) { + DMERR("Failed to register control device %d:%d", + MAJOR(t->ctl_dev), MINOR(t->ctl_dev)); + goto bad; + } + + return 1; + + bad: + kfree(t); + return 0; +} + +void unregister_chardev_transport(struct dmu_device *dev) +{ + struct chardev_transport *t = dev->transport_private; + + cdev_del(&t->cdev); + kfree(t); +} + +int init_chardev_transport(void) +{ + int r; + + r = alloc_chrdev_region(&dmu_dev, 0, 10, "dm-userspace"); + if (r) { + DMERR("Failed to allocate chardev region"); + return 0; + } else + return 1; +} + +void cleanup_chardev_transport(void) +{ + unregister_chrdev_region(dmu_dev, 10); +} + +void write_chardev_transport_info(struct dmu_device *dev, + char *buf, unsigned int maxlen) +{ + struct chardev_transport *t = dev->transport_private; + + snprintf(buf, maxlen, "%x:%x", + MAJOR(t->ctl_dev), MINOR(t->ctl_dev)); +} diff -r 50f87a6ffd94 drivers/md/dm-userspace.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/drivers/md/dm-userspace.c Mon Jan 29 14:28:05 2007 -0800 @@ -0,0 +1,568 @@ +/* + * Copyright IBM Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it 
under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/blkdev.h> +#include <linux/bio.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/types.h> +#include <linux/poll.h> + +#include <linux/dm-userspace.h> + +#include "dm.h" +#include "dm-bio-list.h" +#include "kcopyd.h" +#include "dm-user.h" + +#define DMU_COPY_PAGES 256 + +#define DM_MSG_PREFIX "dm-userspace" + +struct kmem_cache *request_cache; +mempool_t *request_pool; + +spinlock_t devices_lock; +LIST_HEAD(devices); + +/* Device number for the control device */ +dev_t dmu_dev; + +void endio_worker(struct work_struct *work) +{ + struct dmu_request *req; + struct dmu_device *dev; + + req = container_of(work, struct dmu_request, task); + dev = req->dev; + + spin_lock(&dev->lock); + if (list_empty(&req->list) && list_empty(&req->copy)) { + mempool_free(req, request_pool); + atomic_dec(&dev->f_reqs); + atomic_dec(&dev->total); + wake_up_interruptible(&dev->lowmem); + } else { + PREPARE_WORK(&req->task, endio_worker); + schedule_work(&req->task); + } + spin_unlock(&dev->lock); +} + +/* Return an already-bound target device */ +struct target_device *find_target(struct dmu_device *dev, + dev_t devno) +{ + struct target_device *target, *match = NULL; + + spin_lock(&dev->lock); + list_for_each_entry(target, &dev->target_devs, list) { + if (target->bdev->bd_dev == devno) { + match = target; + break; + } + } + spin_unlock(&dev->lock); + + return match; +} + +/* Find a new target device and bind it to our device */ +static struct target_device *get_target(struct dmu_device *dev, + dev_t devno) +{ + struct target_device *target; + struct block_device *bdev; + + target = find_target(dev, devno); + if (target) + return target; + + bdev = open_by_devnum(devno, FMODE_READ | FMODE_WRITE); + if (IS_ERR(bdev)) { + DMERR("Unable to lookup device %x", devno); + return NULL; + } + + target = kmalloc(sizeof(*target), GFP_KERNEL); + if (!target) { + DMERR("Unable to alloc new target device"); + return NULL; + } + + target->bdev = bdev; + INIT_LIST_HEAD(&target->list); + + if (in_interrupt()) + DMERR("%s in irq\n", __FUNCTION__); + + spin_lock(&dev->lock); + list_add_tail(&target->list, &dev->target_devs); + spin_unlock(&dev->lock); + + return target; +} + +/* Caller must hold dev->lock */ +static void put_target(struct dmu_device *dev, + struct target_device *target) +{ + list_del(&target->list); + + bd_release(target->bdev); + blkdev_put(target->bdev); + + kfree(target); +} + +void destroy_dmu_device(struct kref *ref) +{ + struct dmu_device *dev; + struct list_head *cursor, *next; + int i; + + dev = container_of(ref, struct dmu_device, users); + + spin_lock(&devices_lock); + list_del(&dev->list); + spin_unlock(&devices_lock); + + list_for_each_safe(cursor, next, &dev->target_devs) { + struct target_device *target; + + target = list_entry(cursor, + 
struct target_device, + list); + + put_target(dev, target); + } + + list_for_each_safe(cursor, next, &dev->tx_requests) { + struct dmu_request *req; + + req = list_entry(cursor, + struct dmu_request, + list); + + DMERR("Failing unsent bio"); + bio_io_error(req->bio, req->bio->bi_size); + + list_del(&req->list); + + mempool_free(req, request_pool); + } + + for (i = 0; i < DMU_CP_HASH; i++) { + list_for_each_safe(cursor, next, &dev->rx_requests[i]) { + struct dmu_request *req; + + req = list_entry(cursor, + struct dmu_request, + list); + + DMERR("Failing bio"); + req->flags = 0; + bio_io_error(req->bio, req->bio->bi_size); + + list_del(&req->list); + + mempool_free(req, request_pool); + } + } + + dmu_remove_all_mappings(dev); + + kcopyd_client_destroy(dev->kcopy); + unregister_chardev_transport(dev); + + kfree(dev); +} + +static int init_dmu_device(struct dmu_device *dev, u32 block_size) +{ + int ret, i; + + init_waitqueue_head(&dev->lowmem); + INIT_LIST_HEAD(&dev->list); + INIT_LIST_HEAD(&dev->target_devs); + kref_init(&dev->users); + spin_lock_init(&dev->lock); + spin_lock_init(&dev->xmit_lock); + + INIT_LIST_HEAD(&dev->tx_requests); + + dev->rx_requests = kmalloc(sizeof(struct list_head) * DMU_CP_HASH, + GFP_KERNEL); + if (!dev->rx_requests) { + DMERR("Failed to alloc RX hash\n"); + return 0; + } + + for (i = 0; i < DMU_CP_HASH; i++) + INIT_LIST_HEAD(&dev->rx_requests[i]); + + dev->block_size = block_size; + dev->block_mask = block_size - 1; + dev->block_shift = ffs(block_size) - 1; + + atomic_set(&dev->t_reqs, 0); + atomic_set(&dev->r_reqs, 0); + atomic_set(&dev->f_reqs, 0); + atomic_set(&dev->total, 0); + atomic_set(&dev->idcounter, 0); + + dmu_alloc_mappings(&dev->mappings, 2048); + + ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopy); + if (ret) { + DMERR("Failed to initialize kcopyd client"); + return 0; + } + + dev->request_slots = 0; /* Unable to queue reqs right away */ + + return 1; +} + +static struct dmu_device *new_dmu_device(char *key, + struct dm_target *ti, + u32 block_size) +{ + struct dmu_device *dev; + int ret; + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (!dev) { + DMERR("Failed to allocate new userspace device"); + return NULL; + } + + if (!init_dmu_device(dev, block_size)) + goto bad1; + + snprintf(dev->key, DMU_KEY_LEN, "%s", key); + + ret = register_chardev_transport(dev); + if (!ret) + goto bad2; + + spin_lock(&devices_lock); + list_add(&dev->list, &devices); + spin_unlock(&devices_lock); + + return dev; + + bad2: + put_dev(dev); + bad1: + kfree(dev); + DMERR("Failed to create device"); + return NULL; +} + +static struct dmu_device *find_dmu_device(const char *key) +{ + struct dmu_device *dev; + struct dmu_device *match = NULL; + + spin_lock(&devices_lock); + + list_for_each_entry(dev, &devices, list) { + spin_lock(&dev->lock); + if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) { + match = dev; + spin_unlock(&dev->lock); + break; + } + spin_unlock(&dev->lock); + } + + spin_unlock(&devices_lock); + + return match; +} + +static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv) +{ + uint64_t block_size; + struct dmu_device *dev; + char *device_key; + char *block_size_param; + int target_idx = 2; + + if (argc < 3) { + ti->error = "Invalid argument count"; + return -EINVAL; + } + + device_key = argv[0]; + block_size_param = argv[1]; + + block_size = simple_strtoul(block_size_param, NULL, 10) / 512; + + dev = find_dmu_device(device_key); + if (!dev) { + dev = new_dmu_device(device_key, ti, block_size); + if (!dev) { + ti->error = "Failed to create 
device"; + goto bad; + } + } else + get_dev(dev); + + spin_lock(&dev->lock); + if (dev->block_size != block_size) { + ti->error = "Invalid block size"; + goto bad; + } + spin_unlock(&dev->lock); + + /* Resolve target devices */ + do { + int maj, min; + sscanf(argv[target_idx], "%i:%i", &maj, &min); + if (!get_target(dev, MKDEV(maj, min))) { + DMERR("Failed to find target device %i:%i (%s)", + maj, min, argv[target_idx]); + goto out; + } + } while (++target_idx < argc); + + ti->private = dev; + ti->split_io = block_size; + + return 0; + + bad: + if (dev) + spin_unlock(&dev->lock); + out: + if (dev) + put_dev(dev); + + return -EINVAL; +} + +static void dmu_dtr(struct dm_target *ti) +{ + struct dmu_device *dev = (struct dmu_device *) ti->private; + + put_dev(dev); +} + +static void init_req(struct dmu_device *dev, + struct bio *bio, + struct dmu_request *req) +{ + req->id = (uint64_t) atomic_add_return(1, &dev->idcounter); + + req->type = DM_USERSPACE_MAP_BLOCK_REQ; + req->dev = dev; + req->bio = bio; + req->u.block = dmu_block(dev, bio->bi_sector); + req->flags = 0; + INIT_LIST_HEAD(&req->deps); + INIT_LIST_HEAD(&req->list); + INIT_LIST_HEAD(&req->copy); + + if (bio_rw(bio)) + dmu_set_flag(&req->flags, DMU_FLAG_WR); +} + +static int dmu_map(struct dm_target *ti, struct bio *bio, + union map_info *map_context) +{ + struct dmu_device *dev = (struct dmu_device *) ti->private; + struct dmu_request *req; + + if (unlikely(bio_barrier(bio))) { + DMINFO("Refusing bio barrier\n"); + return -EOPNOTSUPP; + } + + if (dmu_map_from_mappings(dev, bio)) { + map_context->ptr = NULL; + return 1; + } + + wait_event_interruptible(dev->lowmem, + atomic_read(&dev->total) < + dev->request_slots); + + req = mempool_alloc(request_pool, GFP_NOIO); + if (!req) { + DMERR("Failed to alloc request"); + return -1; + } + + atomic_inc(&dev->total); + + map_context->ptr = req; + + init_req(dev, bio, req); + + add_tx_request(dev, req); + + return 0; +} + +static int dmu_status(struct dm_target *ti, status_type_t type, + char *result, unsigned int maxlen) +{ + struct dmu_device *dev = (struct dmu_device *) ti->private; + + switch (type) { + case STATUSTYPE_INFO: + write_chardev_transport_info(dev, result, maxlen); + break; + + case STATUSTYPE_TABLE: + snprintf(result, maxlen, "%s %llu", + dev->key, + (unsigned long long) dev->block_size * 512); + break; + } + + return 0; +} + +static int dmu_end_io(struct dm_target *ti, struct bio *bio, + int error, union map_info *map_context) +{ + struct dmu_request *req = map_context->ptr; + int ret = 0; + + if (error) + return -1; + + if (!req) + return 0; + + if (dmu_get_flag(&req->flags, DMU_FLAG_SYNC)) { + req->type = DM_USERSPACE_MAP_DONE; + add_tx_request(req->dev, req); + ret = 1; + } else { + INIT_WORK(&req->task, endio_worker); + schedule_work(&req->task); + } + + return ret; +} + +struct target_type userspace_target = { + .name = "userspace", + .version = {0, 1, 0}, + .module = THIS_MODULE, + .ctr = dmu_ctr, + .dtr = dmu_dtr, + .map = dmu_map, + .status = dmu_status, + .end_io = dmu_end_io +}; + +int __init dm_userspace_init(void) +{ + int r = dm_register_target(&userspace_target); + if (r < 0) { + DMERR("Register failed %d", r); + return 0; + } + + spin_lock_init(&devices_lock); + + request_cache = + kmem_cache_create("dm-userspace-requests", + sizeof(struct dmu_request), + __alignof__ (struct dmu_request), + 0, NULL, NULL); + if (!request_cache) { + DMERR("Failed to allocate request cache"); + goto bad; + } + + request_pool = mempool_create(64, + mempool_alloc_slab, 
mempool_free_slab, + request_cache); + if (!request_pool) { + DMERR("Failed to allocate request pool"); + goto bad2; + } + + r = dmu_init_mappings(); + if (!r) + goto bad3; + + r = init_chardev_transport(); + if (!r) + goto bad4; + + return 1; + bad4: + dmu_cleanup_mappings(); + bad3: + mempool_destroy(request_pool); + bad2: + kmem_cache_destroy(request_cache); + bad: + dm_unregister_target(&userspace_target); + + return 0; +} + +void __exit dm_userspace_exit(void) +{ + int r; + struct list_head *cursor, *next; + struct dmu_device *dev; + + spin_lock(&devices_lock); + + list_for_each_safe(cursor, next, &devices) { + dev = list_entry(cursor, struct dmu_device, list); + list_del(cursor); + destroy_dmu_device(&dev->users); + DMERR("Destroying hanging device %s", dev->key); + } + + spin_unlock(&devices_lock); + + cleanup_chardev_transport(); + + mempool_destroy(request_pool); + kmem_cache_destroy(request_cache); + + dmu_cleanup_mappings(); + + r = dm_unregister_target(&userspace_target); + if (r < 0) + DMERR("unregister failed %d", r); +} + +module_init(dm_userspace_init); +module_exit(dm_userspace_exit); + +MODULE_DESCRIPTION(DM_NAME " userspace target"); +MODULE_AUTHOR("Dan Smith"); +MODULE_LICENSE("GPL"); diff -r 50f87a6ffd94 include/linux/dm-userspace.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/include/linux/dm-userspace.h Mon Jan 29 14:28:05 2007 -0800 @@ -0,0 +1,123 @@ +/* + * Copyright IBM Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * This file is released under the LGPL + * + */ + +#ifndef __DM_USERSPACE_H +#define __DM_USERSPACE_H + +#include <linux/types.h> + +/* + * Message Types + */ +#define DM_USERSPACE_MAP_BLOCK_REQ 1 +#define DM_USERSPACE_MAP_BLOCK_RESP 2 +#define DM_USERSPACE_MAP_FAILED 3 +#define DM_USERSPACE_MAP_DONE 4 +#define DM_USERSPACE_MAP_DONE_FAILED 5 +#define DM_USERSPACE_MAKE_MAPPING 6 +#define DM_USERSPACE_KILL_MAPPING 7 + +/* + * Flags and associated macros + */ +#define DMU_FLAG_VALID 1 +#define DMU_FLAG_WR 2 +#define DMU_FLAG_COPY_FIRST 4 +#define DMU_FLAG_SYNC 8 + +static inline int dmu_get_flag(uint32_t *flags, uint32_t flag) +{ + return (*flags & flag) != 0; +} + +static inline void dmu_set_flag(uint32_t *flags, uint32_t flag) +{ + *flags |= flag; +} + +static inline void dmu_clr_flag(uint32_t *flags, uint32_t flag) +{ + *flags &= (~flag); +} + +static inline void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag) +{ + *flags = (*flags & ~flag) | (src & flag); +} + +/* + * This message header is sent in front of every message, in both + * directions + */ +struct dmu_msg_header { + uint64_t id; + uint32_t msg_type; + uint32_t payload_len; + uint32_t status; + uint32_t padding; +}; + +/* DM_USERSPACE_MAP_DONE + * DM_USERSPACE_MAP_DONE_FAILED + */ +struct dmu_msg_map_done { + uint64_t id_of_op; + uint64_t org_block; + uint32_t flags; +}; + +/* DM_USERSPACE_MAP_BLOCK_REQ */ +struct dmu_msg_map_request { + uint64_t org_block; + + uint32_t flags; +}; + +struct dmu_msg_make_mapping { + uint64_t org_block; + uint64_t new_block; + int64_t offset; + uint32_t dev_maj; + uint32_t dev_min; + uint32_t flags; +}; + +/* DM_USERSPACE_MAP_BLOCK_RESP + * DM_USERSPACE_MAP_BLOCK_FAILED + */ +struct dmu_msg_map_response { + uint64_t new_block; + int64_t offset; + + uint64_t id_of_req; + uint32_t flags; + + uint32_t src_maj; + uint32_t src_min; + + uint32_t dst_maj; + uint32_t dst_min; +}; + +/* A full message */ +struct dmu_msg { + struct dmu_msg_header hdr; + union { + struct dmu_msg_map_done map_done; + struct dmu_msg_map_request map_req; + struct 
dmu_msg_map_response map_rsp; + struct dmu_msg_make_mapping make_mapping; + } payload; +}; + +#define DMU_RING_SIZE (1UL << 16) +#define DMU_RING_PAGES (DMU_RING_SIZE >> PAGE_SHIFT) +#define DMU_EVENT_PER_PAGE (PAGE_SIZE / sizeof(struct dmu_msg)) +#define DMU_MAX_EVENTS (DMU_EVENT_PER_PAGE * DMU_RING_PAGES) + +#endif
diff -r 0200430c78db configure --- a/configure Thu Jan 25 23:36:05 2007 +0000 +++ b/configure Mon Jan 29 14:32:56 2007 -0800 @@ -310,7 +310,7 @@ ac_includes_default="\ #endif" ac_default_prefix=/usr -ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB LIBOBJS MSGFMT usrlibdir JOBS STATIC_LINK OWNER GROUP interface kerneldir missingkernel kernelvsn tmpdir COPTIMISE_FLAG CLDFLAGS LDDEPS LIB_SUFFIX DEBUG DM_LIB_VERSION COMPAT DMIOCTLS LOCALEDIR INTL_PACKAGE INTL DEVICE_UID DEVICE_GID DEVICE_MODE DMEVENTD PKGCONFIG LTLIBOBJS' +ac_subst_vars='SHELL PATH_SEPARATOR PACKAGE_NAME PACKAGE_TARNAME PACKAGE_VERSION PACKAGE_STRING PACKAGE_BUGREPORT exec_prefix prefix program_transform_name bindir sbindir libexecdir datadir sysconfdir sharedstatedir localstatedir libdir includedir oldincludedir infodir mandir build_alias host_alias target_alias DEFS ECHO_C ECHO_N ECHO_T LIBS build build_cpu build_vendor build_os host host_cpu host_vendor host_os target target_cpu target_vendor target_os AWK CC CFLAGS LDFLAGS CPPFLAGS ac_ct_CC EXEEXT OBJEXT CPP EGREP INSTALL_PROGRAM INSTALL_SCRIPT INSTALL_DATA LN_S SET_MAKE RANLIB ac_ct_RANLIB LIBOBJS MSGFMT usrlibdir JOBS STATIC_LINK OWNER GROUP interface kerneldir missingkernel kernelvsn tmpdir COPTIMISE_FLAG CLDFLAGS LDDEPS LIB_SUFFIX DEBUG DM_LIB_VERSION COMPAT DMIOCTLS LOCALEDIR INTL_PACKAGE INTL DEVICE_UID DEVICE_GID DEVICE_MODE DMEVENTD PKGCONFIG DMU LTLIBOBJS' ac_subst_files='' # Initialize some variables set by options. @@ -856,6 +856,7 @@ Optional Features: statically. Default is dynamic linking --disable-selinux Disable selinux support --enable-nls Enable Native Language Support + --disable-dmu Disable dm-userspace support Optional Packages: --with-PACKAGE[=ARG] use PACKAGE [ARG=yes] @@ -1445,7 +1446,8 @@ case "$host_os" in LDDEPS="$LDDEPS .export.sym" LIB_SUFFIX="so" DMIOCTLS="yes" - SELINUX="yes" ;; + SELINUX="yes" + DMU="yes" ;; darwin*) CFLAGS="$CFLAGS -no-cpp-precomp -fno-common" COPTIMISE_FLAG="-O2" @@ -1453,7 +1455,8 @@ case "$host_os" in LDDEPS="$LDDEPS" LIB_SUFFIX="dylib" DMIOCTLS="no" - SELINUX="no" ;; + SELINUX="no" + DMU="no" ;; esac ################################################################################ @@ -5963,6 +5966,26 @@ fi fi ################################################################################ +echo "$as_me:$LINENO: checking whether to enable dm-userspace" >&5 +echo $ECHO_N "checking whether to enable dm-userspace... $ECHO_C" >&6 +# Check whether --enable-dmu or --disable-dmu was given. 
+if test "${enable_dmu+set}" = set; then + enableval="$enable_dmu" + DMU=$enableval +fi; +echo "$as_me:$LINENO: result: $DMU" >&5 +echo "${ECHO_T}$DMU" >&6 + +if test "x${DMU}" = "xyes"; then + if test "x${missingkernel}" = xyes; then + { { echo "$as_me:$LINENO: error: \"Kernel source required to build dm-userspace tools\"" >&5 +echo "$as_me: error: \"Kernel source required to build dm-userspace tools\"" >&2;} + { (exit 1); exit 1; }; } + fi +fi + + +################################################################################ echo "$as_me:$LINENO: checking for kernel version" >&5 echo $ECHO_N "checking for kernel version... $ECHO_C" >&6 @@ -6044,6 +6067,7 @@ _ACEOF ################################################################################ + if test "$DMEVENTD" = yes; then @@ -6799,6 +6823,7 @@ s,@DEVICE_MODE@,$DEVICE_MODE,;t t s,@DEVICE_MODE@,$DEVICE_MODE,;t t s,@DMEVENTD@,$DMEVENTD,;t t s,@PKGCONFIG@,$PKGCONFIG,;t t +s,@DMU@,$DMU,;t t s,@LTLIBOBJS@,$LTLIBOBJS,;t t CEOF diff -r 0200430c78db configure.in --- a/configure.in Thu Jan 25 23:36:05 2007 +0000 +++ b/configure.in Mon Jan 29 14:32:56 2007 -0800 @@ -38,7 +38,8 @@ case "$host_os" in LDDEPS="$LDDEPS .export.sym" LIB_SUFFIX="so" DMIOCTLS="yes" - SELINUX="yes" ;; + SELINUX="yes" + DMU="yes" ;; darwin*) CFLAGS="$CFLAGS -no-cpp-precomp -fno-common" COPTIMISE_FLAG="-O2" @@ -46,7 +47,8 @@ case "$host_os" in LDDEPS="$LDDEPS" LIB_SUFFIX="dylib" DMIOCTLS="no" - SELINUX="no" ;; + SELINUX="no" + DMU="no" ;; esac ################################################################################ @@ -296,6 +298,20 @@ else else test -d "${kerneldir}" || { AC_MSG_WARN(kernel dir $kerneldir not found); missingkernel=yes ; } fi + +################################################################################ +dnl -- Disable dm-userspace +AC_MSG_CHECKING(whether to enable dm-userspace) +AC_ARG_ENABLE(dmu, [ --disable-dmu Disable dm-userspace support], +DMU=$enableval) +AC_MSG_RESULT($DMU) + +if test "x${DMU}" = "xyes"; then + if test "x${missingkernel}" = xyes; then + AC_ERROR("Kernel source required to build dm-userspace tools") + fi +fi + ################################################################################ dnl -- Kernel version string @@ -413,6 +429,7 @@ AC_SUBST(DEVICE_MODE) AC_SUBST(DEVICE_MODE) AC_SUBST(DMEVENTD) AC_SUBST(PKGCONFIG) +AC_SUBST(DMU) ################################################################################ dnl -- First and last lines should not contain files to generate in order to diff -r 0200430c78db lib/.exported_symbols --- a/lib/.exported_symbols Thu Jan 25 23:36:05 2007 +0000 +++ b/lib/.exported_symbols Mon Jan 29 14:32:56 2007 -0800 @@ -127,3 +127,26 @@ dm_report_field_uint32 dm_report_field_uint32 dm_report_field_uint64 dm_report_field_set_value +dmu_async_map +dmu_async_map_done +dmu_ctl_close +dmu_ctl_open +dmu_ctl_send_queue +dmu_events_pending +dmu_get_ctl_fd +dmu_kill_mapping +dmu_make_mapping +dmu_map_dup +dmu_map_get_block +dmu_map_get_id +dmu_map_is_write +dmu_map_set_block +dmu_map_set_copy_src_dev +dmu_map_set_dest_dev +dmu_map_set_offset +dmu_map_set_origin_block +dmu_map_set_sync +dmu_map_set_writable +dmu_process_events +dmu_register_map_done_handler +dmu_register_map_handler \ No newline at end of file diff -r 0200430c78db lib/Makefile.in --- a/lib/Makefile.in Thu Jan 25 23:36:05 2007 +0000 +++ b/lib/Makefile.in Mon Jan 29 14:32:56 2007 -0800 @@ -16,6 +16,7 @@ top_srcdir = @top_srcdir@ top_srcdir = @top_srcdir@ VPATH = @srcdir@ interface = @interface@ +kerneldir = @kerneldir@ SOURCES 
=\ datastruct/bitset.c \ @@ -30,6 +31,11 @@ SOURCES =\ $(interface)/libdm-iface.c INCLUDES = -I$(interface) + +ifeq ("@DMU@", "yes") + INCLUDES += -I$(kerneldir)/include + SOURCES += dmu.c +endif LIB_STATIC = $(interface)/libdevmapper.a diff -r 0200430c78db lib/libdevmapper.h --- a/lib/libdevmapper.h Thu Jan 25 23:36:05 2007 +0000 +++ b/lib/libdevmapper.h Mon Jan 29 14:32:56 2007 -0800 @@ -1,6 +1,7 @@ /* * Copyright (C) 2001-2004 Sistina Software, Inc. All rights reserved. * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. + * Copyright IBM Corp., 2006 * * This file is part of the device-mapper userspace tools. * @@ -27,6 +28,7 @@ #include <limits.h> #include <string.h> #include <stdlib.h> +#include <stdint.h> /***************************************************************** * The first section of this file provides direct access to the @@ -711,4 +713,58 @@ void dm_report_field_set_value(struct dm void dm_report_field_set_value(struct dm_report_field *field, const void *value, const void *sortvalue); + +/************** + * dm-userspace + **************/ + +struct dmu_context; +struct dmu_map_data; + +/* Returns 1 to allow IO to complete, 0 to delay */ +typedef int (*map_done_handler_t)(void *data, struct dmu_map_data *map_data); + +/* Returns 1 to map IO, -1 to fail IO, 0 to delay */ +typedef int (*map_req_handler_t)(void *data, struct dmu_map_data *map_data); + +/* High-level control operations */ +struct dmu_context *dmu_ctl_open(char *dev, int flags); +int dmu_ctl_close(struct dmu_context *ctx); +int dmu_ctl_send_queue(struct dmu_context *ctx); +void dmu_register_map_done_handler(struct dmu_context *ctx, + map_done_handler_t handler, + void *data); +void dmu_register_map_handler(struct dmu_context *ctx, + map_req_handler_t handler, + void *data); +int dmu_invalidate_block(struct dmu_context *ctx, uint64_t block); +int dmu_events_pending(struct dmu_context *ctx, unsigned int msec); +int dmu_process_events(struct dmu_context *ctx); +int dmu_get_ctl_fd(struct dmu_context *ctx); + +/* Map manipulation functions */ +void dmu_map_set_block(struct dmu_map_data *data, uint64_t block); +void dmu_map_set_origin_block(struct dmu_map_data *data, uint64_t block); +uint64_t dmu_map_get_block(struct dmu_map_data *data); +void dmu_map_set_offset(struct dmu_map_data *data, int64_t offset); +uint32_t dmu_map_get_id(struct dmu_map_data *data); +void dmu_map_set_dest_dev(struct dmu_map_data *data, dev_t dev); +void dmu_map_set_copy_src_dev(struct dmu_map_data *data, dev_t dev); +int dmu_map_is_write(struct dmu_map_data *data); +void dmu_map_set_sync(struct dmu_map_data *data); +void dmu_map_set_writable(struct dmu_map_data *data, int rw); +struct dmu_map_data *dmu_map_dup(struct dmu_map_data *data); + +/* Functions for submitting out-of-order events */ +int dmu_async_map(struct dmu_context *ctx, + struct dmu_map_data *data, + int fail); +int dmu_async_map_done(struct dmu_context *ctx, uint64_t id, int fail); + +/* Functions to manipulate the kernel map cache */ +int dmu_make_mapping(struct dmu_context *ctx, + struct dmu_map_data *data); +int dmu_kill_mapping(struct dmu_context *ctx, + struct dmu_map_data *data); + #endif /* LIB_DEVICE_MAPPER_H */ diff -r 0200430c78db lib/dmu.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/dmu.c Mon Jan 29 14:32:56 2007 -0800 @@ -0,0 +1,638 @@ +/* + * Copyright IBM Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * This file is subject to the terms and conditions of the GNU Lesser + * General Public License. 
See the file COPYING in the main directory + * of this archive for more details. + * + */ + +#include <stdio.h> +#include <fcntl.h> +#include <linux/fs.h> +#include <sys/stat.h> +#include <sys/sysmacros.h> +#include <errno.h> +#include <stdint.h> +#include <string.h> +#include <unistd.h> +#include <stdlib.h> +#include <libdevmapper.h> +#include <linux/dm-userspace.h> +#include <sys/mman.h> + +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1UL << PAGE_SHIFT) + +#define MAX_MAJ_VER 0 +#define MAX_MIN_VER 1 + +#define DMU_MSG_DEBUG 0 + +struct uring { + uint32_t idx; + char *buf; + int size; +}; + +#if DMU_MSG_DEBUG +#define DPRINTF( s, arg... ) fprintf(stderr, s, ##arg) +#else +#define DPRINTF( s, arg... ) +#endif + +struct dmu_events { + map_done_handler_t map_done_fn; + map_req_handler_t map_fn; +}; + +struct dmu_event_data { + void *map_done_user_data; + void *map_user_data; +}; + +struct dmu_context { + int fd; + uint32_t id_ctr; + struct dmu_events events; + struct dmu_event_data event_data; + + struct uring ukring; + struct uring kuring; + + uint32_t pending; +}; + +struct dmu_map_data { + uint64_t org_block; + uint64_t block; + int64_t offset; + uint32_t id; + uint32_t flags; + dev_t dest_dev; + dev_t copy_src_dev; +}; + +void dmu_map_set_origin_block(struct dmu_map_data *data, uint64_t block) +{ + data->org_block = block; +} + +void dmu_map_set_writable(struct dmu_map_data *data, int rw) +{ + dmu_set_flag(&data->flags, DMU_FLAG_WR); +} + +void dmu_map_set_block(struct dmu_map_data *data, uint64_t block) +{ + data->block = block; +} + +uint64_t dmu_map_get_block(struct dmu_map_data *data) +{ + return data->block; +} + +void dmu_map_set_offset(struct dmu_map_data *data, int64_t offset) +{ + data->offset = offset; +} + +uint32_t dmu_map_get_id(struct dmu_map_data *data) +{ + return data->id; +} + +void dmu_map_set_dest_dev(struct dmu_map_data *data, dev_t dev) +{ + data->dest_dev = dev; +} + +void dmu_map_set_copy_src_dev(struct dmu_map_data *data, dev_t dev) +{ + data->copy_src_dev = dev; + dmu_set_flag(&data->flags, DMU_FLAG_COPY_FIRST); +} + +int dmu_map_is_write(struct dmu_map_data *data) +{ + return dmu_get_flag(&data->flags, DMU_FLAG_WR); +} + +void dmu_map_set_sync(struct dmu_map_data *data) +{ + dmu_set_flag(&data->flags, DMU_FLAG_SYNC); +} + +struct dmu_map_data *dmu_map_dup(struct dmu_map_data *data) +{ + struct dmu_map_data *dup; + + dup = malloc(sizeof(*dup)); + if (!dup) + return NULL; + + if (data) + memcpy(dup, data, sizeof(*dup)); + + return dup; +} + +/* + * Get the major/minor of the character control device that @dm_device + * has exported for us. We do this by looking at the device status + * string. 
+ */ +static int get_dm_control_dev(char *dm_device, + unsigned *maj, unsigned *min) +{ + struct dm_task *task; + int ret; + void *next = NULL; + uint64_t start, length; + char *ttype = NULL, *params = NULL; + + task = dm_task_create(DM_DEVICE_STATUS); + + ret = dm_task_set_name(task, dm_device); + if (!ret) { + DPRINTF("Failed to set device-mapper target name\n"); + dm_task_destroy(task); + return -1; + } + + ret = dm_task_run(task); + if (!ret) { + DPRINTF("Failed to run device-mapper task\n"); + dm_task_destroy(task); + return -1; + } + + ret = 0; + do { + next = dm_get_next_target(task, next, &start, &length, + &ttype, ¶ms); + + if (strcmp(ttype, "userspace") == 0) { + ret = sscanf(params, "%x:%x", maj, min); + if (ret == 2) + break; + } + + } while (next); + + return 0; +} + +/* + * Create the character device node for our control channel + */ +static int make_device_node(unsigned major, unsigned minor) +{ + char path[256]; + + sprintf(path, "/dev/dmu%i", minor); + + return mknod(path, S_IFCHR, makedev(major, minor)); +} + +static char *dmu_get_ctl_device(char *dm_device) +{ + unsigned ctl_major, ctl_minor; + static char path[256]; + + if (get_dm_control_dev(dm_device, &ctl_major, &ctl_minor) < 0) + return NULL; + + if (ctl_major == 0) { + DPRINTF("Unable to get device number\n"); + return NULL; + } + + sprintf(path, "/dev/dmu%i", ctl_minor); + + if (access(path, R_OK | W_OK)) { + if (make_device_node(ctl_major, ctl_minor)) { + DPRINTF("Failed to create device node: %s", + strerror(errno)); + return NULL; + } + } + + return path; +} + +static void dmu_split_dev(dev_t dev, uint32_t *maj, uint32_t *min) +{ + *maj = (dev & 0xFF00) >> 8; + *min = (dev & 0x00FF); +} + +static inline void ring_index_inc(struct uring *ring) +{ + ring->idx = (ring->idx == DMU_MAX_EVENTS - 1) ? 
0 : ring->idx + 1; +} + +static inline struct dmu_msg *head_ring_hdr(struct uring *ring) +{ + uint32_t pidx, off, pos; + + pidx = ring->idx / DMU_EVENT_PER_PAGE; + off = ring->idx % DMU_EVENT_PER_PAGE; + pos = pidx * PAGE_SIZE + off * sizeof(struct dmu_msg); + + return (struct dmu_msg *) (ring->buf + pos); +} + +/* Queue a message for sending */ +static int dmu_ctl_queue_msg(struct dmu_context *ctx, int type, void *msgbuf) +{ + struct dmu_msg *msg; + + msg = (struct dmu_msg *)head_ring_hdr(&ctx->ukring); + if (msg->hdr.status) { + DPRINTF("No room in ring, flushing...\n"); + dmu_ctl_send_queue(ctx); + + /* FIXME: Need a better way to wait for space to free up */ + usleep(50000); + + msg = (struct dmu_msg *)head_ring_hdr(&ctx->ukring); + if (msg->hdr.status) { + printf("#################### Still no room!\n"); + return -ENOMEM; + } + } + + msg->hdr.msg_type = type; + msg->hdr.id = ctx->id_ctr++; + + memcpy(&msg->payload, msgbuf, sizeof(msg->payload)); + + ring_index_inc(&ctx->ukring); + msg->hdr.status = 1; + ctx->pending++; + + return 1; +} + +/* Flush queue of messages to the kernel */ +int dmu_ctl_send_queue(struct dmu_context *ctx) +{ + int r; + + DPRINTF("Flushing outgoing queue\n"); + + r = write(ctx->fd, &r, 1); + + ctx->pending = 0; + + return r; +} + +static int check_version(char *dev) +{ + struct dm_task *task; + struct dm_versions *target, *last; + int ret; + + task = dm_task_create(DM_DEVICE_LIST_VERSIONS); + + ret = dm_task_set_name(task, dev); + if (!ret) { + DPRINTF("Failed to set device-mapper target name\n"); + dm_task_destroy(task); + return -1; + } + + ret = dm_task_run(task); + if (!ret) { + DPRINTF("Failed to run device-mapper task\n"); + dm_task_destroy(task); + return -1; + } + + target = dm_task_get_versions(task); + + do { + last = target; + + if (strcmp(target->name, "userspace") == 0) { + DPRINTF("%s version: %i.%i.%i\n", + target->name, + target->version[0], + target->version[1], + target->version[2]); + break; + } + + target = (void *) target + target->next; + } while (last != target); + + if (!target) { + DPRINTF("userspace target not found\n"); + return -1; + } + + if ((target->version[0] == MAX_MAJ_VER) && + (target->version[1] == MAX_MIN_VER)) + return 1; + else + return 0; /* Unsupported */ +} + +struct dmu_context *dmu_ctl_open(char *dev, int flags) +{ + int fd, r; + struct dmu_context *ctx = NULL; + char *ctl_dev; + char *ringbuf; + + r = check_version(dev); + if (r <= 0) { + return NULL; + } + + ctl_dev = dmu_get_ctl_device(dev); + if (ctl_dev == NULL) + return NULL; + else if (access(ctl_dev, R_OK | W_OK)) + return NULL; + + fd = open(ctl_dev, O_RDWR | flags); + if (fd < 0) + goto out; + + ctx = calloc(sizeof(*ctx), 1); + if (!ctx) + goto out; + + ctx->fd = fd; + ctx->id_ctr = 0; + memset(&ctx->events, 0, sizeof(ctx->events)); + memset(&ctx->event_data, 0, sizeof(ctx->event_data)); + + ringbuf = mmap(NULL, DMU_RING_SIZE * 2, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + if (ringbuf == MAP_FAILED) { + printf("fail to mmap, %m\n"); + return NULL; + } + + ctx->kuring.idx = ctx->ukring.idx = 0; + ctx->kuring.buf = ringbuf; + ctx->ukring.buf = ringbuf + DMU_RING_SIZE; + + return ctx; + + out: + if (ctx) + free(ctx); + + return NULL; +} + +int dmu_ctl_close(struct dmu_context *ctx) +{ + return close(ctx->fd); +} + +void dmu_register_map_done_handler(struct dmu_context *ctx, + map_done_handler_t handler, + void *data) +{ + ctx->events.map_done_fn = handler; + ctx->event_data.map_done_user_data = data; +} + +void dmu_register_map_handler(struct dmu_context 
*ctx, + map_req_handler_t handler, + void *data) +{ + ctx->events.map_fn = handler; + ctx->event_data.map_user_data = data; +} + +int dmu_make_mapping(struct dmu_context *ctx, + struct dmu_map_data *data) +{ + struct dmu_msg_make_mapping msg; + int r; + + msg.org_block = data->org_block; + msg.new_block = data->block; + msg.offset = data->offset; + dmu_split_dev(data->dest_dev, &msg.dev_maj, &msg.dev_min); + msg.flags = 0; + dmu_cpy_flag(&msg.flags, data->flags, DMU_FLAG_WR); + + r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAKE_MAPPING, &msg); + + return r; +} + +int dmu_kill_mapping(struct dmu_context *ctx, + struct dmu_map_data *data) +{ + struct dmu_msg_make_mapping msg; + int r; + + msg.org_block = data->org_block; + + r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_KILL_MAPPING, &msg); + + return r; +} + +int dmu_async_map_done(struct dmu_context *ctx, uint64_t id, int fail) +{ + struct dmu_msg_map_done msg; + int r; + + msg.org_block = 0; + msg.flags = 0; + msg.id_of_op = id; + + if (fail) + r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAP_DONE_FAILED, &msg); + else + r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAP_DONE, &msg); + + return r; +} + +int dmu_async_map(struct dmu_context *ctx, + struct dmu_map_data *data, + int fail) +{ + struct dmu_msg_map_response msg; + int r; + + msg.new_block = data->block; + msg.offset = data->offset; + msg.flags = data->flags; + msg.id_of_req = data->id; + + dmu_split_dev(data->copy_src_dev, &msg.src_maj, &msg.src_min); + dmu_split_dev(data->dest_dev, &msg.dst_maj, &msg.dst_min); + + if (fail) + r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAP_FAILED, &msg); + else + r = dmu_ctl_queue_msg(ctx, DM_USERSPACE_MAP_BLOCK_RESP, &msg); + + return r; +} + +int dmu_events_pending(struct dmu_context *ctx, unsigned int msec) +{ + fd_set fds; + struct timeval tv; + + FD_ZERO(&fds); + FD_SET(ctx->fd, &fds); + + tv.tv_sec = msec / 1000; + tv.tv_usec = (msec % 1000) * 1000; + + if (select(ctx->fd + 1, &fds, NULL, NULL, &tv) < 0) + return 0; + + if (FD_ISSET(ctx->fd, &fds)) + return 1; + else + return 0; +} + +static int fire_map_req_event(struct dmu_context *ctx, + struct dmu_msg_map_request *req, + uint64_t id) +{ + struct dmu_map_data data; + int ret; + + if (!ctx->events.map_fn) + return 1; + + DPRINTF("Map event for %llu %c\n", + req->org_block, + dmu_get_flag(&req->flags, DMU_FLAG_WR) ? 
'W':'R'); + + data.block = req->org_block; + data.offset = 0; + data.id = id; + data.flags = req->flags; + data.dest_dev = data.copy_src_dev = 0; + + dmu_clr_flag(&data.flags, DMU_FLAG_COPY_FIRST); + dmu_clr_flag(&data.flags, DMU_FLAG_SYNC); + + ret = ctx->events.map_fn(ctx->event_data.map_user_data, &data); + + if (ret != 0) { + /* If the handler returns 0, we assume they will + * complete the operation later + */ + dmu_async_map(ctx, &data, ret < 0); + DPRINTF("Mapped %llu\n", data.block); + } + + return ret != 0; +} + +static int fire_map_done_event(struct dmu_context *ctx, + struct dmu_msg_map_done *msg, + uint64_t id) +{ + struct dmu_map_data data; + int ret = 1; + + if (ctx->events.map_done_fn) { + data.block = msg->org_block; + data.offset = 0; + data.id = msg->id_of_op; + data.flags = msg->flags; + data.dest_dev = data.copy_src_dev = 0; + + ret = ctx->events.map_done_fn(ctx->event_data.map_done_user_data, + &data); + } + + if (ret > 0) { + /* If the handler returns 0, we assume they will + * complete the operation later + */ + dmu_async_map_done(ctx, msg->id_of_op, ret < 0); + DPRINTF("Completed %llu (%llu)\n", + msg->org_block, msg->id_of_op); + } + + return ret != 0; +} + +static int decode_message(struct dmu_context *ctx, int type, uint64_t id, + uint8_t *msg) +{ + switch (type) { + case DM_USERSPACE_MAP_BLOCK_REQ: + DPRINTF("Request event: %u\n", id); + return fire_map_req_event(ctx, + (struct dmu_msg_map_request *)msg, + id); + case DM_USERSPACE_MAP_DONE: + DPRINTF("Map Done event\n"); + return fire_map_done_event(ctx, + (struct dmu_msg_map_done *)msg, + id); + default: + printf("Unknown message type: %i\n", type); + return -1; /* Unknown message type */ + }; +} + +static int dmu_process_event(struct dmu_context *ctx) +{ + struct dmu_msg *msg; + int ret; + + msg = head_ring_hdr(&ctx->kuring); + if (!msg->hdr.status) + return -1; + + ret = decode_message(ctx, msg->hdr.msg_type, msg->hdr.id, + (uint8_t *)&msg->payload); + + msg->hdr.status = 0; + ring_index_inc(&ctx->kuring); + + return ret; +} + +int dmu_process_events(struct dmu_context *ctx) +{ + int ret, do_flush = 1; + uint32_t count; + + //DPRINTF("Processing events\n"); + + for (count = 0; count < DMU_MAX_EVENTS; count++) { + ret = dmu_process_event(ctx); + + if (ret > 0) + do_flush = 1; + } + + DPRINTF("Pending events: %u\n", ctx->pending); + if (ctx->pending) + dmu_ctl_send_queue(ctx); + + //DPRINTF("Finished processing events\n"); + + return 1; +} + +int dmu_get_ctl_fd(struct dmu_context *ctx) +{ + return ctx->fd; +}
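In case it helps review, here is a rough, untested sketch of how a userspace policy might drive the new dmu_* calls declared above: a trivial identity policy that maps every block 1:1 onto a single backing device. The device name ("my_dmu_dev") and the 8:16 backing device are made-up placeholders, not anything this patch provides.

#include <stdio.h>
#include <sys/sysmacros.h>
#include <libdevmapper.h>

/* Map every request onto one backing device, same block, no offset. */
static int map_request(void *private, struct dmu_map_data *map)
{
	dev_t backing = *(dev_t *)private;

	/* Keep the block number the kernel asked about (identity map),
	 * just point it at the backing device. */
	dmu_map_set_offset(map, 0);
	dmu_map_set_dest_dev(map, backing);

	return 1;	/* 1 = map it now, 0 = delay, -1 = fail the I/O */
}

int main(void)
{
	struct dmu_context *ctx;
	dev_t backing = makedev(8, 16);		/* placeholder: /dev/sdb */

	ctx = dmu_ctl_open("my_dmu_dev", 0);	/* placeholder dm device name */
	if (!ctx) {
		fprintf(stderr, "failed to open dm-userspace control device\n");
		return 1;
	}

	dmu_register_map_handler(ctx, map_request, &backing);

	/* Service map requests until interrupted. */
	for (;;) {
		if (dmu_events_pending(ctx, 1000))
			dmu_process_events(ctx);
	}

	/* not reached */
	return dmu_ctl_close(ctx);
}

A real policy would remap blocks with dmu_map_set_block() and use dmu_map_set_copy_src_dev() for copy-before-write, but the skeleton is the same: register handlers, then poll with dmu_events_pending() and drain the ring with dmu_process_events().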
--
Dan Smith
IBM Linux Technology Center
Open Hypervisor Team
email: danms@xxxxxxxxxx