Here is my latest dm-userspace kernel code. This has a lot of cleanups and fixes since the last version. We have been successfully using this version for a while and believe it to be quite stable and well-performing. It is not intended to be in final form, but I think it should be close to functionally complete. I will post the updated userspace code shortly. -- Dan Smith IBM Linux Technology Center Open Hypervisor Team email: danms@xxxxxxxxxx Signed-off-by: Dan Smith <danms@xxxxxxxxxx> diff -Naur linux-2.6.18-orig/drivers/md/dm-user.h linux-2.6.18-dmu/drivers/md/dm-user.h --- linux-2.6.18-orig/drivers/md/dm-user.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.18-dmu/drivers/md/dm-user.h 2006-09-28 13:49:18.000000000 -0700 @@ -0,0 +1,156 @@ +/* + * Copyright (C) International Business Machines Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __DM_USER_H +#define __DM_USER_H + +#include <linux/hardirq.h> + +#define DMU_KEY_LEN 256 + +extern struct target_type userspace_target; +extern mempool_t *request_pool; +extern dev_t dmu_dev; +extern spinlock_t devices_lock; +extern struct list_head devices; + +#define DMU_CP_HASH 1024 + +/* + * A block device that we can send bios to + */ +struct target_device { + struct list_head list; /* Our place in the targets list */ + struct block_device *bdev; /* The target block_device */ + struct kref users; /* Self-destructing reference count */ +}; + +/* + * A dm-userspace device, which consists of multiple targets sharing a + * common key + */ +struct dmu_device { + struct list_head list; /* Our place in the devices list */ + + spinlock_t lock; /* Protects all the fields below */ + + /* We need to protect the TX list with a separate lock that is + * always used with IRQs disabled because it is locked from + * inside the endio function + */ + spinlock_t tx_lock; + struct list_head tx_requests; /* Requests to send to userspace */ + + struct list_head rx_requests; /* Requests waiting for reply */ + + struct semaphore cp_sem; /* Protection for cp_requests */ + struct list_head cp_requests; /* Requests waiting to be copied */ + + /* Accounting */ + atomic_t t_reqs; /* Waiting to be sent to userspace */ + atomic_t r_reqs; /* Waiting for a response from uspace*/ + atomic_t f_reqs; /* Submitted, waiting for endio */ + atomic_t total; /* Total requests allocated */ + + atomic_t idcounter; /* Counter for making request IDs */ + + struct list_head target_devs; /* List of devices we can target */ + + void *transport_private; /* Private data for userspace comms */ + + char key[DMU_KEY_LEN]; /* Unique name string for device */ + struct kref users; /* Self-destructing reference count */ + + wait_queue_head_t wqueue; /* To block while waiting for reqs */ + wait_queue_head_t lowmem; /* To block while waiting for memory */ + + uint64_t block_size; /* Block size for this 
device */ + uint64_t block_mask; /* Mask for offset in block */ + unsigned int block_shift; /* Shift to convert to/from block */ + + struct kcopyd_client *kcopy; /* Interface to kcopyd */ +}; + +struct dmu_request { + struct list_head list; /* Our place on the request queue */ + struct list_head copy; /* Our place on the copy list */ + struct dmu_device *dev; /* The DMU device that owns us */ + + int type; /* Type of request */ + uint32_t flags; /* Attribute flags */ + uint64_t id; /* Unique ID for sync with userspace */ + union { + uint64_t block; /* The block in question */ + } u; + + struct list_head deps; /* Requests depending on this one */ + struct bio *bio; /* The bio this request represents */ + + struct work_struct task; /* Async task to run for this req */ + + struct dmu_msg_map_response response; /* FIXME: Clean this up */ +}; + + +/* Find and grab a reference to a target device */ +struct target_device *find_target(struct dmu_device *dev, + dev_t devno); +/* Character device transport functions */ +int register_chardev_transport(struct dmu_device *dev); +void unregister_chardev_transport(struct dmu_device *dev); +int init_chardev_transport(void); +void cleanup_chardev_transport(void); +void write_chardev_transport_info(struct dmu_device *dev, + char *buf, unsigned int maxlen); + +/* Return the block number for @sector */ +static inline u64 dmu_block(struct dmu_device *dev, + sector_t sector) +{ + return sector >> dev->block_shift; +} + +/* Return the sector offset in a block for @sector */ +static inline u64 dmu_sector_offset(struct dmu_device *dev, + sector_t sector) +{ + return sector & dev->block_mask; +} + +/* Return the starting sector for @block */ +static inline u64 dmu_sector(struct dmu_device *dev, + uint64_t block) +{ + return block << dev->block_shift; +} + +/* Increase the usage count for @dev */ +static inline void get_dev(struct dmu_device *dev) +{ + kref_get(&dev->users); +} + +/* Decrease the usage count for @dev */ +void destroy_dmu_device(struct kref *ref); +static inline void put_dev(struct dmu_device *dev) +{ + kref_put(&dev->users, destroy_dmu_device); +} + +#endif diff -Naur linux-2.6.18-orig/drivers/md/dm-userspace.c linux-2.6.18-dmu/drivers/md/dm-userspace.c --- linux-2.6.18-orig/drivers/md/dm-userspace.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.18-dmu/drivers/md/dm-userspace.c 2006-09-28 13:49:18.000000000 -0700 @@ -0,0 +1,585 @@ +/* + * Copyright (C) International Business Machines Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/module.h> +#include <linux/init.h> +#include <linux/blkdev.h> +#include <linux/bio.h> +#include <linux/slab.h> +#include <linux/spinlock.h> +#include <linux/list.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <linux/types.h> +#include <linux/poll.h> + +#include <linux/dm-userspace.h> + +#include "dm.h" +#include "dm-bio-list.h" +#include "kcopyd.h" +#include "dm-user.h" + +#define DMU_COPY_PAGES 256 + +#define DM_MSG_PREFIX "dm-userspace" + +static kmem_cache_t *request_cache; +mempool_t *request_pool; + +spinlock_t devices_lock; +LIST_HEAD(devices); + +/* Device number for the control device */ +dev_t dmu_dev; + +/* Add a request to a device's request queue */ +static void add_tx_request(struct dmu_device *dev, + struct dmu_request *req) +{ + unsigned long flags; + + BUG_ON(!list_empty(&req->list)); + + spin_lock_irqsave(&dev->tx_lock, flags); + list_add_tail(&req->list, &dev->tx_requests); + atomic_inc(&dev->t_reqs); + spin_unlock_irqrestore(&dev->tx_lock, flags); + + wake_up(&dev->wqueue); +} + +static void endio_worker(void *data) +{ + struct dmu_request *req = data; + struct dmu_device *dev = req->dev; + + spin_lock(&dev->lock); + if (list_empty(&req->list) && list_empty(&req->copy)) { + mempool_free(req, request_pool); + atomic_dec(&dev->f_reqs); + atomic_dec(&dev->total); + wake_up_interruptible(&dev->lowmem); + } else { + PREPARE_WORK(&req->task, endio_worker, req); + schedule_work(&req->task); + } + spin_unlock(&dev->lock); +} + +/* Return an already-bound target device */ +struct target_device *find_target(struct dmu_device *dev, + dev_t devno) +{ + struct target_device *target, *match = NULL; + + spin_lock(&dev->lock); + list_for_each_entry(target, &dev->target_devs, list) { + if (target->bdev->bd_dev == devno) { + match = target; + break; + } + } + spin_unlock(&dev->lock); + + return match; +} + +/* Find a new target device and bind it to our device */ +static struct target_device *get_target(struct dmu_device *dev, + dev_t devno) +{ + struct target_device *target; + struct block_device *bdev; + + target = find_target(dev, devno); + if (target) + return target; + + bdev = open_by_devnum(devno, FMODE_READ | FMODE_WRITE); + if (IS_ERR(bdev)) { + DMERR("Unable to lookup device %x", devno); + return NULL; + } + + target = kmalloc(sizeof(*target), GFP_KERNEL); + if (!target) { + DMERR("Unable to alloc new target device"); + return NULL; + } + + target->bdev = bdev; + INIT_LIST_HEAD(&target->list); + + if (in_interrupt()) + printk("%s in irq\n", __FUNCTION__); + + spin_lock(&dev->lock); + list_add_tail(&target->list, &dev->target_devs); + spin_unlock(&dev->lock); + + return target; +} + +/* Caller must hold dev->lock */ +static void put_target(struct dmu_device *dev, + struct target_device *target) +{ + list_del(&target->list); + + bd_release(target->bdev); + blkdev_put(target->bdev); + + kfree(target); +} + +void destroy_dmu_device(struct kref *ref) +{ + struct dmu_device *dev; + struct list_head *cursor, *next; + + dev = container_of(ref, struct dmu_device, users); + + spin_lock(&devices_lock); + list_del(&dev->list); + spin_unlock(&devices_lock); + + list_for_each_safe(cursor, next, &dev->target_devs) { + struct target_device *target; + + target = list_entry(cursor, + struct target_device, + list); + + put_target(dev, target); + 
} + + list_for_each_safe(cursor, next, &dev->tx_requests) { + struct dmu_request *req; + + req = list_entry(cursor, + struct dmu_request, + list); + + DMERR("Failing unsent bio"); + bio_io_error(req->bio, req->bio->bi_size); + + list_del(&req->list); + + mempool_free(req, request_pool); + } + + list_for_each_safe(cursor, next, &dev->rx_requests) { + struct dmu_request *req; + + req = list_entry(cursor, + struct dmu_request, + list); + + DMERR("Failing bio"); + req->flags = 0; + bio_io_error(req->bio, req->bio->bi_size); + + list_del(&req->list); + + mempool_free(req, request_pool); + } + + list_for_each_safe(cursor, next, &dev->cp_requests) { + struct dmu_request *req; + + req = list_entry(cursor, + struct dmu_request, + list); + + DMERR("Failing bio"); + req->flags = 0; + bio_io_error(req->bio, req->bio->bi_size); + + list_del(&req->list); + + mempool_free(req, request_pool); + } + + kcopyd_client_destroy(dev->kcopy); + unregister_chardev_transport(dev); + + kfree(dev); +} + +static int init_dmu_device(struct dmu_device *dev, u32 block_size) +{ + int ret; + + init_waitqueue_head(&dev->wqueue); + init_waitqueue_head(&dev->lowmem); + INIT_LIST_HEAD(&dev->list); + INIT_LIST_HEAD(&dev->target_devs); + kref_init(&dev->users); + spin_lock_init(&dev->lock); + spin_lock_init(&dev->tx_lock); + + INIT_LIST_HEAD(&dev->tx_requests); + INIT_LIST_HEAD(&dev->rx_requests); + INIT_LIST_HEAD(&dev->cp_requests); + + dev->block_size = block_size; + dev->block_mask = block_size - 1; + dev->block_shift = ffs(block_size) - 1; + + atomic_set(&dev->t_reqs, 0); + atomic_set(&dev->r_reqs, 0); + atomic_set(&dev->f_reqs, 0); + atomic_set(&dev->total, 0); + atomic_set(&dev->idcounter, 0); + + init_MUTEX(&dev->cp_sem); + + ret = kcopyd_client_create(DMU_COPY_PAGES, &dev->kcopy); + if (ret) { + DMERR("Failed to initialize kcopyd client"); + return 0; + } + + return 1; +} + +static struct dmu_device *new_dmu_device(char *key, + struct dm_target *ti, + u32 block_size) +{ + struct dmu_device *dev; + int ret; + + dev = kmalloc(sizeof(*dev), GFP_KERNEL); + if (dev == NULL) { + DMERR("Failed to allocate new userspace device"); + return NULL; + } + + if (!init_dmu_device(dev, block_size)) + goto bad1; + + snprintf(dev->key, DMU_KEY_LEN, "%s", key); + + ret = register_chardev_transport(dev); + if (!ret) + goto bad2; + + spin_lock(&devices_lock); + list_add(&dev->list, &devices); + spin_unlock(&devices_lock); + + return dev; + + bad2: + put_dev(dev); + bad1: + kfree(dev); + DMERR("Failed to create device"); + return NULL; +} + +static struct dmu_device *find_dmu_device(const char *key) +{ + struct dmu_device *dev; + struct dmu_device *match = NULL; + + spin_lock(&devices_lock); + + list_for_each_entry(dev, &devices, list) { + spin_lock(&dev->lock); + if (strncmp(dev->key, key, DMU_KEY_LEN) == 0) { + match = dev; + spin_unlock(&dev->lock); + break; + } + spin_unlock(&dev->lock); + } + + spin_unlock(&devices_lock); + + return match; +} + +static int dmu_ctr(struct dm_target *ti, unsigned int argc, char **argv) +{ + uint64_t block_size; + struct dmu_device *dev; + char *device_key; + char *block_size_param; + int target_idx = 2; + + if (argc < 3) { + ti->error = "Invalid argument count"; + return -EINVAL; + } + + device_key = argv[0]; + block_size_param = argv[1]; + + block_size = simple_strtoul(block_size_param, NULL, 10) / 512; + + dev = find_dmu_device(device_key); + if (dev == NULL) { + dev = new_dmu_device(device_key, + ti, + block_size); + if (dev == NULL) { + ti->error = "Failed to create device"; + goto bad; + } + } else { 
+ get_dev(dev); + } + + spin_lock(&dev->lock); + if (dev->block_size != block_size) { + ti->error = "Invalid block size"; + goto bad; + } + spin_unlock(&dev->lock); + + /* Resolve target devices */ + do { + int maj, min; + sscanf(argv[target_idx], "%i:%i", &maj, &min); + if (!get_target(dev, MKDEV(maj, min))) { + DMERR("Failed to find target device %i:%i (%s)", + maj, min, argv[target_idx]); + goto out; + } + } while (++target_idx < argc); + + ti->private = dev; + ti->split_io = block_size; + + return 0; + + bad: + if (dev) { + spin_unlock(&dev->lock); + } + out: + if (dev) { + put_dev(dev); + } + + return -EINVAL; +} + +static void dmu_dtr(struct dm_target *ti) +{ + struct dmu_device *dev = (struct dmu_device *) ti->private; + + put_dev(dev); +} + +static void init_req(struct dmu_device *dev, + struct bio *bio, + struct dmu_request *req) +{ + req->id = (uint64_t) atomic_add_return(1, &dev->idcounter); + + req->type = DM_USERSPACE_MAP_BLOCK_REQ; + req->dev = dev; + req->bio = bio; + req->u.block = dmu_block(dev, bio->bi_sector); + req->flags = 0; + INIT_LIST_HEAD(&req->deps); + INIT_LIST_HEAD(&req->list); + INIT_LIST_HEAD(&req->copy); + + if (bio_rw(bio)) + dmu_set_flag(&req->flags, DMU_FLAG_WR); +} + +static int dmu_map(struct dm_target *ti, struct bio *bio, + union map_info *map_context) +{ + struct dmu_device *dev = (struct dmu_device *) ti->private; + struct dmu_request *req; + + if (unlikely(bio_barrier(bio))) { + printk("Refusing bio barrier\n"); + return -EOPNOTSUPP; + } + + wait_event_interruptible(dev->lowmem, + atomic_read(&dev->total) <= 20000); + + req = mempool_alloc(request_pool, GFP_NOIO); + if (!req) { + DMERR("Failed to alloc request"); + return -1; + } + + atomic_inc(&dev->total); + + map_context->ptr = req; + + init_req(dev, bio, req); + + add_tx_request(dev, req); + + return 0; +} + +static int dmu_status(struct dm_target *ti, status_type_t type, + char *result, unsigned int maxlen) +{ + struct dmu_device *dev = (struct dmu_device *) ti->private; + + /* FIXME: Remove after debug */ + spin_lock(&dev->lock); + printk("Requests: %u t:%u r:%u f:%u (%c%c%c)\n", + atomic_read(&dev->total), + atomic_read(&dev->t_reqs), + atomic_read(&dev->r_reqs), + atomic_read(&dev->f_reqs), + list_empty(&dev->tx_requests) ? ' ':'T', + list_empty(&dev->rx_requests) ? ' ':'R', + list_empty(&dev->cp_requests) ? 
' ':'C'); + spin_unlock(&dev->lock); + + + switch (type) { + case STATUSTYPE_INFO: + write_chardev_transport_info(dev, result, maxlen); + break; + + case STATUSTYPE_TABLE: + snprintf(result, maxlen, "%s %llu", + dev->key, + dev->block_size * 512); + break; + } + + return 0; +} + +static int dmu_end_io(struct dm_target *ti, struct bio *bio, + int error, union map_info *map_context) +{ + struct dmu_request *req = map_context->ptr; + int ret = 0; + + if (error) + return -1; + + if (dmu_get_flag(&req->flags, DMU_FLAG_SYNC)) { + req->type = DM_USERSPACE_MAP_DONE; + add_tx_request(req->dev, req); + ret = 1; + } else { + INIT_WORK(&req->task, endio_worker, req); + schedule_work(&req->task); + } + + return ret; +} + +struct target_type userspace_target = { + .name = "userspace", + .version = {0, 1, 0}, + .module = THIS_MODULE, + .ctr = dmu_ctr, + .dtr = dmu_dtr, + .map = dmu_map, + .status = dmu_status, + .end_io = dmu_end_io +}; + +int __init dm_userspace_init(void) +{ + int r = dm_register_target(&userspace_target); + if (r < 0) { + DMERR("Register failed %d", r); + return 0; + } + + spin_lock_init(&devices_lock); + + request_cache = + kmem_cache_create("dm-userspace-requests", + sizeof(struct dmu_request), + __alignof__ (struct dmu_request), + 0, NULL, NULL); + if (!request_cache) { + DMERR("Failed to allocate request cache"); + goto bad; + } + + request_pool = mempool_create(64, + mempool_alloc_slab, mempool_free_slab, + request_cache); + if (!request_pool) { + DMERR("Failed to allocate request pool"); + goto bad2; + } + + r = init_chardev_transport(); + if (!r) + goto bad3; + + return 1; + + bad3: + mempool_destroy(request_pool); + bad2: + kmem_cache_destroy(request_cache); + bad: + dm_unregister_target(&userspace_target); + + return 0; +} + +void __exit dm_userspace_exit(void) +{ + int r; + struct list_head *cursor, *next; + struct dmu_device *dev; + + spin_lock(&devices_lock); + + list_for_each_safe(cursor, next, &devices) { + dev = list_entry(cursor, struct dmu_device, list); + list_del(cursor); + destroy_dmu_device(&dev->users); + DMERR("Destroying hanging device %s", dev->key); + } + + spin_unlock(&devices_lock); + + cleanup_chardev_transport(); + + mempool_destroy(request_pool); + kmem_cache_destroy(request_cache); + + r = dm_unregister_target(&userspace_target); + if (r < 0) + DMERR("unregister failed %d", r); +} + +module_init(dm_userspace_init); +module_exit(dm_userspace_exit); + +MODULE_DESCRIPTION(DM_NAME " userspace target"); +MODULE_AUTHOR("Dan Smith"); +MODULE_LICENSE("GPL"); diff -Naur linux-2.6.18-orig/drivers/md/dm-userspace-chardev.c linux-2.6.18-dmu/drivers/md/dm-userspace-chardev.c --- linux-2.6.18-orig/drivers/md/dm-userspace-chardev.c 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.18-dmu/drivers/md/dm-userspace-chardev.c 2006-09-28 13:49:18.000000000 -0700 @@ -0,0 +1,598 @@ +/* + * Copyright (C) International Business Machines Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#include <linux/spinlock.h> +#include <linux/blkdev.h> +#include <linux/mempool.h> +#include <linux/dm-userspace.h> +#include <linux/list.h> +#include <linux/sched.h> +#include <linux/wait.h> +#include <linux/poll.h> +#include <linux/fs.h> +#include <linux/cdev.h> +#include <asm/uaccess.h> + +#include "dm.h" +#include "dm-bio-list.h" +#include "kcopyd.h" +#include "dm-user.h" + +#define DM_MSG_PREFIX "dm-userspace" + +/* This allows for a cleaner separation between the dm-userspace + * device-mapper target, and the userspace transport used. Right now, + * only a chardev transport exists, but it's possible that there could + * be more in the future + */ +struct chardev_transport { + struct cdev cdev; + dev_t ctl_dev; + struct dmu_device *parent; +}; + +static struct dmu_request *find_rx_request(struct dmu_device *dev, + uint64_t id) +{ + struct dmu_request *req, *next, *match = NULL; + + spin_lock(&dev->lock); + list_for_each_entry_safe(req, next, &dev->rx_requests, list) { + if (req->id == id) { + list_del_init(&req->list); + match = req; + atomic_dec(&dev->r_reqs); + break; + } + } + spin_unlock(&dev->lock); + + return match; +} + +static int have_pending_requests(struct dmu_device *dev) +{ + return atomic_read(&dev->t_reqs) != 0; +} + +static int send_userspace_message(uint8_t __user *buffer, + struct dmu_request *req) +{ + int ret = 0; + struct dmu_msg msg; + + memset(&msg, 0, sizeof(msg)); + + msg.hdr.id = req->id; + + switch (req->type) { + case DM_USERSPACE_MAP_BLOCK_REQ: + msg.hdr.msg_type = req->type; + msg.payload.map_req.org_block = req->u.block; + dmu_cpy_flag(&msg.payload.map_req.flags, + req->flags, DMU_FLAG_WR); + break; + + case DM_USERSPACE_MAP_DONE: + msg.hdr.msg_type = DM_USERSPACE_MAP_DONE; + msg.payload.map_done.id_of_op = req->id; + msg.payload.map_done.org_block = req->u.block; + dmu_cpy_flag(&msg.payload.map_done.flags, + req->flags, DMU_FLAG_WR); + break; + + default: + DMWARN("Unknown outgoing message type %i", req->type); + ret = 0; + } + + if (copy_to_user(buffer, &msg, sizeof(msg))) + return -EFAULT; + + ret = sizeof(msg); + + /* If this request is not on a list (the rx_requests list), + * then it needs to be freed after sending + */ + if (list_empty(&req->list)) + mempool_free(req, request_pool); + + return ret; +} + +struct dmu_request *pluck_next_request(struct dmu_device *dev) +{ + struct dmu_request *req = NULL; + unsigned long flags; + + spin_lock_irqsave(&dev->tx_lock, flags); + if (!list_empty(&dev->tx_requests)) { + req = list_entry(dev->tx_requests.next, + struct dmu_request, list); + list_del_init(&req->list); + + atomic_dec(&dev->t_reqs); + } + spin_unlock_irqrestore(&dev->tx_lock, flags); + + if (req && ((req->type == DM_USERSPACE_MAP_BLOCK_REQ) || + (req->type == DM_USERSPACE_MAP_DONE))) { + spin_lock(&dev->lock); + list_add_tail(&req->list, &dev->rx_requests); + atomic_inc(&dev->r_reqs); + spin_unlock(&dev->lock); + } + + return req; +} + +ssize_t dmu_ctl_read(struct file *file, char __user *buffer, + size_t size, loff_t *offset) +{ + + struct dmu_device *dev = (struct dmu_device *)file->private_data; + struct dmu_request *req = NULL; + int ret = 0, r; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + if (size < sizeof(struct dmu_msg)) { + DMERR("Userspace buffer too small for a single message"); + return 0; + } + + 
while (!have_pending_requests(dev)) { + if (file->f_flags & O_NONBLOCK) { + return 0; + } + + if (wait_event_interruptible(dev->wqueue, + have_pending_requests(dev))) + return -ERESTARTSYS; + } + + while (ret < size) { + if ((size - ret) < sizeof(struct dmu_msg)) + break; + + req = pluck_next_request(dev); + if (!req) + break; + + r = send_userspace_message((void *)(buffer + ret), req); + if (r == 0) + continue; + else if (r < 0) + return r; + + ret += r; + } + + if (ret < sizeof(struct dmu_msg)) { + if (ret != 0) + DMERR("Sending partial message!"); + DMINFO("Sent 0 requests to userspace"); + } + + return ret; +} + +static struct dmu_request *pluck_dep_req(struct dmu_request *req) +{ + struct dmu_request *dreq = NULL; + + if (list_empty(&req->deps)) { + /* Delete from cp_requests */ + list_del_init(&req->copy); + } else { + /* Get next dependent request */ + dreq = list_entry(req->deps.next, struct dmu_request, list); + list_del_init(&dreq->list); + } + + return dreq; +} + +static void flush_block(int read_err, unsigned int write_err, void *data) +{ + struct dmu_request *req = data; + struct dmu_request *dreq; + uint64_t id = req->id; + + if (read_err || write_err) { + DMERR("Failed to copy block!"); + bio_io_error(req->bio, req->bio->bi_size); + while ((dreq = pluck_dep_req(req))) { + bio_io_error(dreq->bio, dreq->bio->bi_size); + } + return; + } + + atomic_inc(&req->dev->f_reqs); + generic_make_request(req->bio); + + down(&req->dev->cp_sem); + while ((dreq = pluck_dep_req(req))) { + if (id > dreq->id) { + printk(KERN_EMERG "Flushing %llu after %llu\n", + dreq->id, id); + } + id = dreq->id; + atomic_inc(&req->dev->f_reqs); + generic_make_request(dreq->bio); + } + up(&req->dev->cp_sem); +} + +static void copy_block(struct dmu_device *dev, + struct block_device *src_dev, + struct block_device *dst_dev, + struct dmu_request *req, + uint64_t org_block, + uint64_t new_block, + int64_t offset) +{ + struct io_region src, dst; + + src.bdev = src_dev; + src.sector = dmu_sector(dev, org_block); + src.count = dev->block_size; + + dst.bdev = dst_dev; + dst.sector = dmu_sector(dev, new_block); + dst.sector += offset; + dst.count = dev->block_size; + + kcopyd_copy(dev->kcopy, &src, 1, &dst, 0, flush_block, req); +} + +/* + * Queues @req with a waiting request to the same block, if one + * exists. Returns nonzero if queued. 
+ */ +static int maybe_queue_dependent_request(struct dmu_request *req, + int is_copy_first) +{ + struct dmu_request *dreq = NULL; + int found = 0; + + BUG_ON(!list_empty(&req->list)); + + down(&req->dev->cp_sem); + + list_for_each_entry(dreq, &req->dev->cp_requests, copy) { + if (req->u.block == dreq->u.block) { + list_add_tail(&req->list, &dreq->deps); + found = 1; + break; + } + } + + if (!found && is_copy_first) { + BUG_ON(!list_empty(&req->copy)); + list_add(&req->copy, &req->dev->cp_requests); + } + + up(&req->dev->cp_sem); + + return found; +} + +static void map_worker(void *data) +{ + struct dmu_request *req = data; + struct dmu_msg_map_response *msg = &req->response; + struct dmu_device *dev = req->dev; + struct target_device *src_dev, *dst_dev; + int need_copy = dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST); + + if (dmu_get_flag(&msg->flags, DMU_FLAG_COPY_FIRST)) { + src_dev = find_target(dev, MKDEV(msg->src_maj, msg->src_min)); + if (!src_dev) { + DMERR("Failed to find src device %i:%i\n", + msg->src_maj, msg->src_min); + goto fail; + } + } else + src_dev = NULL; + + dst_dev = find_target(dev, MKDEV(msg->dst_maj, msg->dst_min)); + if (!dst_dev) { + DMERR("Failed to find dest device %i:%i\n", + msg->dst_maj, msg->dst_min); + goto fail; + } + + /* Remap the bio */ + req->bio->bi_sector = dmu_sector(dev, msg->new_block) + + dmu_sector_offset(dev, req->bio->bi_sector) + + msg->offset; + req->bio->bi_bdev = dst_dev->bdev; + + dmu_set_flag(&req->flags, DMU_FLAG_SYNC); + + if (!maybe_queue_dependent_request(req, need_copy)) { + if (need_copy) + copy_block(dev, src_dev->bdev, dst_dev->bdev, req, + req->u.block, msg->new_block, + msg->offset); + else + flush_block(0, 0, req); + } + + return; + + fail: + bio_io_error(req->bio, req->bio->bi_size); +} + +static void do_map_bio(struct dmu_device *dev, + struct dmu_msg_map_response *msg) +{ + struct dmu_request *req; + + req = find_rx_request(dev, msg->id_of_req); + if (!req) { + DMERR("Unable to complete unknown map: %llu\n", + msg->id_of_req); + return; + } + + memcpy(&req->response, msg, sizeof(req->response)); + +#if 0 + /* I think it would be nice to farm this out to a worker + * thread, so that userspace does not have to do all the work, + * but I wonder about the correctness of possibly reordering + * requests to a single block + */ + INIT_WORK(&req->task, map_worker, req); + schedule_work(&req->task); +#else + map_worker(req); +#endif +} + +static void do_map_done(struct dmu_device *dev, uint64_t id_of_op, int fail) +{ + struct dmu_request *req; + + req = find_rx_request(dev, id_of_op); + if (!req) { + DMERR("Unable to complete unknown request: %llu\n", + id_of_op); + return; + } + + dmu_clr_flag(&req->flags, DMU_FLAG_SYNC); + + req->bio->bi_end_io(req->bio, req->bio->bi_size, fail); +} + +static void do_map_failed(struct dmu_device *dev, uint64_t id_of_op) +{ + struct dmu_request *req; + + req = find_rx_request(dev, id_of_op); + if (!req) { + DMERR("Unable to fail unknown request: %llu\n", + id_of_op); + return; + } + + DMERR("Userspace failed to map id %llu (sector %llu)", + id_of_op, req->bio->bi_sector); + + bio_io_error(req->bio, req->bio->bi_size); + + mempool_free(req, request_pool); +} + +ssize_t dmu_ctl_write(struct file *file, const char __user *buffer, + size_t size, loff_t *offset) +{ + struct dmu_device *dev = (struct dmu_device *)file->private_data; + int ret = 0; + struct dmu_msg msg; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + while ((ret + sizeof(msg)) <= size) { + if (copy_from_user(&msg, buffer+ret, 
sizeof(msg))) { + DMERR("%s copy_from_user failed!", __FUNCTION__); + ret = -EFAULT; + goto out; + } + + ret += sizeof(msg); + + switch (msg.hdr.msg_type) { + case DM_USERSPACE_MAP_BLOCK_RESP: + do_map_bio(dev, &msg.payload.map_rsp); + break; + + case DM_USERSPACE_MAP_FAILED: + do_map_failed(dev, msg.payload.map_rsp.id_of_req); + break; + + case DM_USERSPACE_MAP_DONE: + do_map_done(dev, msg.payload.map_done.id_of_op, 0); + break; + + case DM_USERSPACE_MAP_DONE_FAILED: + do_map_done(dev, msg.payload.map_done.id_of_op, 1); + break; + + default: + DMWARN("Unknown incoming request type: %i", + msg.hdr.msg_type); + } + } + out: + if (ret < sizeof(msg)) { + DMINFO("Received 0 responses from userspace"); + } + + return ret; +} + +int dmu_ctl_open(struct inode *inode, struct file *file) +{ + struct chardev_transport *t; + struct dmu_device *dev; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + t = container_of(inode->i_cdev, struct chardev_transport, cdev); + dev = t->parent; + + get_dev(dev); + + file->private_data = dev; + + return 0; +} + +int dmu_ctl_release(struct inode *inode, struct file *file) +{ + struct dmu_device *dev; + + dev = (struct dmu_device *)file->private_data; + + put_dev(dev); + + return 0; +} + +unsigned dmu_ctl_poll(struct file *file, poll_table *wait) +{ + struct dmu_device *dev = (struct dmu_device *)file->private_data; + unsigned mask = 0; + + poll_wait(file, &dev->wqueue, wait); + + if (have_pending_requests(dev)) + mask |= POLLIN | POLLRDNORM; + + return mask; +} + +static struct file_operations ctl_fops = { + .open = dmu_ctl_open, + .release = dmu_ctl_release, + .read = dmu_ctl_read, + .write = dmu_ctl_write, + .poll = dmu_ctl_poll, + .owner = THIS_MODULE, +}; + +static int get_free_minor(void) +{ + struct dmu_device *dev; + int minor = 0; + + spin_lock(&devices_lock); + + while (1) { + list_for_each_entry(dev, &devices, list) { + struct chardev_transport *t = dev->transport_private; + if (MINOR(t->ctl_dev) == minor) + goto dupe; + } + break; + dupe: + minor++; + } + + spin_unlock(&devices_lock); + + return minor; +} + +int register_chardev_transport(struct dmu_device *dev) +{ + struct chardev_transport *t; + int ret; + + dev->transport_private = kmalloc(sizeof(struct chardev_transport), + GFP_KERNEL); + t = dev->transport_private; + + if (!t) { + DMERR("Failed to allocate chardev transport"); + goto bad; + } + + t->ctl_dev = MKDEV(MAJOR(dmu_dev), get_free_minor()); + t->parent = dev; + + cdev_init(&t->cdev, &ctl_fops); + t->cdev.owner = THIS_MODULE; + t->cdev.ops = &ctl_fops; + + ret = cdev_add(&t->cdev, t->ctl_dev, 1); + if (ret < 0) { + DMERR("Failed to register control device %d:%d", + MAJOR(t->ctl_dev), MINOR(t->ctl_dev)); + goto bad; + } + + return 1; + + bad: + kfree(t); + return 0; +} + +void unregister_chardev_transport(struct dmu_device *dev) +{ + struct chardev_transport *t = dev->transport_private; + + cdev_del(&t->cdev); + kfree(t); +} + +int init_chardev_transport(void) +{ + int r; + + r = alloc_chrdev_region(&dmu_dev, 0, 10, "dm-userspace"); + if (r) { + DMERR("Failed to allocate chardev region"); + return 0; + } else + return 1; +} + +void cleanup_chardev_transport(void) +{ + unregister_chrdev_region(dmu_dev, 10); +} + +void write_chardev_transport_info(struct dmu_device *dev, + char *buf, unsigned int maxlen) +{ + struct chardev_transport *t = dev->transport_private; + + snprintf(buf, maxlen, "%x:%x", + MAJOR(t->ctl_dev), MINOR(t->ctl_dev)); +} diff -Naur linux-2.6.18-orig/drivers/md/Kconfig linux-2.6.18-dmu/drivers/md/Kconfig --- 
linux-2.6.18-orig/drivers/md/Kconfig 2006-09-19 20:42:06.000000000 -0700 +++ linux-2.6.18-dmu/drivers/md/Kconfig 2006-09-28 13:49:18.000000000 -0700 @@ -223,6 +223,12 @@ ---help--- Allow volume managers to take writable snapshots of a device. +config DM_USERSPACE + tristate "Userspace target (EXPERIMENTAL)" + depends on BLK_DEV_DM && EXPERIMENTAL + ---help--- + A target that provides a userspace interface to device-mapper + config DM_MIRROR tristate "Mirror target (EXPERIMENTAL)" depends on BLK_DEV_DM && EXPERIMENTAL diff -Naur linux-2.6.18-orig/drivers/md/Makefile linux-2.6.18-dmu/drivers/md/Makefile --- linux-2.6.18-orig/drivers/md/Makefile 2006-09-19 20:42:06.000000000 -0700 +++ linux-2.6.18-dmu/drivers/md/Makefile 2006-09-28 13:49:18.000000000 -0700 @@ -14,6 +14,7 @@ raid6altivec1.o raid6altivec2.o raid6altivec4.o \ raid6altivec8.o \ raid6mmx.o raid6sse1.o raid6sse2.o +dm-user-objs := dm-userspace.o dm-userspace-chardev.o hostprogs-y := mktables # Note: link order is important. All raid personalities @@ -36,6 +37,7 @@ obj-$(CONFIG_DM_SNAPSHOT) += dm-snapshot.o obj-$(CONFIG_DM_MIRROR) += dm-mirror.o obj-$(CONFIG_DM_ZERO) += dm-zero.o +obj-$(CONFIG_DM_USERSPACE) += dm-user.o quiet_cmd_unroll = UNROLL $@ cmd_unroll = $(PERL) $(srctree)/$(src)/unroll.pl $(UNROLL) \
diff -Naur linux-2.6.18-orig/include/linux/dm-userspace.h linux-2.6.18-dmu/include/linux/dm-userspace.h --- linux-2.6.18-orig/include/linux/dm-userspace.h 1969-12-31 16:00:00.000000000 -0800 +++ linux-2.6.18-dmu/include/linux/dm-userspace.h 2006-09-28 13:49:18.000000000 -0700 @@ -0,0 +1,115 @@ +/* + * Copyright (C) International Business Machines Corp., 2006 + * Author: Dan Smith <danms@xxxxxxxxxx> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; under version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef __DM_USERSPACE_H +#define __DM_USERSPACE_H + +#include <linux/types.h> + +/* + * Message Types + */ +#define DM_USERSPACE_MAP_BLOCK_REQ 1 +#define DM_USERSPACE_MAP_BLOCK_RESP 2 +#define DM_USERSPACE_MAP_FAILED 3 +#define DM_USERSPACE_MAP_DONE 4 +#define DM_USERSPACE_MAP_DONE_FAILED 5 + +/* + * Flags and associated macros + */ +#define DMU_FLAG_VALID 1 +#define DMU_FLAG_WR 2 +#define DMU_FLAG_COPY_FIRST 4 +#define DMU_FLAG_SYNC 8 + +static inline int dmu_get_flag(uint32_t *flags, uint32_t flag) +{ + return (*flags & flag) != 0; +} + +static inline void dmu_set_flag(uint32_t *flags, uint32_t flag) +{ + *flags |= flag; +} + +static inline void dmu_clr_flag(uint32_t *flags, uint32_t flag) +{ + *flags &= (~flag); +} + +static inline void dmu_cpy_flag(uint32_t *flags, uint32_t src, uint32_t flag) +{ + *flags = (*flags & ~flag) | (src & flag); +} + +/* + * This message header is sent in front of every message, in both + * directions + */ +struct dmu_msg_header { + uint64_t id; + uint32_t msg_type; + uint32_t payload_len; +}; + +/* DM_USERSPACE_MAP_DONE + * DM_USERSPACE_MAP_DONE_FAILED + */ +struct dmu_msg_map_done { + uint64_t id_of_op; + uint64_t org_block; + uint32_t flags; +}; + +/* DM_USERSPACE_MAP_BLOCK_REQ */ +struct dmu_msg_map_request { + uint64_t org_block; + + uint32_t flags; +}; + +/* DM_USERSPACE_MAP_BLOCK_RESP + * DM_USERSPACE_MAP_FAILED + */ +struct dmu_msg_map_response { + uint64_t new_block; + int64_t offset; + + uint64_t id_of_req; + uint32_t flags; + + uint32_t src_maj; + uint32_t src_min; + + uint32_t dst_maj; + uint32_t dst_min; +}; + +/* A full message */ +struct dmu_msg { + struct dmu_msg_header hdr; + union { + struct dmu_msg_map_done map_done; + struct dmu_msg_map_request map_req; + struct dmu_msg_map_response map_rsp; + } payload; +}; + +#endif
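
Until the updated userspace code is posted, here is a minimal sketch of what a responder for this interface can look like, driven purely by the message formats in dm-userspace.h above. This is illustrative only and not part of the patch: the /dev/dmu0 node name, the table line, and the identity-mapping policy onto a fixed 8:0 destination are assumptions made for the example, not anything the kernel side mandates.

/*
 * mapd.c - minimal dm-userspace responder sketch (illustrative only)
 *
 * Assumed setup, not mandated by the patch:
 *  - the control node was created by hand, e.g.
 *        mknod /dev/dmu0 c <major> <minor>
 *    using the numbers reported by the target's status (INFO) line
 *  - the dm table lists 8:0 among its target devices, e.g.
 *        echo "0 <sectors> userspace mykey 4096 8:0" | dmsetup create dmu0
 *  - policy: identity-map every block onto 8:0, no copy-first
 *
 * Must run with CAP_SYS_ADMIN, since the chardev checks for it.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <stdint.h>
#include <linux/dm-userspace.h>

int main(int argc, char **argv)
{
	const char *path = (argc > 1) ? argv[1] : "/dev/dmu0";
	struct dmu_msg msg, rsp;
	int fd;

	fd = open(path, O_RDWR);
	if (fd < 0) {
		perror(path);
		return 1;
	}

	/* A blocking read returns one struct dmu_msg at a time here,
	 * since the buffer only has room for one */
	while (read(fd, &msg, sizeof(msg)) == sizeof(msg)) {
		memset(&rsp, 0, sizeof(rsp));
		rsp.hdr.id = msg.hdr.id;

		switch (msg.hdr.msg_type) {
		case DM_USERSPACE_MAP_BLOCK_REQ:
			/* Identity map: block N stays block N on 8:0 */
			rsp.hdr.msg_type = DM_USERSPACE_MAP_BLOCK_RESP;
			rsp.payload.map_rsp.id_of_req = msg.hdr.id;
			rsp.payload.map_rsp.new_block =
				msg.payload.map_req.org_block;
			rsp.payload.map_rsp.offset = 0;
			rsp.payload.map_rsp.dst_maj = 8;
			rsp.payload.map_rsp.dst_min = 0;
			dmu_cpy_flag(&rsp.payload.map_rsp.flags,
				     msg.payload.map_req.flags,
				     DMU_FLAG_WR);
			break;

		case DM_USERSPACE_MAP_DONE:
			/* Ack the completed sync request so the kernel
			 * ends the original bio */
			rsp.hdr.msg_type = DM_USERSPACE_MAP_DONE;
			rsp.payload.map_done.id_of_op =
				msg.payload.map_done.id_of_op;
			break;

		default:
			continue;
		}

		if (write(fd, &rsp, sizeof(rsp)) != sizeof(rsp)) {
			perror("write");
			break;
		}
	}

	close(fd);
	return 0;
}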
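To exercise the sketch: load the module, create the table, start the responder, then do I/O to the mapped device and watch requests flow through the chardev. Note again that the device numbers above are examples; whatever the responder returns in dst_maj/dst_min must match one of the maj:min pairs passed to the constructor, or find_target() will fail the bio.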