[dm-devel] Another experimental dm target... an encryption target

Hi!

It looks like kernel programming is real fun... I hope Jon isn't going
to kill me for that.

I wrote a dm-crypt target that works like a linear target but does
encryption using the new cryptoapi in the 2.5/2.6 kernel.

The target configuration syntax is "<cipher> <key> <iv_offset> <device>
<sector>".

<cipher> is something like aes or des. It can optionally be suffixed by
the mode (like des-ecb). There are two modes: ecb and cbc. The cbc mode
uses the sector number as the IV, so that identical plaintext sectors
(like zero-filled ones) produce different ciphertext. <iv_offset> is an
additional sector offset added before computing the IV, so that you can
map only a part of an encrypted device and still be able to read your
data. Note that cbc here doesn't actually chain blocks across sectors,
because that would be bad for a random-access device, so the name might
be misleading.

<key> is the key in hex (a long string, two digits per byte, e.g. 32
hex digits for a 128-bit key).

<device> and <sector> are the same as in the linear target.
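
For example, a complete table line might look like this (hypothetical
key and device, sizes in 512-byte sectors):

	0 204800 crypt aes-cbc 00112233445566778899aabbccddeeff 0 /dev/hda2 0

This maps 100MB of /dev/hda2, starting at its beginning, encrypted
with a (made-up) 128-bit AES key in cbc mode.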

The implementation is somewhat sub-optimal. There are some issues that
lead to unnecessary complexity:

1. I could save a mempool if I could access the target_io structure (I
could, but that's really ugly). The problem is that I need to reach my
crypt_c structure from my endio function, and I also need the original
bio, but I can't put two pointers into cloned_bio->bi_private. So I
have to allocate a crypt_io structure that holds both pointers (see
struct crypt_io in the code below).

2. I can't use the "new" end_io mechanism for which Joe posted a patch
some days ago. That's because I need to clone the bio (again), since I
have to associate new pages with it, at least for writes. Because of
this I have to implement my own endio function for the cloned bios.
Another problem with the dm end_io handler is that by the time it runs,
the bio has lost the information about the size of the data and its
pages, which I need in order to convert it. That's why I also need to
clone the bio on reads. At least I don't need to allocate additional
buffers there, because I can decrypt the data in place.

The bios device-mapper hands to the target are never sent down
themselves. I've got the same problem in dm-file (though there at
least I don't need to clone them again, so it's not too big a waste).

Perhaps this could be simplified somehow in the generic device-mapper
layer, but at the moment I'm not sure how.

So you see, a lot of room for improvement. ;)

Well, the cryptoapi is dog-slow anyway, so this isn't really a
performance problem, just unneeded complexity.

--
Christophe Saout <christophe@xxxxxxxx>
Please avoid sending me Word or PowerPoint attachments.
See http://www.fsf.org/philosophy/no-word-attachments.html
/*
 * Copyright (C) 2001 Sistina Software (UK) Limited.
 *
 * This file is released under the GPL.
 */

#include "dm.h"

#include <linux/module.h>
#include <linux/init.h>
#include <linux/bio.h>
#include <linux/mempool.h>
#include <linux/slab.h>
#include <linux/crypto.h>
#include <linux/spinlock.h>
#include <asm/scatterlist.h>

/*
 * per bio private data
 */
struct crypt_io {
	struct dm_target *target;
	struct bio *bio;
	struct bio *clone;
};

/*
 * Crypt: maps a linear range of a block device
 * and encrypts / decrypts at the same time.
 */
struct crypt_c {
	struct dm_dev *dev;
	sector_t start;

	/*
	 * pool for per bio private data
	 */
	mempool_t *io_pool;

	/*
	 * worker thread related data
	 */
	spinlock_t lock;
	struct bio *bio_head;
	struct bio *bio_tail;
	struct semaphore bh_mutex;
	struct semaphore sem;

	/*
	 * crypto related data
	 */
	struct crypto_tfm *tfm;
	sector_t iv_offset;
	int iv_size;
	int key_size;
	u8 key[0];
};

#define MIN_IOS 256
static kmem_cache_t *_io_cache;

static inline struct crypt_io *crypt_alloc_io(struct crypt_c *cc)
{
	return mempool_alloc(cc->io_pool, GFP_NOIO);
}

static inline void crypt_free_io(struct crypt_c *cc, struct crypt_io *io)
{
	mempool_free(io, cc->io_pool);
}

/*
 * Encrypt / decrypt a single sector, source and destination buffers
 * are stored in scatterlists. In CBC mode, initialise the "previous
 * block" with the sector number (it's not real chaining because that
 * would not allow seeking on the device...)
 */
static inline int
crypt_convert_scatterlist(struct crypt_c *cc, struct scatterlist *out,
                          struct scatterlist *in, unsigned int length,
                          int write, sector_t sector)
{
	u8 iv[cc->iv_size];
	int r;

	if (cc->iv_size) {
		*(u32 *)iv = cpu_to_le32(sector & 0xffffffff);
		if (cc->iv_size > sizeof(u32) / sizeof(u8))
			memset(iv + (sizeof(u32) / sizeof(u8)), 0,
			       cc->iv_size - (sizeof(u32) / sizeof(u8)));

		if (write)
			r = crypto_cipher_encrypt_iv(cc->tfm, out, in, length, iv);
		else
			r = crypto_cipher_decrypt_iv(cc->tfm, out, in, length, iv);
	} else {
		if (write)
			r = crypto_cipher_encrypt(cc->tfm, out, in, length);
		else
			r = crypto_cipher_decrypt(cc->tfm, out, in, length);
	}

	return r;
}

/*
 * Encrypt / decrypt data from one bio to another one (may be the same)
 */
static int crypt_convert(struct crypt_c *cc, struct bio *bio_out,
                         struct bio *bio_in, sector_t sector, int write)
{
	unsigned int offset_in = 0;
	unsigned int offset_out = 0;
	int idx_in = bio_in->bi_idx;
	int idx_out = bio_out->bi_idx;
	int r = 0;

	sector += cc->iv_offset;

	while(idx_in < bio_in->bi_vcnt) {
		struct bio_vec *bv_in = bio_iovec_idx(bio_in, idx_in);
		struct bio_vec *bv_out = bio_iovec_idx(bio_out, idx_out);
		struct scatterlist sg_in = {
			.page = bv_in->bv_page,
			.offset = bv_in->bv_offset + offset_in,
			.length = 1 << SECTOR_SHIFT
		};
		struct scatterlist sg_out = {
			.page = bv_out->bv_page,
			.offset = bv_out->bv_offset + offset_out,
			.length = 1 << SECTOR_SHIFT
		};

		offset_in += sg_in.length;
		if (offset_in >= bv_in->bv_len) {
			offset_in = 0;
			idx_in++;
		}

		offset_out += sg_out.length;
		if (offset_out >= bv_out->bv_len) {
			offset_out = 0;
			idx_out++;
		}

		r = crypt_convert_scatterlist(cc, &sg_out, &sg_in, sg_in.length,
		                              write, sector);
		if (r < 0)
			break;

		sector++;
	}

	return r;
}

/*
 * Generate a new unfragmented bio with the given size.
 * This should never violate the device limitations.
 */
static struct bio *crypt_alloc_sized_bio(unsigned int size)
{
	struct bio *bio;
	struct bio_vec *bv;
	int nr_iovecs = dm_div_up(size, PAGE_SIZE);
	int i;

	bio = bio_alloc(GFP_NOIO|__GFP_NOWARN, nr_iovecs);
	if (!bio)
		return NULL;

	bio->bi_vcnt = nr_iovecs;
	bio->bi_size = size;
	__bio_for_each_segment(bv, bio, i, 0) {
		bv->bv_page = alloc_page(GFP_NOIO|__GFP_NOWARN|__GFP_HIGHMEM);
		if (!bv->bv_page)
			goto oom;

		bv->bv_offset = 0;
		if (size > PAGE_SIZE)
			bv->bv_len = PAGE_SIZE;
		else
			bv->bv_len = size;

		size -= PAGE_SIZE;
	}

	return bio;

oom:
	while(--i >= 0)
		__free_page(bio->bi_io_vec[i].bv_page);
	bio_put(bio);
	return NULL;
}

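/*
 * Allocate an unfragmented bio with own pages for a write request,
 * retrying and throttling when memory is tight
 */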
static struct bio *crypt_alloc_buffer(struct bio *bio)
{
	struct bio *new_bio;
	int retries = 50;

	/*
	 * When called on the page reclaim -> writepage path, this code can
	 * trivially consume all memory.  So we drop PF_MEMALLOC to avoid
	 * stealing all the page reserves and throttle to the writeout rate.
	 * pdflush will have been woken by page reclaim.  Let it do its work.
	 */
	do {
		int flags = current->flags;

		current->flags &= ~PF_MEMALLOC;
		new_bio = crypt_alloc_sized_bio(bio->bi_size);
		if (flags & PF_MEMALLOC)
			current->flags |= PF_MEMALLOC;

		if (!new_bio) {
			if (!--retries)
				return NULL;
			blk_congestion_wait(WRITE, HZ/10);
		}
	} while(!new_bio);

	new_bio->bi_rw = bio->bi_rw;

	return new_bio;
}

static void crypt_free_buffer(struct bio *bio)
{
	struct bio_vec *bv;
	int i;

	BUG_ON(bio_flagged(bio, BIO_CLONED));

	__bio_for_each_segment(bv, bio, i, 0)
		__free_page(bv->bv_page);

	bio_put(bio);
}

/*
 * Fetch next bio from work queue
 */
static struct bio *cryptio_next_bio(struct crypt_c *cc)
{
	struct bio *bio;

	spin_lock_irq(&cc->lock);
	bio = cc->bio_head;
	if (bio) {
		if (bio == cc->bio_tail)
			cc->bio_tail = NULL;
		cc->bio_head = bio->bi_next;
		bio->bi_next = NULL;
	}
	spin_unlock_irq(&cc->lock);

	return bio;
}

/*
 * Append bio to work queue
 */
static void cryptio_queue_bio(struct crypt_c *cc, struct bio *bio)
{
	unsigned long flags;

	spin_lock_irqsave(&cc->lock, flags);
	if (cc->bio_tail)
		cc->bio_tail->bi_next = bio;
	else
		cc->bio_head = bio;
	cc->bio_tail = bio;
	spin_unlock_irqrestore(&cc->lock, flags);
}

/*
 * Worker thread
 * Needed because we can't decrypt in interrupt context, so bios
 * returning from read requests get queued here for decryption
 */
static int cryptio_thread(void *data)
{
	struct dm_target *ti = (struct dm_target*) data;
	struct crypt_c *cc = (struct crypt_c*) ti->private;
	int r;

	daemonize("dm-crypt");

	current->flags |= PF_IOTHREAD;
	set_user_nice(current, -20);

	up(&cc->sem);

	while (1) {
		struct bio *bio;
		struct crypt_io *io;

		while (down_interruptible(&cc->bh_mutex) == -EINTR);

		bio = cryptio_next_bio(cc);

		/* woken up but no data: termination signal */
		if (!bio)
			 break;

		io = (struct crypt_io*) bio->bi_private;

		r = crypt_convert(cc, io->bio, io->bio,
		                  io->bio->bi_sector - ti->begin, 0);

		bio_put(bio);

		bio_endio(io->bio, io->bio->bi_size, r);
		crypt_free_io(cc, io);
	}

	up(&cc->sem);
	return 0;
}

/*
 * Decode key from its hex representation
 */
static int crypt_decode_key(u8 *key, char *hex, int size)
{
	int i;
	for(i = 0; i < size; i++) {
		int digits;
		if (*hex >= 'a' && *hex <= 'f')
			digits = *hex - ('a' - 10);
		else if (*hex >= 'A' && *hex <= 'F')
			digits = *hex - ('A' - 10);
		else if (*hex >= '0' && *hex <= '9')
			digits = *hex - '0';
		else
			return -EINVAL;

		digits <<= 4;
		hex++;

		if (*hex >= 'a' && *hex <= 'f')
			digits += *hex - ('a' - 10);
		else if (*hex >= 'A' && *hex <= 'F')
			digits += *hex - ('A' - 10);
		else if (*hex >= '0' && *hex <= '9')
			digits += *hex - '0';
		else
			return -EINVAL;

		hex++;
		key[i] = (u8)digits;
	}

	if (*hex != '\0')
		return -EINVAL;

	return 0;
}

/*
 * Encode key into its hex representation
 */
static void crypt_encode_key(char *hex, u8 *key, int size)
{
	static char hex_digits[] = "0123456789abcdef";
	int i;

	for(i = 0; i < size; i++) {
		*hex++ = hex_digits[*key >> 4];
		*hex++ = hex_digits[*key & 0x0f];
		key++;
	}

	*hex = '\0';
}

/*
 * Construct an encryption mapping:
 * <cipher> <key> <iv_offset> <dev_path> <start>
 */
static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
	struct crypt_c *cc;
	struct crypto_tfm *tfm;
	char *tmp;
	char *cipher;
	char *mode;
	int crypto_flags;
	int iv_size;
	int key_size;

	if (argc != 5) {
		ti->error = "dm-crypt: Not enough arguments";
		return -EINVAL;
	}

	tmp = argv[0];
	cipher = strsep(&tmp, "-");
	mode = strsep(&tmp, "-");

	if (tmp)
		DMWARN("dm-crypt: Unexpected additional cipher options");

	if (!mode || strcmp(mode, "cbc") == 0)
		crypto_flags = CRYPTO_TFM_MODE_CBC;
	else if (strcmp(mode, "ecb") == 0)
		crypto_flags = CRYPTO_TFM_MODE_ECB;
	else {
		ti->error = "dm-crypt: Invalid chaining mode";
		return -EINVAL;
	}

	tfm = crypto_alloc_tfm(cipher, crypto_flags);
	if (!tfm) {
		ti->error = "dm-crypt: Error allocating crypto tfm";
		return -EINVAL;
	}

	key_size = strlen(argv[1]) >> 1;
	if (tfm->crt_u.cipher.cit_decrypt_iv && tfm->crt_u.cipher.cit_encrypt_iv)
		iv_size = max(crypto_tfm_alg_ivsize(tfm),
		              (unsigned int)(sizeof(u32) / sizeof(u8)));
	else
		iv_size = 0;

	cc = kmalloc(sizeof(*cc) + key_size * sizeof(u8), GFP_KERNEL);
	if (cc == NULL) {
		ti->error = "dm-crypt: Cannot allocate transparent encryption context";
		crypto_free_tfm(tfm);
		return -ENOMEM;
	}

	cc->io_pool = mempool_create(MIN_IOS, mempool_alloc_slab,
				     mempool_free_slab, _io_cache);
	if (!cc->io_pool) {
		ti->error = "dm-crypt: Cannot allocate crypt io mempool";
		crypto_free_tfm(tfm);
		kfree(cc);
		return -ENOMEM;
	}

	cc->tfm = tfm;
	cc->iv_size = iv_size;
	cc->key_size = key_size;
	if ((key_size == 0 && strcmp(argv[1], "-") != 0)
	    || crypt_decode_key(cc->key, argv[1], key_size) < 0) {
		ti->error = "dm-crypt: Error decoding key";
		goto bad;
	}

	if (crypto_cipher_setkey(tfm, cc->key, key_size) < 0) {
		ti->error = "dm-crypt: Error setting key";
		goto bad;
	}

	if (sscanf(argv[2], SECTOR_FORMAT, &cc->iv_offset) != 1) {
		ti->error = "dm-crypt: Invalid iv_offset sector";
		goto bad;
	}

	if (sscanf(argv[4], SECTOR_FORMAT, &cc->start) != 1) {
		ti->error = "dm-crypt: Invalid device sector";
		goto bad;
	}

	if (dm_get_device(ti, argv[3], cc->start, ti->len,
	                  dm_table_get_mode(ti->table), &cc->dev)) {
		ti->error = "dm-crypt: Device lookup failed";
		goto bad;
	}

	spin_lock_init(&cc->lock);
	cc->bio_head = NULL;
	cc->bio_tail = NULL;
	init_MUTEX_LOCKED(&cc->sem);
	init_MUTEX_LOCKED(&cc->bh_mutex);

	ti->private = cc;
	kernel_thread(cryptio_thread, ti, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
	down(&cc->sem);

	return 0;

bad:
	mempool_destroy(cc->io_pool);
	crypto_free_tfm(tfm);
	kfree(cc);
	return -EINVAL;
}

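/*
 * Tear down the mapping: stop the worker thread and release the
 * mempool, the crypto tfm and the underlying device
 */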
static void crypt_dtr(struct dm_target *ti)
{
	struct crypt_c *cc = (struct crypt_c *) ti->private;

	/*
	 * device-mapper invokes us when there are no bios left
	 * this is the termination signal for the worker thread
	 */
	up(&cc->bh_mutex);

	/* wait for worker thread to terminate */
	down(&cc->sem);

	mempool_destroy(cc->io_pool);
	crypto_free_tfm(cc->tfm);
	dm_put_device(ti, cc->dev);
	kfree(cc);
}

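/*
 * Completion handler for the cloned bio: writes just free the
 * encryption buffer, successful reads are queued for the worker
 * thread to decrypt (we must not decrypt in interrupt context)
 */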
static int crypt_endio(struct bio *bio, unsigned int done, int error)
{
	struct crypt_io *io = (struct crypt_io*) bio->bi_private;
	struct crypt_c *cc = (struct crypt_c*) io->target->private;

	if (bio->bi_size)
		return 1;

	if (bio_rw(bio) == WRITE)
		crypt_free_buffer(bio);
	else {
		/*
		 * successful reads get decrypted by the worker thread
		 * because we never want to decrypt in an irq context
		 */
		if (bio_flagged(bio, BIO_UPTODATE)) {
			cryptio_queue_bio(cc, bio);
			up(&cc->bh_mutex);

			return 0;
		}

		bio_put(bio);
	}

	bio_endio(io->bio, io->bio->bi_size, error);
	crypt_free_io(cc, io);

	return error;
}

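/*
 * Map an incoming bio: writes are encrypted into a newly allocated
 * bio with fresh pages, reads are cloned and decrypted in place
 * once they complete
 */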
static int crypt_map(struct dm_target *ti, struct bio *bio,
                     union map_info *map_context)
{
	struct crypt_c *cc = (struct crypt_c*) ti->private;
	struct crypt_io *io;
	struct bio *clone;
	int r = 0;

	if (bio_rw(bio) == WRITE) {
		clone = crypt_alloc_buffer(bio);
		if (clone) {
			r = crypt_convert(cc, clone, bio, bio->bi_sector - ti->begin, 1);
			if (r < 0) {
				crypt_free_buffer(clone);
				return r;
			}
		}
	} else
		clone = bio_clone(bio, GFP_NOIO);

	if (!clone)
		return -ENOMEM;

	io = crypt_alloc_io(cc);
	io->target = ti;
	io->bio = bio;
	io->clone = clone;

	clone->bi_private = io;
	clone->bi_end_io = crypt_endio;
	clone->bi_bdev = cc->dev->bdev;
	clone->bi_sector = cc->start + (bio->bi_sector - ti->begin);

	generic_make_request(clone);

	return 0;
}

static int crypt_status(struct dm_target *ti, status_type_t type,
		       char *result, unsigned int maxlen)
{
	struct crypt_c *cc = (struct crypt_c *) ti->private;
	char b[BDEVNAME_SIZE];
	const char *cipher;
	const char *mode = NULL;
	int offset;

	switch (type) {
	case STATUSTYPE_INFO:
		result[0] = '\0';
		break;

	case STATUSTYPE_TABLE:
		cipher = crypto_tfm_alg_name(cc->tfm);

		switch(cc->tfm->crt_u.cipher.cit_mode) {
		case CRYPTO_TFM_MODE_CBC:
			mode = "cbc";
			break;
		case CRYPTO_TFM_MODE_ECB:
			mode = "ecb";
			break;
		default:
			BUG();
		}

		snprintf(result, maxlen, "%s-%s ", cipher, mode);
		offset = strlen(result);

		if (cc->key_size > 0) {
			if ((maxlen - offset) < (cc->key_size << 1))
				return -ENOMEM;

			crypt_encode_key(result + offset, cc->key, cc->key_size);
			offset += cc->key_size << 1;
		} else {
			if (offset >= maxlen)
				return -ENOMEM;
			result[offset++] = '-';
		}

		snprintf(result + offset, maxlen - offset,
		         " " SECTOR_FORMAT " %s " SECTOR_FORMAT, cc->iv_offset,
		         bdevname(cc->dev->bdev, b), cc->start);
		break;
	}
	return 0;
}

static struct target_type crypt_target = {
	.name   = "crypt",
	.module = THIS_MODULE,
	.ctr    = crypt_ctr,
	.dtr    = crypt_dtr,
	.map    = crypt_map,
	.status = crypt_status,
};

int __init dm_crypt_init(void)
{
	int r;

	_io_cache = kmem_cache_create("dm-crypt io", sizeof(struct crypt_io),
	                              0, 0, NULL, NULL);
	if (!_io_cache)
		return -ENOMEM;

	r = dm_register_target(&crypt_target);
	if (r < 0) {
		kmem_cache_destroy(_io_cache);
		DMERR("crypt: register failed %d", r);
	}

	return r;
}

void dm_crypt_exit(void)
{
	int r = dm_unregister_target(&crypt_target);

	if (r < 0)
		DMERR("crypt: unregister failed %d", r);

	kmem_cache_destroy(_io_cache);
}

/*
 * module hooks
 */
module_init(dm_crypt_init)
module_exit(dm_crypt_exit)

MODULE_AUTHOR("Christophe Saout <christophe@xxxxxxxx>");
MODULE_DESCRIPTION(DM_NAME " target for transparent encryption / decryption");
MODULE_LICENSE("GPL");
