Making a 2.4.9-ac kernel diff for loop-AES-2.4-1.4e

(Jari, you don't want to know about this so hit D now ;-)

There are just three hunks of the kernel-2.4.diff patch against 2.4.10
that don't apply to 2.4.9-ac17.  They're relatively easy to apply by
hand, but replacing the entire loop.c section of Jari's kernel patch
with this diff could save 2.4.9-ac users that trouble.  The three hunks
that have changed have been proofread but only very lightly tested, so
caution is urged.
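
For anyone who wants to go that route, one way to apply it (a sketch only;
the filename and source-tree path here are just examples) would be:

    cd /usr/src/linux-2.4.9-ac17
    patch -p1 --dry-run < ../loop-2.4.9ac-loop.c.diff
    patch -p1 < ../loop-2.4.9ac-loop.c.diff

The --dry-run pass just verifies that every hunk applies cleanly before
anything is touched.  The rest of Jari's loop-AES-2.4-1.4e kernel patch
(the AES code itself and the build-file changes) is still needed as usual;
this diff only stands in for the loop.c portion.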

--- linux-2.4.9-ac17-vanilla/drivers/block/loop.c	Sat Sep 29 14:05:22 2001
+++ linux-2.4.9ac17/drivers/block/loop.c	Sat Sep 29 13:42:32 2001
@@ -49,6 +49,15 @@
  *   problem above. Encryption modules that used to rely on the old scheme
  *   should just call ->i_mapping->bmap() to calculate the physical block
  *   number.
+ *
+ * AES transfer added. IV is now passed as (512 byte) sector number.
+ * Jari Ruusu <jari.ruusu@xxxxxxxxxx>, May 18 2001
+ *
+ * External encryption module locking bug fixed.
+ * Ingo Rohloff <rohloff@xxxxxxxxx>, June 21 2001
+ *
+ * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
+ * Jari Ruusu <jari.ruusu@xxxxxxxxxx>, September 2 2001
  */ 
 
 #include <linux/config.h>
@@ -72,11 +81,11 @@
 #include <asm/uaccess.h>
 
 #include <linux/loop.h>		
+#include <linux/aes.h>
 
 #define MAJOR_NR LOOP_MAJOR
 
 static int max_loop = 8;
-static struct loop_device *loop_dev;
 static int *loop_sizes;
 static int *loop_blksizes;
 static devfs_handle_t devfs_handle;      /*  For the directory */
@@ -140,12 +149,351 @@
 	init: xor_status
 }; 	
 
+#if CONFIG_BLK_DEV_LOOP_AES
+int transfer_aes(struct loop_device *lo, int cmd, char *raw_buf,
+          char *loop_buf, int size, int devSect)
+{
+    register int        x;
+    union {
+        u_int32_t       w[8];
+        unsigned char   b[32];
+    } iv;
+
+    if(!size || (size & 511)) {
+        return -EINVAL;
+    }
+    if(cmd == READ) {
+        while(size) {
+            iv.w[0] = cpu_to_le32(devSect);
+            iv.w[3] = iv.w[2] = iv.w[1] = 0;
+            x = 16;
+            do {
+                memcpy(&iv.b[16], raw_buf, 16);
+                aes_decrypt((aes_context *)lo->key_data, raw_buf, loop_buf);
+                *((u_int32_t *)(&loop_buf[ 0])) ^= iv.w[0];
+                *((u_int32_t *)(&loop_buf[ 4])) ^= iv.w[1];
+                *((u_int32_t *)(&loop_buf[ 8])) ^= iv.w[2];
+                *((u_int32_t *)(&loop_buf[12])) ^= iv.w[3];
+                raw_buf += 16;
+                loop_buf += 16;
+                memcpy(&iv.b[0], raw_buf, 16);
+                aes_decrypt((aes_context *)lo->key_data, raw_buf, loop_buf);
+                *((u_int32_t *)(&loop_buf[ 0])) ^= iv.w[4];
+                *((u_int32_t *)(&loop_buf[ 4])) ^= iv.w[5];
+                *((u_int32_t *)(&loop_buf[ 8])) ^= iv.w[6];
+                *((u_int32_t *)(&loop_buf[12])) ^= iv.w[7];
+                raw_buf += 16;
+                loop_buf += 16;
+                if(current->need_resched) schedule();
+            } while(--x);
+            size -= 512;
+            devSect++;
+        }
+    } else {
+        while(size) {
+            iv.w[0] = cpu_to_le32(devSect);
+            iv.w[3] = iv.w[2] = iv.w[1] = 0;
+            x = 16;
+            do {
+                iv.w[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
+                iv.w[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
+                iv.w[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
+                iv.w[3] ^= *((u_int32_t *)(&loop_buf[12]));
+                aes_encrypt((aes_context *)lo->key_data, &iv.b[0], raw_buf);
+                memcpy(&iv.b[0], raw_buf, 16);
+                loop_buf += 16;
+                raw_buf += 16;
+                iv.w[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
+                iv.w[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
+                iv.w[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
+                iv.w[3] ^= *((u_int32_t *)(&loop_buf[12]));
+                aes_encrypt((aes_context *)lo->key_data, &iv.b[0], raw_buf);
+                memcpy(&iv.b[0], raw_buf, 16);
+                loop_buf += 16;
+                raw_buf += 16;
+                if(current->need_resched) schedule();
+            } while(--x);
+            size -= 512;
+            devSect++;
+        }
+    }
+    return(0);
+}
+
+int keySetup_aes(struct loop_device *lo, struct loop_info *info)
+{
+    lo->key_data = (aes_context *) kmalloc(sizeof(aes_context), GFP_KERNEL);
+    if(!lo->key_data) return(-ENOMEM);
+
+    aes_set_key((aes_context *)lo->key_data, &info->lo_encrypt_key[0],
+            info->lo_encrypt_key_size, 0);
+    memset(&info->lo_encrypt_key[0], 0, sizeof(info->lo_encrypt_key));
+    return(0);
+}
+
+int keyClean_aes(struct loop_device *lo)
+{
+    if(lo->key_data) {
+        memset(lo->key_data, 0, sizeof(aes_context));
+        kfree(lo->key_data);
+        lo->key_data = 0;
+    }
+    return(0);
+}
+
+static struct loop_func_table funcs_aes = { 
+    number:     LO_CRYPT_AES,
+    transfer:   transfer_aes,
+    init:       keySetup_aes,
+    release:    keyClean_aes
+};
+#endif /* CONFIG_BLK_DEV_LOOP_AES */
+
 /* xfer_funcs[0] is special - its release function is never called */ 
 struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
 	&none_funcs,
-	&xor_funcs  
+	&xor_funcs,
+#if CONFIG_BLK_DEV_LOOP_AES
+	[LO_CRYPT_AES] = &funcs_aes,
+#endif
 };
 
+/*
+ *  First number of 'lo_prealloc' is the default number of RAM pages
+ *  to pre-allocate for each device backed loop. Every (configured)
+ *  device backed loop pre-allocates this amount of RAM pages unless
+ *  later 'lo_prealloc' numbers provide an override. 'lo_prealloc'
+ *  overrides are defined in pairs: loop_index,number_of_pages
+ */
+static int lo_prealloc[9] = { 125, 999, 0, 999, 0, 999, 0, 999, 0 };
+#define LO_PREALLOC_MIN 4    /* minimum user defined pre-allocated RAM pages */
+#define LO_PREALLOC_MAX 512  /* maximum user defined pre-allocated RAM pages */
+
+#ifdef MODULE
+MODULE_PARM(lo_prealloc, "1-9i");
+MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
+#else
+static int __init lo_prealloc_setup(char *str)
+{
+    int x, y, z;
+
+    for(x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) {
+        z = get_option(&str, &y);
+        if(z > 0) lo_prealloc[x] = y;
+        if(z < 2) break;
+    }
+    return 1;
+}
+__setup("lo_prealloc=", lo_prealloc_setup);
+#endif
+
+typedef struct {
+    struct loop_device   lo_orig;
+    struct buffer_head   *lo_bhQue2;
+    struct buffer_head   *lo_bhFree;
+    int                  lo_bhFlsh;
+    int                  lo_bhNeed;
+    wait_queue_head_t    lo_bhWait;
+} LoDevExt;
+static LoDevExt *loop_dev;
+
+#define bhQue0(r) ((r)->lo_bh)
+#define bhQue1(r) ((r)->lo_bhtail)
+#define bhQue2(r) (((LoDevExt *)(r))->lo_bhQue2)
+#define bhFree(r) (((LoDevExt *)(r))->lo_bhFree)
+#define bhFlsh(r) (((LoDevExt *)(r))->lo_bhFlsh)
+#define bhNeed(r) (((LoDevExt *)(r))->lo_bhNeed)
+#define bhWait(r) (((LoDevExt *)(r))->lo_bhWait)
+
+typedef struct {
+    struct buffer_head  **q0;
+    struct buffer_head  **q1;
+    struct buffer_head  **q2;
+    int                 x0;    
+    int                 x1;    
+    int                 x2;    
+} QueLookUpTable;
+
+static void loop_prealloc_cleanup(struct loop_device *lo)
+{
+    struct buffer_head *bh;
+
+    while((bh = bhFree(lo))) {
+        __free_page(bh->b_page);
+        bhFree(lo) = bh->b_reqnext;
+        bh->b_reqnext = NULL;
+        kmem_cache_free(bh_cachep, bh);
+    }
+}
+
+static int loop_prealloc_init(struct loop_device *lo, int pgCnt)
+{
+    struct buffer_head *bh;
+    int x;
+
+    for(x = 0; x < pgCnt; x++) {
+        bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
+        if(!bh) {
+            loop_prealloc_cleanup(lo);
+            return(1);
+        }
+        bh->b_page = alloc_page(GFP_KERNEL);
+        if(!bh->b_page) {
+            bh->b_reqnext = NULL;
+            kmem_cache_free(bh_cachep, bh);
+            loop_prealloc_cleanup(lo);
+            return(1);
+        }
+        bh->b_reqnext = bhFree(lo);
+        bhFree(lo) = bh;
+    }
+    return(0);
+}
+
+static void loop_add_queue_last(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
+{
+    unsigned long flags;
+
+    spin_lock_irqsave(&lo->lo_lock, flags);
+    if(*q) {
+        bh->b_reqnext = (*q)->b_reqnext;
+        (*q)->b_reqnext = bh;
+    } else {
+        bh->b_reqnext = bh;
+    }
+    *q = bh;
+    spin_unlock_irqrestore(&lo->lo_lock, flags);
+
+    if(waitqueue_active(&bhWait(lo))) {
+        wake_up_interruptible(&bhWait(lo));
+    }
+}
+
+static void loop_add_queue_first(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
+{
+    spin_lock_irq(&lo->lo_lock);
+    if(*q) {
+        bh->b_reqnext = (*q)->b_reqnext;
+        (*q)->b_reqnext = bh;
+    } else {
+        bh->b_reqnext = bh;
+        *q = bh;
+    }
+    spin_unlock_irq(&lo->lo_lock);
+}
+
+static struct buffer_head *loop_get_bh(struct loop_device *lo, int *listNr, QueLookUpTable *qt)
+{
+    struct buffer_head *bh = NULL, *last;
+
+    spin_lock_irq(&lo->lo_lock);
+    if((last = *qt->q0)) {
+        bh = last->b_reqnext;
+        if(bh == last) {
+            *qt->q0 = NULL;
+        } else {
+            last->b_reqnext = bh->b_reqnext;
+        }
+        bh->b_reqnext = NULL;
+        *listNr = qt->x0;
+    } else if((last = *qt->q1)) {
+        bh = last->b_reqnext;
+        if(bh == last) {
+            *qt->q1 = NULL;
+        } else {
+            last->b_reqnext = bh->b_reqnext;
+        }
+        bh->b_reqnext = NULL;
+        *listNr = qt->x1;
+    } else if((last = *qt->q2)) {
+        bh = last->b_reqnext;
+        if(bh == last) {
+            *qt->q2 = NULL;
+        } else {
+            last->b_reqnext = bh->b_reqnext;
+        }
+        bh->b_reqnext = NULL;
+        *listNr = qt->x2;
+    }
+    spin_unlock_irq(&lo->lo_lock);
+    return bh;
+}
+
+static void loop_put_buffer(struct loop_device *lo, struct buffer_head *b)
+{
+    unsigned long flags;
+    int wk;
+
+    spin_lock_irqsave(&lo->lo_lock, flags);
+    b->b_reqnext = bhFree(lo);
+    bhFree(lo) = b;
+    wk = bhNeed(lo);
+    spin_unlock_irqrestore(&lo->lo_lock, flags);
+
+    if(wk && waitqueue_active(&bhWait(lo))) {
+        wake_up_interruptible(&bhWait(lo));
+    }
+}
+
+static void loop_end_io_transfer_wr(struct buffer_head *bh, int uptodate)
+{
+    struct loop_device *lo = (struct loop_device *)(&loop_dev[MINOR(bh->b_dev)]);
+    struct buffer_head *rbh = bh->b_private;
+
+    rbh->b_reqnext = NULL;
+    rbh->b_end_io(rbh, uptodate);
+    loop_put_buffer(lo, bh);
+    if(atomic_dec_and_test(&lo->lo_pending)) {
+        wake_up_interruptible(&bhWait(lo));
+    }
+}
+
+static void loop_end_io_transfer_rd(struct buffer_head *bh, int uptodate)
+{
+    struct loop_device *lo = (struct loop_device *)(&loop_dev[MINOR(bh->b_dev)]);
+
+    if(!uptodate) {
+        loop_end_io_transfer_wr(bh, uptodate);
+    } else {
+        loop_add_queue_last(lo, bh, &bhQue0(lo));
+    }
+}
+
+static struct buffer_head *loop_get_buffer(struct loop_device *lo,
+                       struct buffer_head *rbh, int fromThread, int rw)
+{
+    struct buffer_head *bh;
+    struct page *p;
+    unsigned long flags;
+
+    spin_lock_irqsave(&lo->lo_lock, flags);
+    bh = bhFree(lo);
+    if(bh) {
+        bhFree(lo) = bh->b_reqnext;
+        if(fromThread) bhNeed(lo) = 0;
+    } else {
+        if(fromThread) bhNeed(lo) = 1;
+    }
+    spin_unlock_irqrestore(&lo->lo_lock, flags);
+    if(!bh) return((struct buffer_head *)0);
+
+    p = bh->b_page;
+    memset(bh, 0, sizeof(struct buffer_head));
+    bh->b_page = p;
+
+    bh->b_private = rbh;
+    bh->b_size = rbh->b_size;
+    bh->b_dev = rbh->b_rdev;
+    bh->b_rdev = lo->lo_device;
+    bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
+    bh->b_data = page_address(bh->b_page);
+    bh->b_end_io = (rw == WRITE) ? loop_end_io_transfer_wr : loop_end_io_transfer_rd;
+    bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9);
+    init_waitqueue_head(&bh->b_wait);
+
+    return bh;
+}
+
 #define MAX_DISK_SIZE 1024*1024*1024
 
 static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev)
@@ -165,8 +513,7 @@
 					lo->lo_device);
 }
 
-static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize,
-		   loff_t pos)
+static int lo_send(struct loop_device *lo, struct buffer_head *bh, loff_t pos)
 {
 	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
 	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
@@ -184,7 +531,7 @@
 	len = bh->b_size;
 	data = bh->b_data;
 	while (len > 0) {
-		int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize;
+		int IV = index * (PAGE_CACHE_SIZE >> 9) + (offset >> 9);
 		size = PAGE_CACHE_SIZE - offset;
 		if (size > len)
 			size = len;
@@ -207,7 +554,6 @@
 		len -= size;
 		offset = 0;
 		index++;
-		pos += size;
 		UnlockPage(page);
 		deactivate_page(page);
 		page_cache_release(page);
@@ -231,7 +577,6 @@
 struct lo_read_data {
 	struct loop_device *lo;
 	char *data;
-	int bsize;
 };
 
 static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
@@ -240,7 +585,7 @@
 	unsigned long count = desc->count;
 	struct lo_read_data *p = (struct lo_read_data*)desc->buf;
 	struct loop_device *lo = p->lo;
-	int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize;
+	int IV = page->index * (PAGE_CACHE_SIZE >> 9) + (offset >> 9);
 
 	if (size > count)
 		size = count;
@@ -259,8 +604,7 @@
 	return size;
 }
 
-static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize,
-		      loff_t pos)
+static int lo_receive(struct loop_device *lo, struct buffer_head *bh, loff_t pos)
 {
 	struct lo_read_data cookie;
 	read_descriptor_t desc;
@@ -268,7 +612,6 @@
 
 	cookie.lo = lo;
 	cookie.data = bh->b_data;
-	cookie.bsize = bsize;
 	desc.written = 0;
 	desc.count = bh->b_size;
 	desc.buf = (char*)&cookie;
@@ -280,32 +623,6 @@
 	return desc.error;
 }
 
-static inline int loop_get_bs(struct loop_device *lo)
-{
-	int bs = 0;
-
-	if (blksize_size[MAJOR(lo->lo_device)])
-		bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
-	if (!bs)
-		bs = BLOCK_SIZE;	
-
-	return bs;
-}
-
-static inline unsigned long loop_get_iv(struct loop_device *lo,
-					unsigned long sector)
-{
-	int bs = loop_get_bs(lo);
-	unsigned long offset, IV;
-
-	IV = sector / (bs >> 9) + lo->lo_offset / bs;
-	offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs;
-	if (offset >= bs)
-		IV++;
-
-	return IV;
-}
-
 static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
 {
 	loff_t pos;
@@ -314,129 +631,17 @@
 	pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
 
 	if (rw == WRITE)
-		ret = lo_send(lo, bh, loop_get_bs(lo), pos);
+		ret = lo_send(lo, bh, pos);
 	else
-		ret = lo_receive(lo, bh, loop_get_bs(lo), pos);
+		ret = lo_receive(lo, bh, pos);
 
 	return ret;
 }
 
-static void loop_put_buffer(struct buffer_head *bh)
-{
-	if (bh) {
-		__free_page(bh->b_page);
-		kmem_cache_free(bh_cachep, bh);
-	}
-}
-
-/*
- * Add buffer_head to back of pending list
- */
-static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&lo->lo_lock, flags);
-	if (lo->lo_bhtail) {
-		lo->lo_bhtail->b_reqnext = bh;
-		lo->lo_bhtail = bh;
-	} else
-		lo->lo_bh = lo->lo_bhtail = bh;
-	spin_unlock_irqrestore(&lo->lo_lock, flags);
-
-	up(&lo->lo_bh_mutex);
-}
-
-/*
- * Grab first pending buffer
- */
-static struct buffer_head *loop_get_bh(struct loop_device *lo)
-{
-	struct buffer_head *bh;
-
-	spin_lock_irq(&lo->lo_lock);
-	if ((bh = lo->lo_bh)) {
-		if (bh == lo->lo_bhtail)
-			lo->lo_bhtail = NULL;
-		lo->lo_bh = bh->b_reqnext;
-		bh->b_reqnext = NULL;
-	}
-	spin_unlock_irq(&lo->lo_lock);
-
-	return bh;
-}
-
-/*
- * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE
- * and lo->transfer stuff has already been done. if not, it was a READ
- * so queue it for the loop thread and let it do the transfer out of
- * b_end_io context (we don't want to do decrypt of a page with irqs
- * disabled)
- */
-static void loop_end_io_transfer(struct buffer_head *bh, int uptodate)
-{
-	struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
-
-	if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) {
-		struct buffer_head *rbh = bh->b_private;
-
-		rbh->b_end_io(rbh, uptodate);
-		if (atomic_dec_and_test(&lo->lo_pending))
-			up(&lo->lo_bh_mutex);
-		loop_put_buffer(bh);
-	} else
-		loop_add_bh(lo, bh);
-}
-
-static struct buffer_head *loop_get_buffer(struct loop_device *lo,
-					   struct buffer_head *rbh)
-{
-	struct buffer_head *bh;
-
-	do {
-		bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
-		if (bh)
-			break;
-
-		run_task_queue(&tq_disk);
-		schedule_timeout(HZ);
-	} while (1);
-	memset(bh, 0, sizeof(*bh));
-
-	bh->b_size = rbh->b_size;
-	bh->b_dev = rbh->b_rdev;
-	spin_lock_irq(&lo->lo_lock);
-	bh->b_rdev = lo->lo_device;
-	spin_unlock_irq(&lo->lo_lock);
-	bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
-
-	/*
-	 * easy way out, although it does waste some memory for < PAGE_SIZE
-	 * blocks... if highmem bounce buffering can get away with it,
-	 * so can we :-)
-	 */
-	do {
-		bh->b_page = alloc_page(GFP_NOIO);
-		if (bh->b_page)
-			break;
-
-		run_task_queue(&tq_disk);
-		schedule_timeout(HZ);
-	} while (1);
-
-	bh->b_data = page_address(bh->b_page);
-	bh->b_end_io = loop_end_io_transfer;
-	bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9);
-	init_waitqueue_head(&bh->b_wait);
-
-	return bh;
-}
-
 static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh)
 {
-	struct buffer_head *bh = NULL;
+	struct buffer_head *bh;
 	struct loop_device *lo;
-	unsigned long IV;
 
 	if (!buffer_locked(rbh))
 		BUG();
@@ -444,7 +649,7 @@
 	if (MINOR(rbh->b_rdev) >= max_loop)
 		goto out;
 
-	lo = &loop_dev[MINOR(rbh->b_rdev)];
+	lo = (struct loop_device *)(&loop_dev[MINOR(rbh->b_rdev)]);
 	spin_lock_irq(&lo->lo_lock);
 	if (lo->lo_state != Lo_bound)
 		goto inactive;
@@ -469,36 +674,31 @@
 	 * file backed, queue for loop_thread to handle
 	 */
 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
-		/*
-		 * rbh locked at this point, noone else should clear
-		 * the dirty flag
-		 */
-		if (rw == WRITE)
-			set_bit(BH_Dirty, &rbh->b_state);
-		loop_add_bh(lo, rbh);
+		loop_add_queue_last(lo, rbh, (rw == WRITE) ? &bhQue1(lo) : &bhQue0(lo));
 		return 0;
 	}
 
 	/*
 	 * piggy old buffer on original, and submit for I/O
+	 * device backed, start reads now, queue writes for thread to handle
 	 */
-	bh = loop_get_buffer(lo, rbh);
-	bh->b_private = rbh;
-	IV = loop_get_iv(lo, bh->b_rsector);
-	if (rw == WRITE) {
-		set_bit(BH_Dirty, &bh->b_state);
-		if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data,
-				   bh->b_size, IV))
-			goto err;
+	if(rw == READ) {
+		bh = loop_get_buffer(lo, rbh, 0, rw);
+	} else {
+		bh = NULL;
+	}
+	if(!bh) {
+		/* just queue request and let thread handle alloc later */
+		loop_add_queue_last(lo, rbh, (rw == WRITE) ? &bhQue1(lo) : &bhQue2(lo));
+		return 0;
 	}
-
 	generic_make_request(rw, bh);
 	return 0;
 
 err:
-	if (atomic_dec_and_test(&lo->lo_pending))
-		up(&lo->lo_bh_mutex);
-	loop_put_buffer(bh);
+	if (atomic_dec_and_test(&lo->lo_pending)) {
+		wake_up_interruptible(&bhWait(lo));
+	}
 out:
 	buffer_IO_error(rbh);
 	return 0;
@@ -507,41 +707,23 @@
 	goto out;
 }
 
-static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh)
-{
-	int ret;
-
-	/*
-	 * For block backed loop, we know this is a READ
-	 */
-	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
-		int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state);
-
-		ret = do_bh_filebacked(lo, bh, rw);
-		bh->b_end_io(bh, !ret);
-	} else {
-		struct buffer_head *rbh = bh->b_private;
-		unsigned long IV = loop_get_iv(lo, rbh->b_rsector);
-
-		ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data,
-				     bh->b_size, IV);
-
-		rbh->b_end_io(rbh, !ret);
-		loop_put_buffer(bh);
-	}
-}
-
 /*
- * worker thread that handles reads/writes to file backed loop devices,
- * to avoid blocking in our make_request_fn. it also does loop decrypting
- * on reads for block backed loop, as that is too heavy to do from
- * b_end_io context where irqs may be disabled.
+ * worker thread that handles all encryption and decryption.
  */
 static int loop_thread(void *data)
 {
 	struct loop_device *lo = data;
-	struct buffer_head *bh;
+	struct buffer_head *bh, *xbh;
+	int x, rw, qi = 0, flushcnt = 0;
+	wait_queue_t waitq;
+	QueLookUpTable qt[4] = {
+		{ &bhQue0(lo), &bhQue1(lo), &bhQue2(lo), 0, 1, 2 },
+		{ &bhQue2(lo), &bhQue0(lo), &bhQue1(lo), 2, 0, 1 },
+		{ &bhQue0(lo), &bhQue2(lo), &bhQue1(lo), 0, 2, 1 },
+		{ &bhQue1(lo), &bhQue0(lo), &bhQue2(lo), 1, 0, 2 }
+	};
 
+	init_waitqueue_entry(&waitq, current);  
 	daemonize();
 	exit_files(current);
 
@@ -566,27 +748,101 @@
 	up(&lo->lo_sem);
 
 	for (;;) {
-		down_interruptible(&lo->lo_bh_mutex);
+		add_wait_queue(&bhWait(lo), &waitq);
+		for(;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if(!atomic_read(&lo->lo_pending)) break;
+
+			x = 0;
+			spin_lock_irq(&lo->lo_lock);
+			if(bhQue0(lo)) {
+				x = 1;
+			} else if(bhQue1(lo) || bhQue2(lo)) {
+				/* file backed works too because bhNeed(lo) == 0 */
+				if(bhFree(lo) || !bhNeed(lo)) x = 1;
+			}
+			spin_unlock_irq(&lo->lo_lock);
+			if(x) break;
+
+			schedule();
+		}
+		current->state = TASK_RUNNING;
+		remove_wait_queue(&bhWait(lo), &waitq);
+
 		/*
-		 * could be upped because of tear-down, not because of
+		 * could be woken because of tear-down, not because of
 		 * pending work
 		 */
-		if (!atomic_read(&lo->lo_pending))
-			break;
+		if(!atomic_read(&lo->lo_pending)) break;
+
+		/*
+		 * read queues using alternating order to prevent starvation
+		 */
+		bh = loop_get_bh(lo, &x, &qt[++qi & 3]);
+		if(!bh) continue;
+
+		/*
+		 *  x  list tag    usage(buffer-allocated)
+		 * --- --------    -----------------------
+		 *  0  bhQue0(lo)  dev-read(y) / file-read
+		 *  1  bhQue1(lo)  dev-write(n) / file-write
+		 *  2  bhQue2(lo)  dev-read(n)
+		 */
+		rw = (x == 1) ? WRITE : READ;
+		if((x >= 1) && !(lo->lo_flags & LO_FLAGS_DO_BMAP)) {
+			/* loop_make_request didn't allocate a buffer, do that now */
+			xbh = loop_get_buffer(lo, bh, 1, rw);
+			if(!xbh) {
+				run_task_queue(&tq_disk);
+				flushcnt = 0;
+				loop_add_queue_first(lo, bh, (rw == WRITE) ? &bhQue1(lo) : &bhQue2(lo));
+				/* bhNeed(lo) should be 1 now, go back to sleep */
+				continue;
+			}
+			if(rw == WRITE) {
+				if(lo_do_transfer(lo, WRITE, xbh->b_data, bh->b_data, xbh->b_size, xbh->b_rsector)) {
+					loop_put_buffer(lo, xbh);
+					buffer_IO_error(bh);
+					atomic_dec(&lo->lo_pending);
+					continue;
+				}
+			}
+			generic_make_request(rw, xbh);
+
+			/* start I/O if there are no more requests lacking buffers */
+			x = 0;
+			spin_lock_irq(&lo->lo_lock);
+			if(!bhQue1(lo) && !bhQue2(lo)) x = 1;
+			spin_unlock_irq(&lo->lo_lock);
+			if(x || (++flushcnt >= bhFlsh(lo))) {
+				run_task_queue(&tq_disk);
+				flushcnt = 0;
+			}
 
-		bh = loop_get_bh(lo);
-		if (!bh) {
-			printk("loop: missing bh\n");
+			/* request not completely processed yet */
 			continue;
 		}
-		loop_handle_bh(lo, bh);
+		if(lo->lo_flags & LO_FLAGS_DO_BMAP) {
+			/* request is for file backed device */
+			x = do_bh_filebacked(lo, bh, rw);
+			bh->b_reqnext = NULL;
+			bh->b_end_io(bh, !x);
+		} else {
+			/* device backed read has completed, do decrypt now */
+			xbh = bh->b_private;
+			/* must not use bh->b_rsector as IV, as it may be modified by LVM at this point */
+			/* instead, recompute IV from original request */
+			x = lo_do_transfer(lo, READ, bh->b_data, xbh->b_data, bh->b_size, xbh->b_rsector + (lo->lo_offset >> 9));
+			xbh->b_reqnext = NULL;
+			xbh->b_end_io(xbh, !x);
+			loop_put_buffer(lo, bh);
+		}
 
 		/*
-		 * upped both for pending work and tear-down, lo_pending
+		 * woken both for pending work and tear-down, lo_pending
 		 * will hit zero then
 		 */
-		if (atomic_dec_and_test(&lo->lo_pending))
-			break;
+		if(atomic_dec_and_test(&lo->lo_pending)) break;
 	}
 
 	up(&lo->lo_sem);
@@ -620,7 +876,22 @@
 	if (!(file->f_mode & FMODE_WRITE))
 		lo_flags |= LO_FLAGS_READ_ONLY;
 
+	bhFree(lo) = bhQue2(lo) = bhQue1(lo) = bhQue0(lo) = NULL;
+	bhNeed(lo) = bhFlsh(lo) = 0;
+	init_waitqueue_head(&bhWait(lo));
 	if (S_ISBLK(inode->i_mode)) {
+		int i, x = lo_prealloc[0];
+		for(i = 1; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
+			if(lo_prealloc[i+1] && (lo->lo_number == lo_prealloc[i])) {
+				x = lo_prealloc[i+1];
+				break;
+			}
+		}
+		bhFlsh(lo) = (x * 3) / 4;
+		if(loop_prealloc_init(lo, x)) {
+			error = -ENOMEM;
+			goto out_putf;
+		}
 		lo_device = inode->i_rdev;
 	} else if (S_ISREG(inode->i_mode)) {
 		struct address_space_operations *aops = inode->i_mapping->a_ops;
@@ -664,9 +935,9 @@
 	if (!bs)
 		bs = BLOCK_SIZE;
 
+	if(S_ISREG(inode->i_mode)) bs = BLOCK_SIZE;
 	set_blocksize(dev, bs);
 
-	lo->lo_bh = lo->lo_bhtail = NULL;
 	kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
 	down(&lo->lo_sem);
 
@@ -724,12 +995,14 @@
 
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_state = Lo_rundown;
-	if (atomic_dec_and_test(&lo->lo_pending))
-		up(&lo->lo_bh_mutex);
+	if(atomic_dec_and_test(&lo->lo_pending)) {
+		wake_up_interruptible(&bhWait(lo));
+	}
 	spin_unlock_irq(&lo->lo_lock);
 
 	down(&lo->lo_sem);
 
+	loop_prealloc_cleanup(lo);
 	lo->lo_backing_file = NULL;
 
 	loop_release_xfer(lo);
@@ -836,7 +1109,7 @@
 	dev = MINOR(inode->i_rdev);
 	if (dev >= max_loop)
 		return -ENODEV;
-	lo = &loop_dev[dev];
+	lo = (struct loop_device *)(&loop_dev[dev]);
 	down(&lo->lo_ctl_mutex);
 	switch (cmd) {
 	case LOOP_SET_FD:
@@ -879,7 +1152,7 @@
 static int lo_open(struct inode *inode, struct file *file)
 {
 	struct loop_device *lo;
-	int	dev, type;
+	int	dev;
 
 	if (!inode)
 		return -EINVAL;
@@ -891,13 +1164,9 @@
 	if (dev >= max_loop)
 		return -ENODEV;
 
-	lo = &loop_dev[dev];
+	lo = (struct loop_device *)(&loop_dev[dev]);
 	MOD_INC_USE_COUNT;
 	down(&lo->lo_ctl_mutex);
-
-	type = lo->lo_encrypt_type; 
-	if (type && xfer_funcs[type] && xfer_funcs[type]->lock)
-		xfer_funcs[type]->lock(lo);
 	lo->lo_refcnt++;
 	up(&lo->lo_ctl_mutex);
 	return 0;
@@ -906,7 +1175,7 @@
 static int lo_release(struct inode *inode, struct file *file)
 {
 	struct loop_device *lo;
-	int	dev, type;
+	int	dev;
 
 	if (!inode)
 		return 0;
@@ -919,13 +1188,9 @@
 	if (dev >= max_loop)
 		return 0;
 
-	lo = &loop_dev[dev];
+	lo = (struct loop_device *)(&loop_dev[dev]);
 	down(&lo->lo_ctl_mutex);
-	type = lo->lo_encrypt_type;
 	--lo->lo_refcnt;
-	if (xfer_funcs[type] && xfer_funcs[type]->unlock)
-		xfer_funcs[type]->unlock(lo);
-
 	up(&lo->lo_ctl_mutex);
 	MOD_DEC_USE_COUNT;
 	return 0;
@@ -955,11 +1220,13 @@
 int loop_unregister_transfer(int number)
 {
 	struct loop_device *lo; 
+	int x, type;
 
 	if ((unsigned)number >= MAX_LO_CRYPT)
 		return -EINVAL; 
-	for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) { 
-		int type = lo->lo_encrypt_type;
+	for(x = 0; x < max_loop; x++) {
+		lo = (struct loop_device *)(&loop_dev[x]);
+		type = lo->lo_encrypt_type;
 		if (type == number) { 
 			xfer_funcs[type]->release(lo);
 			lo->transfer = NULL; 
@@ -995,7 +1262,7 @@
 			      S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
 			      &lo_fops, NULL);
 
-	loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
+	loop_dev = kmalloc(max_loop * sizeof(LoDevExt), GFP_KERNEL);
 	if (!loop_dev)
 		return -ENOMEM;
 
@@ -1010,8 +1277,8 @@
 	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request);
 
 	for (i = 0; i < max_loop; i++) {
-		struct loop_device *lo = &loop_dev[i];
-		memset(lo, 0, sizeof(struct loop_device));
+		struct loop_device *lo = (struct loop_device *)(&loop_dev[i]);
+		memset(lo, 0, sizeof(LoDevExt));
 		init_MUTEX(&lo->lo_ctl_mutex);
 		init_MUTEX_LOCKED(&lo->lo_sem);
 		init_MUTEX_LOCKED(&lo->lo_bh_mutex);
@@ -1026,13 +1293,18 @@
 	for (i = 0; i < max_loop; i++)
 		register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0);
 
+	for(i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
+		if(!lo_prealloc[i]) continue;
+		if(lo_prealloc[i] < LO_PREALLOC_MIN) lo_prealloc[i] = LO_PREALLOC_MIN;
+		if(lo_prealloc[i] > LO_PREALLOC_MAX) lo_prealloc[i] = LO_PREALLOC_MAX;
+	}
 	printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
 	return 0;
 
-out_sizes:
-	kfree(loop_dev);
 out_blksizes:
 	kfree(loop_sizes);
+out_sizes:
+	kfree(loop_dev);
 	printk(KERN_ERR "loop: ran out of memory\n");
 	return -ENOMEM;
 }
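
One usage note on the new lo_prealloc parameter, going by the parsing code
above (I've only read it, not exercised the override pairs): the first
number is the default page count for every device-backed loop, and any
further numbers come in loop_index,page_count pairs.  So something like

    modprobe loop lo_prealloc=125,1,32

or, with the driver built in, a boot parameter of

    lo_prealloc=125,1,32

should leave most loops at 125 pre-allocated pages but give /dev/loop1
only 32.  Page counts outside the 4..512 range get clamped at init time.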


-- 
| G r e g  L o u i s          | gpg public key:      |
|   http://www.bgl.nu/~glouis |   finger greg@xxxxxx |

Linux-crypto:  cryptography in and on the Linux system
Archive:       http://mail.nl.linux.org/linux-crypto/

