(Jari, you don't want to know about this so hit D now ;-)

There are just three hunks of the kernel-2.4.diff patch against 2.4.10 that
don't apply to 2.4.9-ac17. They're relatively easy to apply by hand, but
replacing the entire loop.c section of Jari's kernel patch with this diff
could save 2.4.9-ac users that trouble. The three hunks that have changed
have been proofread but only very lightly tested, so caution is urged.

--- linux-2.4.9-ac17-vanilla/drivers/block/loop.c	Sat Sep 29 14:05:22 2001
+++ linux-2.4.9ac17/drivers/block/loop.c	Sat Sep 29 13:42:32 2001
@@ -49,6 +49,15 @@
  * problem above. Encryption modules that used to rely on the old scheme
  * should just call ->i_mapping->bmap() to calculate the physical block
  * number.
+ *
+ * AES transfer added. IV is now passed as (512 byte) sector number.
+ * Jari Ruusu <jari.ruusu@xxxxxxxxxx>, May 18 2001
+ *
+ * External encryption module locking bug fixed.
+ * Ingo Rohloff <rohloff@xxxxxxxxx>, June 21 2001
+ *
+ * Make device backed loop work with swap (pre-allocated buffers + queue rewrite).
+ * Jari Ruusu <jari.ruusu@xxxxxxxxxx>, September 2 2001
  */
 
 #include <linux/config.h>
@@ -72,11 +81,11 @@
 #include <asm/uaccess.h>
 
 #include <linux/loop.h>
+#include <linux/aes.h>
 
 #define MAJOR_NR LOOP_MAJOR
 
 static int max_loop = 8;
-static struct loop_device *loop_dev;
 static int *loop_sizes;
 static int *loop_blksizes;
 static devfs_handle_t devfs_handle;      /*  For the directory */
@@ -140,12 +149,351 @@
 	init: xor_status
 };
 
+#if CONFIG_BLK_DEV_LOOP_AES
+int transfer_aes(struct loop_device *lo, int cmd, char *raw_buf,
+		 char *loop_buf, int size, int devSect)
+{
+	register int x;
+	union {
+		u_int32_t w[8];
+		unsigned char b[32];
+	} iv;
+
+	if(!size || (size & 511)) {
+		return -EINVAL;
+	}
+	if(cmd == READ) {
+		while(size) {
+			iv.w[0] = cpu_to_le32(devSect);
+			iv.w[3] = iv.w[2] = iv.w[1] = 0;
+			x = 16;
+			do {
+				memcpy(&iv.b[16], raw_buf, 16);
+				aes_decrypt((aes_context *)lo->key_data, raw_buf, loop_buf);
+				*((u_int32_t *)(&loop_buf[ 0])) ^= iv.w[0];
+				*((u_int32_t *)(&loop_buf[ 4])) ^= iv.w[1];
+				*((u_int32_t *)(&loop_buf[ 8])) ^= iv.w[2];
+				*((u_int32_t *)(&loop_buf[12])) ^= iv.w[3];
+				raw_buf += 16;
+				loop_buf += 16;
+				memcpy(&iv.b[0], raw_buf, 16);
+				aes_decrypt((aes_context *)lo->key_data, raw_buf, loop_buf);
+				*((u_int32_t *)(&loop_buf[ 0])) ^= iv.w[4];
+				*((u_int32_t *)(&loop_buf[ 4])) ^= iv.w[5];
+				*((u_int32_t *)(&loop_buf[ 8])) ^= iv.w[6];
+				*((u_int32_t *)(&loop_buf[12])) ^= iv.w[7];
+				raw_buf += 16;
+				loop_buf += 16;
+				if(current->need_resched) schedule();
+			} while(--x);
+			size -= 512;
+			devSect++;
+		}
+	} else {
+		while(size) {
+			iv.w[0] = cpu_to_le32(devSect);
+			iv.w[3] = iv.w[2] = iv.w[1] = 0;
+			x = 16;
+			do {
+				iv.w[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
+				iv.w[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
+				iv.w[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
+				iv.w[3] ^= *((u_int32_t *)(&loop_buf[12]));
+				aes_encrypt((aes_context *)lo->key_data, &iv.b[0], raw_buf);
+				memcpy(&iv.b[0], raw_buf, 16);
+				loop_buf += 16;
+				raw_buf += 16;
+				iv.w[0] ^= *((u_int32_t *)(&loop_buf[ 0]));
+				iv.w[1] ^= *((u_int32_t *)(&loop_buf[ 4]));
+				iv.w[2] ^= *((u_int32_t *)(&loop_buf[ 8]));
+				iv.w[3] ^= *((u_int32_t *)(&loop_buf[12]));
+				aes_encrypt((aes_context *)lo->key_data, &iv.b[0], raw_buf);
+				memcpy(&iv.b[0], raw_buf, 16);
+				loop_buf += 16;
+				raw_buf += 16;
+				if(current->need_resched) schedule();
+			} while(--x);
+			size -= 512;
+			devSect++;
+		}
+	}
+	return(0);
+}
+
+int keySetup_aes(struct loop_device *lo, struct loop_info *info)
+{
+	lo->key_data = (aes_context *) kmalloc(sizeof(aes_context), GFP_KERNEL);
+	if(!lo->key_data) return(-ENOMEM);
+
+	aes_set_key((aes_context *)lo->key_data, &info->lo_encrypt_key[0],
+		    info->lo_encrypt_key_size, 0);
+	memset(&info->lo_encrypt_key[0], 0, sizeof(info->lo_encrypt_key));
+	return(0);
+}
+
+int keyClean_aes(struct loop_device *lo)
+{
+	if(lo->key_data) {
+		memset(lo->key_data, 0, sizeof(aes_context));
+		kfree(lo->key_data);
+		lo->key_data = 0;
+	}
+	return(0);
+}
+
+static struct loop_func_table funcs_aes = {
+	number:   LO_CRYPT_AES,
+	transfer: transfer_aes,
+	init:     keySetup_aes,
+	release:  keyClean_aes
+};
+#endif /* CONFIG_BLK_DEV_LOOP_AES */
+
 /* xfer_funcs[0] is special - its release function is never called */
 struct loop_func_table *xfer_funcs[MAX_LO_CRYPT] = {
 	&none_funcs,
-	&xor_funcs
+	&xor_funcs,
+#if CONFIG_BLK_DEV_LOOP_AES
+	[LO_CRYPT_AES] = &funcs_aes,
+#endif
 };
 
+/*
+ * First number of 'lo_prealloc' is the default number of RAM pages
+ * to pre-allocate for each device backed loop. Every (configured)
+ * device backed loop pre-allocates this amount of RAM pages unless
+ * later 'lo_prealloc' numbers provide an override. 'lo_prealloc'
+ * overrides are defined in pairs: loop_index,number_of_pages
+ */
+static int lo_prealloc[9] = { 125, 999, 0, 999, 0, 999, 0, 999, 0 };
+#define LO_PREALLOC_MIN 4	/* minimum user defined pre-allocated RAM pages */
+#define LO_PREALLOC_MAX 512	/* maximum user defined pre-allocated RAM pages */
+
+#ifdef MODULE
+MODULE_PARM(lo_prealloc, "1-9i");
+MODULE_PARM_DESC(lo_prealloc, "Number of pre-allocated pages [,index,pages]...");
+#else
+static int __init lo_prealloc_setup(char *str)
+{
+	int x, y, z;
+
+	for(x = 0; x < (sizeof(lo_prealloc) / sizeof(int)); x++) {
+		z = get_option(&str, &y);
+		if(z > 0) lo_prealloc[x] = y;
+		if(z < 2) break;
+	}
+	return 1;
+}
+__setup("lo_prealloc=", lo_prealloc_setup);
+#endif
+
+typedef struct {
+	struct loop_device lo_orig;
+	struct buffer_head *lo_bhQue2;
+	struct buffer_head *lo_bhFree;
+	int lo_bhFlsh;
+	int lo_bhNeed;
+	wait_queue_head_t lo_bhWait;
+} LoDevExt;
+static LoDevExt *loop_dev;
+
+#define bhQue0(r)	((r)->lo_bh)
+#define bhQue1(r)	((r)->lo_bhtail)
+#define bhQue2(r)	(((LoDevExt *)(r))->lo_bhQue2)
+#define bhFree(r)	(((LoDevExt *)(r))->lo_bhFree)
+#define bhFlsh(r)	(((LoDevExt *)(r))->lo_bhFlsh)
+#define bhNeed(r)	(((LoDevExt *)(r))->lo_bhNeed)
+#define bhWait(r)	(((LoDevExt *)(r))->lo_bhWait)
+
+typedef struct {
+	struct buffer_head **q0;
+	struct buffer_head **q1;
+	struct buffer_head **q2;
+	int x0;
+	int x1;
+	int x2;
+} QueLookUpTable;
+
+static void loop_prealloc_cleanup(struct loop_device *lo)
+{
+	struct buffer_head *bh;
+
+	while((bh = bhFree(lo))) {
+		__free_page(bh->b_page);
+		bhFree(lo) = bh->b_reqnext;
+		bh->b_reqnext = NULL;
+		kmem_cache_free(bh_cachep, bh);
+	}
+}
+
+static int loop_prealloc_init(struct loop_device *lo, int pgCnt)
+{
+	struct buffer_head *bh;
+	int x;
+
+	for(x = 0; x < pgCnt; x++) {
+		bh = kmem_cache_alloc(bh_cachep, SLAB_KERNEL);
+		if(!bh) {
+			loop_prealloc_cleanup(lo);
+			return(1);
+		}
+		bh->b_page = alloc_page(GFP_KERNEL);
+		if(!bh->b_page) {
+			bh->b_reqnext = NULL;
+			kmem_cache_free(bh_cachep, bh);
+			loop_prealloc_cleanup(lo);
+			return(1);
+		}
+		bh->b_reqnext = bhFree(lo);
+		bhFree(lo) = bh;
+	}
+	return(0);
+}
+
+static void loop_add_queue_last(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&lo->lo_lock, flags);
+	if(*q) {
+		bh->b_reqnext = (*q)->b_reqnext;
+		(*q)->b_reqnext = bh;
+	} else {
+		bh->b_reqnext = bh;
+	}
+	*q = bh;
+	spin_unlock_irqrestore(&lo->lo_lock, flags);
+
+	if(waitqueue_active(&bhWait(lo))) {
+		wake_up_interruptible(&bhWait(lo));
+	}
+}
+
+static void loop_add_queue_first(struct loop_device *lo, struct buffer_head *bh, struct buffer_head **q)
+{
+	spin_lock_irq(&lo->lo_lock);
+	if(*q) {
+		bh->b_reqnext = (*q)->b_reqnext;
+		(*q)->b_reqnext = bh;
+	} else {
+		bh->b_reqnext = bh;
+		*q = bh;
+	}
+	spin_unlock_irq(&lo->lo_lock);
+}
+
+static struct buffer_head *loop_get_bh(struct loop_device *lo, int *listNr, QueLookUpTable *qt)
+{
+	struct buffer_head *bh = NULL, *last;
+
+	spin_lock_irq(&lo->lo_lock);
+	if((last = *qt->q0)) {
+		bh = last->b_reqnext;
+		if(bh == last) {
+			*qt->q0 = NULL;
+		} else {
+			last->b_reqnext = bh->b_reqnext;
+		}
+		bh->b_reqnext = NULL;
+		*listNr = qt->x0;
+	} else if((last = *qt->q1)) {
+		bh = last->b_reqnext;
+		if(bh == last) {
+			*qt->q1 = NULL;
+		} else {
+			last->b_reqnext = bh->b_reqnext;
+		}
+		bh->b_reqnext = NULL;
+		*listNr = qt->x1;
+	} else if((last = *qt->q2)) {
+		bh = last->b_reqnext;
+		if(bh == last) {
+			*qt->q2 = NULL;
+		} else {
+			last->b_reqnext = bh->b_reqnext;
+		}
+		bh->b_reqnext = NULL;
+		*listNr = qt->x2;
+	}
+	spin_unlock_irq(&lo->lo_lock);
+	return bh;
+}
+
+static void loop_put_buffer(struct loop_device *lo, struct buffer_head *b)
+{
+	unsigned long flags;
+	int wk;
+
+	spin_lock_irqsave(&lo->lo_lock, flags);
+	b->b_reqnext = bhFree(lo);
+	bhFree(lo) = b;
+	wk = bhNeed(lo);
+	spin_unlock_irqrestore(&lo->lo_lock, flags);
+
+	if(wk && waitqueue_active(&bhWait(lo))) {
+		wake_up_interruptible(&bhWait(lo));
+	}
+}
+
+static void loop_end_io_transfer_wr(struct buffer_head *bh, int uptodate)
+{
+	struct loop_device *lo = (struct loop_device *)(&loop_dev[MINOR(bh->b_dev)]);
+	struct buffer_head *rbh = bh->b_private;
+
+	rbh->b_reqnext = NULL;
+	rbh->b_end_io(rbh, uptodate);
+	loop_put_buffer(lo, bh);
+	if(atomic_dec_and_test(&lo->lo_pending)) {
+		wake_up_interruptible(&bhWait(lo));
+	}
+}
+
+static void loop_end_io_transfer_rd(struct buffer_head *bh, int uptodate)
+{
+	struct loop_device *lo = (struct loop_device *)(&loop_dev[MINOR(bh->b_dev)]);
+
+	if(!uptodate) {
+		loop_end_io_transfer_wr(bh, uptodate);
+	} else {
+		loop_add_queue_last(lo, bh, &bhQue0(lo));
+	}
+}
+
+static struct buffer_head *loop_get_buffer(struct loop_device *lo,
+		struct buffer_head *rbh, int fromThread, int rw)
+{
+	struct buffer_head *bh;
+	struct page *p;
+	unsigned long flags;
+
+	spin_lock_irqsave(&lo->lo_lock, flags);
+	bh = bhFree(lo);
+	if(bh) {
+		bhFree(lo) = bh->b_reqnext;
+		if(fromThread) bhNeed(lo) = 0;
+	} else {
+		if(fromThread) bhNeed(lo) = 1;
+	}
+	spin_unlock_irqrestore(&lo->lo_lock, flags);
+	if(!bh) return((struct buffer_head *)0);
+
+	p = bh->b_page;
+	memset(bh, 0, sizeof(struct buffer_head));
+	bh->b_page = p;
+
+	bh->b_private = rbh;
+	bh->b_size = rbh->b_size;
+	bh->b_dev = rbh->b_rdev;
+	bh->b_rdev = lo->lo_device;
+	bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
+	bh->b_data = page_address(bh->b_page);
+	bh->b_end_io = (rw == WRITE) ? loop_end_io_transfer_wr : loop_end_io_transfer_rd;
+	bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9);
+	init_waitqueue_head(&bh->b_wait);
+
+	return bh;
+}
+
 #define MAX_DISK_SIZE 1024*1024*1024
 
 static int compute_loop_size(struct loop_device *lo, struct dentry * lo_dentry, kdev_t lodev)
@@ -165,8 +513,7 @@
 			lo->lo_device);
 }
 
-static int lo_send(struct loop_device *lo, struct buffer_head *bh, int bsize,
-		   loff_t pos)
+static int lo_send(struct loop_device *lo, struct buffer_head *bh, loff_t pos)
 {
 	struct file *file = lo->lo_backing_file; /* kudos to NFsckingS */
 	struct address_space *mapping = file->f_dentry->d_inode->i_mapping;
@@ -184,7 +531,7 @@
 	len = bh->b_size;
 	data = bh->b_data;
 	while (len > 0) {
-		int IV = index * (PAGE_CACHE_SIZE/bsize) + offset/bsize;
+		int IV = index * (PAGE_CACHE_SIZE >> 9) + (offset >> 9);
 		size = PAGE_CACHE_SIZE - offset;
 		if (size > len)
 			size = len;
@@ -207,7 +554,6 @@
 		len -= size;
 		offset = 0;
 		index++;
-		pos += size;
 		UnlockPage(page);
 		deactivate_page(page);
 		page_cache_release(page);
@@ -231,7 +577,6 @@
 struct lo_read_data {
 	struct loop_device *lo;
 	char *data;
-	int bsize;
 };
 
 static int lo_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
@@ -240,7 +585,7 @@
 	unsigned long count = desc->count;
 	struct lo_read_data *p = (struct lo_read_data*)desc->buf;
 	struct loop_device *lo = p->lo;
-	int IV = page->index * (PAGE_CACHE_SIZE/p->bsize) + offset/p->bsize;
+	int IV = page->index * (PAGE_CACHE_SIZE >> 9) + (offset >> 9);
 
 	if (size > count)
 		size = count;
@@ -259,8 +604,7 @@
 	return size;
 }
 
-static int lo_receive(struct loop_device *lo, struct buffer_head *bh, int bsize,
-		      loff_t pos)
+static int lo_receive(struct loop_device *lo, struct buffer_head *bh, loff_t pos)
 {
 	struct lo_read_data cookie;
 	read_descriptor_t desc;
@@ -268,7 +612,6 @@
 
 	cookie.lo = lo;
 	cookie.data = bh->b_data;
-	cookie.bsize = bsize;
 	desc.written = 0;
 	desc.count = bh->b_size;
 	desc.buf = (char*)&cookie;
@@ -280,32 +623,6 @@
 	return desc.error;
 }
 
-static inline int loop_get_bs(struct loop_device *lo)
-{
-	int bs = 0;
-
-	if (blksize_size[MAJOR(lo->lo_device)])
-		bs = blksize_size[MAJOR(lo->lo_device)][MINOR(lo->lo_device)];
-	if (!bs)
-		bs = BLOCK_SIZE;
-
-	return bs;
-}
-
-static inline unsigned long loop_get_iv(struct loop_device *lo,
-					unsigned long sector)
-{
-	int bs = loop_get_bs(lo);
-	unsigned long offset, IV;
-
-	IV = sector / (bs >> 9) + lo->lo_offset / bs;
-	offset = ((sector % (bs >> 9)) << 9) + lo->lo_offset % bs;
-	if (offset >= bs)
-		IV++;
-
-	return IV;
-}
-
 static int do_bh_filebacked(struct loop_device *lo, struct buffer_head *bh, int rw)
 {
 	loff_t pos;
@@ -314,129 +631,17 @@
 	pos = ((loff_t) bh->b_rsector << 9) + lo->lo_offset;
 
 	if (rw == WRITE)
-		ret = lo_send(lo, bh, loop_get_bs(lo), pos);
+		ret = lo_send(lo, bh, pos);
 	else
-		ret = lo_receive(lo, bh, loop_get_bs(lo), pos);
+		ret = lo_receive(lo, bh, pos);
 
 	return ret;
 }
 
-static void loop_put_buffer(struct buffer_head *bh)
-{
-	if (bh) {
-		__free_page(bh->b_page);
-		kmem_cache_free(bh_cachep, bh);
-	}
-}
-
-/*
- * Add buffer_head to back of pending list
- */
-static void loop_add_bh(struct loop_device *lo, struct buffer_head *bh)
-{
-	unsigned long flags;
-
-	spin_lock_irqsave(&lo->lo_lock, flags);
-	if (lo->lo_bhtail) {
-		lo->lo_bhtail->b_reqnext = bh;
-		lo->lo_bhtail = bh;
-	} else
-		lo->lo_bh = lo->lo_bhtail = bh;
-	spin_unlock_irqrestore(&lo->lo_lock, flags);
-
-	up(&lo->lo_bh_mutex);
-}
-
-/*
- * Grab first pending buffer
- */
-static struct buffer_head *loop_get_bh(struct loop_device *lo)
-{
-	struct buffer_head *bh;
-
-	spin_lock_irq(&lo->lo_lock);
-	if ((bh = lo->lo_bh)) {
-		if (bh == lo->lo_bhtail)
-			lo->lo_bhtail = NULL;
-		lo->lo_bh = bh->b_reqnext;
-		bh->b_reqnext = NULL;
-	}
-	spin_unlock_irq(&lo->lo_lock);
-
-	return bh;
-}
-
-/*
- * when buffer i/o has completed. if BH_Dirty is set, this was a WRITE
- * and lo->transfer stuff has already been done. if not, it was a READ
- * so queue it for the loop thread and let it do the transfer out of
- * b_end_io context (we don't want to do decrypt of a page with irqs
- * disabled)
- */
-static void loop_end_io_transfer(struct buffer_head *bh, int uptodate)
-{
-	struct loop_device *lo = &loop_dev[MINOR(bh->b_dev)];
-
-	if (!uptodate || test_bit(BH_Dirty, &bh->b_state)) {
-		struct buffer_head *rbh = bh->b_private;
-
-		rbh->b_end_io(rbh, uptodate);
-		if (atomic_dec_and_test(&lo->lo_pending))
-			up(&lo->lo_bh_mutex);
-		loop_put_buffer(bh);
-	} else
-		loop_add_bh(lo, bh);
-}
-
-static struct buffer_head *loop_get_buffer(struct loop_device *lo,
-					   struct buffer_head *rbh)
-{
-	struct buffer_head *bh;
-
-	do {
-		bh = kmem_cache_alloc(bh_cachep, SLAB_NOIO);
-		if (bh)
-			break;
-
-		run_task_queue(&tq_disk);
-		schedule_timeout(HZ);
-	} while (1);
-	memset(bh, 0, sizeof(*bh));
-
-	bh->b_size = rbh->b_size;
-	bh->b_dev = rbh->b_rdev;
-	spin_lock_irq(&lo->lo_lock);
-	bh->b_rdev = lo->lo_device;
-	spin_unlock_irq(&lo->lo_lock);
-	bh->b_state = (1 << BH_Req) | (1 << BH_Mapped) | (1 << BH_Lock);
-
-	/*
-	 * easy way out, although it does waste some memory for < PAGE_SIZE
-	 * blocks... if highmem bounce buffering can get away with it,
-	 * so can we :-)
-	 */
-	do {
-		bh->b_page = alloc_page(GFP_NOIO);
-		if (bh->b_page)
-			break;
-
-		run_task_queue(&tq_disk);
-		schedule_timeout(HZ);
-	} while (1);
-
-	bh->b_data = page_address(bh->b_page);
-	bh->b_end_io = loop_end_io_transfer;
-	bh->b_rsector = rbh->b_rsector + (lo->lo_offset >> 9);
-	init_waitqueue_head(&bh->b_wait);
-
-	return bh;
-}
-
 static int loop_make_request(request_queue_t *q, int rw, struct buffer_head *rbh)
 {
-	struct buffer_head *bh = NULL;
+	struct buffer_head *bh;
 	struct loop_device *lo;
-	unsigned long IV;
 
 	if (!buffer_locked(rbh))
 		BUG();
@@ -444,7 +649,7 @@
 	if (MINOR(rbh->b_rdev) >= max_loop)
 		goto out;
 
-	lo = &loop_dev[MINOR(rbh->b_rdev)];
+	lo = (struct loop_device *)(&loop_dev[MINOR(rbh->b_rdev)]);
 	spin_lock_irq(&lo->lo_lock);
 	if (lo->lo_state != Lo_bound)
 		goto inactive;
@@ -469,36 +674,31 @@
 	 * file backed, queue for loop_thread to handle
 	 */
 	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
-		/*
-		 * rbh locked at this point, noone else should clear
-		 * the dirty flag
-		 */
-		if (rw == WRITE)
-			set_bit(BH_Dirty, &rbh->b_state);
-		loop_add_bh(lo, rbh);
+		loop_add_queue_last(lo, rbh, (rw == WRITE) ? &bhQue1(lo) : &bhQue0(lo));
 		return 0;
 	}
 
 	/*
 	 * piggy old buffer on original, and submit for I/O
+	 * device backed, start reads now, queue writes for thread to handle
 	 */
-	bh = loop_get_buffer(lo, rbh);
-	bh->b_private = rbh;
-	IV = loop_get_iv(lo, bh->b_rsector);
-	if (rw == WRITE) {
-		set_bit(BH_Dirty, &bh->b_state);
-		if (lo_do_transfer(lo, WRITE, bh->b_data, rbh->b_data,
-				   bh->b_size, IV))
-			goto err;
+	if(rw == READ) {
+		bh = loop_get_buffer(lo, rbh, 0, rw);
+	} else {
+		bh = NULL;
+	}
+	if(!bh) {
+		/* just queue request and let thread handle alloc later */
+		loop_add_queue_last(lo, rbh, (rw == WRITE) ? &bhQue1(lo) : &bhQue2(lo));
+		return 0;
 	}
-
 	generic_make_request(rw, bh);
 	return 0;
 
 err:
-	if (atomic_dec_and_test(&lo->lo_pending))
-		up(&lo->lo_bh_mutex);
-	loop_put_buffer(bh);
+	if (atomic_dec_and_test(&lo->lo_pending)) {
+		wake_up_interruptible(&bhWait(lo));
+	}
 out:
 	buffer_IO_error(rbh);
 	return 0;
@@ -507,41 +707,23 @@
 	goto out;
 }
 
-static inline void loop_handle_bh(struct loop_device *lo,struct buffer_head *bh)
-{
-	int ret;
-
-	/*
-	 * For block backed loop, we know this is a READ
-	 */
-	if (lo->lo_flags & LO_FLAGS_DO_BMAP) {
-		int rw = !!test_and_clear_bit(BH_Dirty, &bh->b_state);
-
-		ret = do_bh_filebacked(lo, bh, rw);
-		bh->b_end_io(bh, !ret);
-	} else {
-		struct buffer_head *rbh = bh->b_private;
-		unsigned long IV = loop_get_iv(lo, rbh->b_rsector);
-
-		ret = lo_do_transfer(lo, READ, bh->b_data, rbh->b_data,
-				     bh->b_size, IV);
-
-		rbh->b_end_io(rbh, !ret);
-		loop_put_buffer(bh);
-	}
-}
-
 /*
- * worker thread that handles reads/writes to file backed loop devices,
- * to avoid blocking in our make_request_fn. it also does loop decrypting
- * on reads for block backed loop, as that is too heavy to do from
- * b_end_io context where irqs may be disabled.
+ * worker thread that handles all encryption and decryption.
  */
 static int loop_thread(void *data)
 {
 	struct loop_device *lo = data;
-	struct buffer_head *bh;
+	struct buffer_head *bh, *xbh;
+	int x, rw, qi = 0, flushcnt = 0;
+	wait_queue_t waitq;
+	QueLookUpTable qt[4] = {
+		{ &bhQue0(lo), &bhQue1(lo), &bhQue2(lo), 0, 1, 2 },
+		{ &bhQue2(lo), &bhQue0(lo), &bhQue1(lo), 2, 0, 1 },
+		{ &bhQue0(lo), &bhQue2(lo), &bhQue1(lo), 0, 2, 1 },
+		{ &bhQue1(lo), &bhQue0(lo), &bhQue2(lo), 1, 0, 2 }
+	};
+
+	init_waitqueue_entry(&waitq, current);
 
 	daemonize();
 	exit_files(current);
@@ -566,27 +748,101 @@
 	up(&lo->lo_sem);
 
 	for (;;) {
-		down_interruptible(&lo->lo_bh_mutex);
+		add_wait_queue(&bhWait(lo), &waitq);
+		for(;;) {
+			set_current_state(TASK_INTERRUPTIBLE);
+			if(!atomic_read(&lo->lo_pending)) break;
+
+			x = 0;
+			spin_lock_irq(&lo->lo_lock);
+			if(bhQue0(lo)) {
+				x = 1;
+			} else if(bhQue1(lo) || bhQue2(lo)) {
+				/* file backed works too because bhNeed(lo) == 0 */
+				if(bhFree(lo) || !bhNeed(lo)) x = 1;
+			}
+			spin_unlock_irq(&lo->lo_lock);
+			if(x) break;
+
+			schedule();
+		}
+		current->state = TASK_RUNNING;
+		remove_wait_queue(&bhWait(lo), &waitq);
+
 		/*
-		 * could be upped because of tear-down, not because of
+		 * could be woken because of tear-down, not because of
 		 * pending work
 		 */
-		if (!atomic_read(&lo->lo_pending))
-			break;
+		if(!atomic_read(&lo->lo_pending)) break;
+
+		/*
+		 * read queues using alternating order to prevent starvation
+		 */
+		bh = loop_get_bh(lo, &x, &qt[++qi & 3]);
+		if(!bh) continue;
+
+		/*
+		 * x   list tag     usage(buffer-allocated)
+		 * --- --------     -----------------------
+		 * 0   bhQue0(lo)   dev-read(y) / file-read
+		 * 1   bhQue1(lo)   dev-write(n) / file-write
+		 * 2   bhQue2(lo)   dev-read(n)
+		 */
+		rw = (x == 1) ? WRITE : READ;
+		if((x >= 1) && !(lo->lo_flags & LO_FLAGS_DO_BMAP)) {
+			/* loop_make_request didn't allocate a buffer, do that now */
+			xbh = loop_get_buffer(lo, bh, 1, rw);
+			if(!xbh) {
+				run_task_queue(&tq_disk);
+				flushcnt = 0;
+				loop_add_queue_first(lo, bh, (rw == WRITE) ? &bhQue1(lo) : &bhQue2(lo));
+				/* bhNeed(lo) should be 1 now, go back to sleep */
+				continue;
+			}
+			if(rw == WRITE) {
+				if(lo_do_transfer(lo, WRITE, xbh->b_data, bh->b_data, xbh->b_size, xbh->b_rsector)) {
+					loop_put_buffer(lo, xbh);
+					buffer_IO_error(bh);
+					atomic_dec(&lo->lo_pending);
+					continue;
+				}
+			}
+			generic_make_request(rw, xbh);
+
+			/* start I/O if there are no more requests lacking buffers */
+			x = 0;
+			spin_lock_irq(&lo->lo_lock);
+			if(!bhQue1(lo) && !bhQue2(lo)) x = 1;
+			spin_unlock_irq(&lo->lo_lock);
+			if(x || (++flushcnt >= bhFlsh(lo))) {
+				run_task_queue(&tq_disk);
+				flushcnt = 0;
+			}
 
-		bh = loop_get_bh(lo);
-		if (!bh) {
-			printk("loop: missing bh\n");
+			/* request not completely processed yet */
 			continue;
 		}
-		loop_handle_bh(lo, bh);
+
+		if(lo->lo_flags & LO_FLAGS_DO_BMAP) {
+			/* request is for file backed device */
+			x = do_bh_filebacked(lo, bh, rw);
+			bh->b_reqnext = NULL;
+			bh->b_end_io(bh, !x);
+		} else {
+			/* device backed read has completed, do decrypt now */
+			xbh = bh->b_private;
+			/* must not use bh->b_rsector as IV, as it may be modified by LVM at this point */
+			/* instead, recompute IV from original request */
+			x = lo_do_transfer(lo, READ, bh->b_data, xbh->b_data, bh->b_size, xbh->b_rsector + (lo->lo_offset >> 9));
+			xbh->b_reqnext = NULL;
+			xbh->b_end_io(xbh, !x);
+			loop_put_buffer(lo, bh);
+		}
 
 		/*
-		 * upped both for pending work and tear-down, lo_pending
+		 * woken both for pending work and tear-down, lo_pending
 		 * will hit zero then
 		 */
-		if (atomic_dec_and_test(&lo->lo_pending))
-			break;
+		if(atomic_dec_and_test(&lo->lo_pending)) break;
 	}
 
 	up(&lo->lo_sem);
@@ -620,7 +876,22 @@
 	if (!(file->f_mode & FMODE_WRITE))
 		lo_flags |= LO_FLAGS_READ_ONLY;
 
+	bhFree(lo) = bhQue2(lo) = bhQue1(lo) = bhQue0(lo) = NULL;
+	bhNeed(lo) = bhFlsh(lo) = 0;
+	init_waitqueue_head(&bhWait(lo));
 	if (S_ISBLK(inode->i_mode)) {
+		int i, x = lo_prealloc[0];
+		for(i = 1; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
+			if(lo_prealloc[i+1] && (lo->lo_number == lo_prealloc[i])) {
+				x = lo_prealloc[i+1];
+				break;
+			}
+		}
+		bhFlsh(lo) = (x * 3) / 4;
+		if(loop_prealloc_init(lo, x)) {
+			error = -ENOMEM;
+			goto out_putf;
+		}
 		lo_device = inode->i_rdev;
 	} else if (S_ISREG(inode->i_mode)) {
 		struct address_space_operations *aops = inode->i_mapping->a_ops;
@@ -664,9 +935,9 @@
 	if (!bs)
 		bs = BLOCK_SIZE;
 
+	if(S_ISREG(inode->i_mode)) bs = BLOCK_SIZE;
 	set_blocksize(dev, bs);
 
-	lo->lo_bh = lo->lo_bhtail = NULL;
 	kernel_thread(loop_thread, lo, CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
 	down(&lo->lo_sem);
 
@@ -724,12 +995,14 @@
 
 	spin_lock_irq(&lo->lo_lock);
 	lo->lo_state = Lo_rundown;
-	if (atomic_dec_and_test(&lo->lo_pending))
-		up(&lo->lo_bh_mutex);
+	if(atomic_dec_and_test(&lo->lo_pending)) {
+		wake_up_interruptible(&bhWait(lo));
+	}
 	spin_unlock_irq(&lo->lo_lock);
 
 	down(&lo->lo_sem);
 
+	loop_prealloc_cleanup(lo);
 	lo->lo_backing_file = NULL;
 
 	loop_release_xfer(lo);
@@ -836,7 +1109,7 @@
 	dev = MINOR(inode->i_rdev);
 	if (dev >= max_loop)
 		return -ENODEV;
-	lo = &loop_dev[dev];
+	lo = (struct loop_device *)(&loop_dev[dev]);
 	down(&lo->lo_ctl_mutex);
 	switch (cmd) {
 	case LOOP_SET_FD:
@@ -879,7 +1152,7 @@
 static int lo_open(struct inode *inode, struct file *file)
 {
 	struct loop_device *lo;
-	int dev, type;
+	int dev;
 
 	if (!inode)
 		return -EINVAL;
@@ -891,13 +1164,9 @@
 	if (dev >= max_loop)
 		return -ENODEV;
 
-	lo = &loop_dev[dev];
+	lo = (struct loop_device *)(&loop_dev[dev]);
 	MOD_INC_USE_COUNT;
 	down(&lo->lo_ctl_mutex);
-
-	type = lo->lo_encrypt_type;
-	if (type && xfer_funcs[type] && xfer_funcs[type]->lock)
-		xfer_funcs[type]->lock(lo);
 	lo->lo_refcnt++;
 	up(&lo->lo_ctl_mutex);
 	return 0;
@@ -906,7 +1175,7 @@
 static int lo_release(struct inode *inode, struct file *file)
 {
 	struct loop_device *lo;
-	int dev, type;
+	int dev;
 
 	if (!inode)
 		return 0;
@@ -919,13 +1188,9 @@
 	if (dev >= max_loop)
 		return 0;
 
-	lo = &loop_dev[dev];
+	lo = (struct loop_device *)(&loop_dev[dev]);
 	down(&lo->lo_ctl_mutex);
-	type = lo->lo_encrypt_type;
 	--lo->lo_refcnt;
-	if (xfer_funcs[type] && xfer_funcs[type]->unlock)
-		xfer_funcs[type]->unlock(lo);
-
 	up(&lo->lo_ctl_mutex);
 	MOD_DEC_USE_COUNT;
 	return 0;
@@ -955,11 +1220,13 @@
 int loop_unregister_transfer(int number)
 {
 	struct loop_device *lo;
+	int x, type;
 
 	if ((unsigned)number >= MAX_LO_CRYPT)
 		return -EINVAL;
-	for (lo = &loop_dev[0]; lo < &loop_dev[max_loop]; lo++) {
-		int type = lo->lo_encrypt_type;
+	for(x = 0; x < max_loop; x++) {
+		lo = (struct loop_device *)(&loop_dev[x]);
+		type = lo->lo_encrypt_type;
 		if (type == number) {
 			xfer_funcs[type]->release(lo);
 			lo->transfer = NULL;
@@ -995,7 +1262,7 @@
 			      S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP,
 			      &lo_fops, NULL);
 
-	loop_dev = kmalloc(max_loop * sizeof(struct loop_device), GFP_KERNEL);
+	loop_dev = kmalloc(max_loop * sizeof(LoDevExt), GFP_KERNEL);
 	if (!loop_dev)
 		return -ENOMEM;
@@ -1010,8 +1277,8 @@
 	blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), loop_make_request);
 
 	for (i = 0; i < max_loop; i++) {
-		struct loop_device *lo = &loop_dev[i];
-		memset(lo, 0, sizeof(struct loop_device));
+		struct loop_device *lo = (struct loop_device *)(&loop_dev[i]);
+		memset(lo, 0, sizeof(LoDevExt));
 		init_MUTEX(&lo->lo_ctl_mutex);
 		init_MUTEX_LOCKED(&lo->lo_sem);
 		init_MUTEX_LOCKED(&lo->lo_bh_mutex);
@@ -1026,13 +1293,18 @@
 	for (i = 0; i < max_loop; i++)
 		register_disk(NULL, MKDEV(MAJOR_NR, i), 1, &lo_fops, 0);
 
+	for(i = 0; i < (sizeof(lo_prealloc) / sizeof(int)); i += 2) {
+		if(!lo_prealloc[i]) continue;
+		if(lo_prealloc[i] < LO_PREALLOC_MIN) lo_prealloc[i] = LO_PREALLOC_MIN;
+		if(lo_prealloc[i] > LO_PREALLOC_MAX) lo_prealloc[i] = LO_PREALLOC_MAX;
+	}
 	printk(KERN_INFO "loop: loaded (max %d devices)\n", max_loop);
 	return 0;
 
-out_sizes:
-	kfree(loop_dev);
 out_blksizes:
 	kfree(loop_sizes);
+out_sizes:
+	kfree(loop_dev);
 	printk(KERN_ERR "loop: ran out of memory\n");
 	return -ENOMEM;
 }

-- 
| G r e g  L o u i s          | gpg public key:      |
|   http://www.bgl.nu/~glouis |   finger greg@xxxxxx |

Linux-crypto:  cryptography in and on the Linux system
Archive:       http://mail.nl.linux.org/linux-crypto/