This switches dio over to the rwmem API to get at the memory pages backing the IO, instead of walking iovecs directly. Properties that hold across the whole set of memory segments making up the buffer argument are read straight from the rwm struct; everything that has to walk the underlying segment representation goes through the rwmem ops helpers.
---
 fs/direct-io.c |  123 +++++++++++++++++++++++++-------------------------
 1 files changed, 55 insertions(+), 68 deletions(-)

diff --git a/fs/direct-io.c b/fs/direct-io.c
index acf0da1..0d5ed41 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -34,7 +34,7 @@
 #include <linux/blkdev.h>
 #include <linux/buffer_head.h>
 #include <linux/rwsem.h>
-#include <linux/uio.h>
+#include <linux/rwmem.h>
 #include <asm/atomic.h>
 
 /*
@@ -105,11 +105,12 @@ struct dio {
 	sector_t cur_page_block;	/* Where it starts */
 
 	/*
-	 * Page fetching state. These variables belong to dio_refill_pages().
+	 * Page fetching state.  direct_io_worker() sets these for
+	 * dio_refill_pages(), which modifies them as it fetches.
 	 */
-	int curr_page;			/* changes */
-	int total_pages;		/* doesn't change */
-	unsigned long curr_user_address;/* changes */
+	struct rwmem *rwm;
+	unsigned long cur_seg;
+	unsigned long cur_seg_cursor;
 
 	/*
 	 * Page queue. These variables belong to dio_refill_pages() and
@@ -146,21 +147,11 @@ static inline unsigned dio_pages_present(struct dio *dio)
  */
 static int dio_refill_pages(struct dio *dio)
 {
+	struct rwmem *rwm = dio->rwm;
 	int ret;
-	int nr_pages;
-
-	nr_pages = min(dio->total_pages - dio->curr_page, DIO_PAGES);
-	down_read(&current->mm->mmap_sem);
-	ret = get_user_pages(
-		current,			/* Task for fault acounting */
-		current->mm,			/* whose pages? */
-		dio->curr_user_address,		/* Where from? */
-		nr_pages,			/* How many pages? */
-		dio->rw == READ,		/* Write to memory? */
-		0,				/* force (?) */
-		&dio->pages[0],
-		NULL);				/* vmas */
-	up_read(&current->mm->mmap_sem);
+
+	ret = rwm->ops->get_seg_pages(rwm, dio->cur_seg, &dio->cur_seg_cursor,
+				      dio->pages, DIO_PAGES, dio->rw == READ);
 
 	if (ret < 0 && dio->blocks_available && (dio->rw & WRITE)) {
 		struct page *page = ZERO_PAGE(0);
@@ -180,8 +171,6 @@ static int dio_refill_pages(struct dio *dio)
 	}
 
 	if (ret >= 0) {
-		dio->curr_user_address += ret * PAGE_SIZE;
-		dio->curr_page += ret;
 		dio->head = 0;
 		dio->tail = ret;
 		ret = 0;
@@ -938,11 +927,9 @@ out:
  */
 static ssize_t
 direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
-	const struct iovec *iov, loff_t offset, unsigned long nr_segs,
-	unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
-	struct dio *dio)
+	struct rwmem *rwm, loff_t offset, unsigned blkbits,
+	get_block_t get_block, dio_iodone_t end_io, struct dio *dio)
 {
-	unsigned long user_addr;
 	unsigned long flags;
 	int seg;
 	ssize_t ret = 0;
@@ -966,44 +953,33 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	spin_lock_init(&dio->bio_lock);
 	dio->refcount = 1;
 
+	dio->rwm = rwm;
+
 	/*
 	 * In case of non-aligned buffers, we may need 2 more
 	 * pages since we need to zero out first and last block.
 	 */
+	dio->pages_in_io = rwm->nr_pages;
 	if (unlikely(dio->blkfactor))
-		dio->pages_in_io = 2;
-
-	for (seg = 0; seg < nr_segs; seg++) {
-		user_addr = (unsigned long)iov[seg].iov_base;
-		dio->pages_in_io +=
-			((user_addr+iov[seg].iov_len +PAGE_SIZE-1)/PAGE_SIZE
-				- user_addr/PAGE_SIZE);
-	}
+		dio->pages_in_io += 2;
 
-	for (seg = 0; seg < nr_segs; seg++) {
-		user_addr = (unsigned long)iov[seg].iov_base;
-		dio->size += bytes = iov[seg].iov_len;
+	for (seg = 0; seg < rwm->nr_segs; seg++) {
+		dio->size += bytes = rwm->ops->seg_bytes(rwm, seg);
 
 		/* Index into the first page of the first block */
-		dio->first_block_in_page = (user_addr & ~PAGE_MASK) >> blkbits;
+		dio->first_block_in_page =
+			rwm->ops->seg_page_offset(rwm, seg) >> blkbits;
 		dio->final_block_in_request = dio->block_in_file +
 						(bytes >> blkbits);
 		/* Page fetching state */
+		dio->cur_seg = seg;
+		dio->cur_seg_cursor = 0;
 		dio->head = 0;
 		dio->tail = 0;
-		dio->curr_page = 0;
 
-		dio->total_pages = 0;
-		if (user_addr & (PAGE_SIZE-1)) {
-			dio->total_pages++;
-			bytes -= PAGE_SIZE - (user_addr & (PAGE_SIZE - 1));
-		}
-		dio->total_pages += (bytes + PAGE_SIZE - 1) / PAGE_SIZE;
-		dio->curr_user_address = user_addr;
-
 		ret = do_direct_IO(dio);
 
-		dio->result += iov[seg].iov_len -
+		dio->result += bytes -
 			((dio->final_block_in_request - dio->block_in_file) <<
 					blkbits);
 
@@ -1113,15 +1089,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
  *
  * Additional i_alloc_sem locking requirements described inline below.
  */
-ssize_t
-__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
-	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
-	int dio_lock_type)
+static ssize_t
+blockdev_direct_IO_rwmem(int rw, struct kiocb *iocb, struct inode *inode,
+	struct block_device *bdev, struct rwmem *rwm, loff_t offset,
+	get_block_t get_block, dio_iodone_t end_io, int dio_lock_type)
 {
-	int seg;
-	size_t size;
-	unsigned long addr;
 	unsigned blkbits = inode->i_blkbits;
 	unsigned bdev_blkbits = 0;
 	unsigned blocksize_mask = (1 << blkbits) - 1;
@@ -1146,17 +1118,12 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	}
 
 	/* Check the memory alignment.  Blocks cannot straddle pages */
-	for (seg = 0; seg < nr_segs; seg++) {
-		addr = (unsigned long)iov[seg].iov_base;
-		size = iov[seg].iov_len;
-		end += size;
-		if ((addr & blocksize_mask) || (size & blocksize_mask)) {
-			if (bdev)
-				 blkbits = bdev_blkbits;
-			blocksize_mask = (1 << blkbits) - 1;
-			if ((addr & blocksize_mask) || (size & blocksize_mask))
-				goto out;
-		}
+	if (rwm->boundary_bits & blocksize_mask) {
+		if (bdev)
+			blkbits = bdev_blkbits;
+		blocksize_mask = (1 << blkbits) - 1;
+		if (rwm->boundary_bits & blocksize_mask)
+			goto out;
 	}
 
 	dio = kzalloc(sizeof(*dio), GFP_KERNEL);
@@ -1212,8 +1179,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	dio->is_async = !is_sync_kiocb(iocb) &&
 		!((rw & WRITE) && (end > i_size_read(inode)));
 
-	retval = direct_io_worker(rw, iocb, inode, iov, offset,
-				nr_segs, blkbits, get_block, end_io, dio);
+	retval = direct_io_worker(rw, iocb, inode, rwm, offset, blkbits,
+				  get_block, end_io, dio);
 
 	if (rw == READ && dio_lock_type == DIO_LOCKING)
 		release_i_mutex = 0;
@@ -1225,4 +1192,24 @@ out:
 		mutex_lock(&inode->i_mutex);
 	return retval;
 }
+
+ssize_t
+__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
+	struct block_device *bdev, const struct iovec *iov, loff_t offset,
+	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
+	int dio_lock_type)
+{
+	struct rwmem_iovec rwi = {
+		.rwmem.ops = &rwmem_iovec_ops,
+		.rwmem.nr_segs = nr_segs,
+		.iov = iov,
+	};
+	struct rwmem *rwm = &rwi.rwmem;
+
+	rwm->ops->init(rwm);
+
+	return blockdev_direct_IO_rwmem(rw, iocb, inode, bdev, rwm, offset,
+					get_block, end_io, dio_lock_type);
+}
+
 EXPORT_SYMBOL(__blockdev_direct_IO);
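For reviewers who don't have the earlier rwmem patches in front of them, here is a rough sketch of the interface this file now codes against, reconstructed purely from the call sites above. The real definitions live in <linux/rwmem.h> from earlier in the series; the field and parameter names below are inferred from usage and may not match it exactly.

/*
 * Sketch only -- inferred from this patch's call sites, not copied from
 * linux/rwmem.h.  The rwmem patch earlier in the series is authoritative.
 */
struct page;
struct iovec;
struct rwmem;

struct rwmem_ops {
	/* fill in derived fields (nr_pages, boundary_bits, ...) */
	void (*init)(struct rwmem *rwm);
	/* bytes covered by segment 'seg' */
	size_t (*seg_bytes)(struct rwmem *rwm, unsigned long seg);
	/* byte offset of segment 'seg' into its first page */
	unsigned long (*seg_page_offset)(struct rwmem *rwm, unsigned long seg);
	/*
	 * Pin up to 'nr_pages' pages of segment 'seg', starting at *cursor
	 * and advancing it.  Returns the number of pages pinned or a
	 * negative errno, like get_user_pages().
	 */
	int (*get_seg_pages)(struct rwmem *rwm, unsigned long seg,
			     unsigned long *cursor, struct page **pages,
			     int nr_pages, int write_to_mem);
};

struct rwmem {
	struct rwmem_ops *ops;
	unsigned long nr_segs;		/* segments making up the buffer */
	unsigned long nr_pages;		/* pages spanned by all segments */
	/*
	 * Presumably the OR of every segment's base address and length,
	 * so one mask test checks the alignment of the whole buffer.
	 */
	unsigned long boundary_bits;
};

/* iovec-backed flavour, as instantiated in __blockdev_direct_IO() */
struct rwmem_iovec {
	struct rwmem rwmem;
	const struct iovec *iov;
};

extern struct rwmem_ops rwmem_iovec_ops;

The point of the indirection appears to be that blockdev_direct_IO_rwmem() never touches an iovec itself, so a future caller could pass a differently-backed rwmem (kernel pages, bios, etc.) without touching fs/direct-io.c again.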