----- Original Message ----- | This adds a full fledged direct I/O implementation using the iomap | interface. Full fledged in this case means all features are supported: | AIO, vectored I/O, any iov_iter type including kernel pointers, bvecs | and pipes, support for hole filling and async appending writes. It does | not mean supporting all the warts of the old generic code. We expect | i_rwsem to be held over the duration of the call, and we expect to | maintain i_dio_count ourselves, and we pass on any kinds of mapping | to the file system for now. | | The algorithm used is very simple: We use iomap_apply to iterate over | the range of the I/O, and then we use the new bio_iov_iter_get_pages | helper to lock down the user range for the size of the extent. | bio_iov_iter_get_pages can currently lock down twice as many pages as | the old direct I/O code did, which means that we will have a better | batch factor for everything but overwrites of badly fragmented files. | | Signed-off-by: Christoph Hellwig <hch@xxxxxx> | --- (snip) | +static blk_qc_t | +iomap_dio_zero(struct iomap_dio *dio, struct iomap *iomap, loff_t pos, | + unsigned len) | +{ | + struct page *page = ZERO_PAGE(0); | + struct bio *bio; | + | + bio = bio_alloc(GFP_KERNEL, 1); It's unlikely, but bio_alloc can return NULL; shouldn't the code be checking for that? 
| + bio->bi_bdev = iomap->bdev; | + bio->bi_iter.bi_sector = | + iomap->blkno + ((pos - iomap->offset) >> 9); | + bio->bi_private = dio; | + bio->bi_end_io = iomap_dio_bio_end_io; | + | + get_page(page); | + if (bio_add_page(bio, page, len, 0) != len) | + BUG(); | + bio_set_op_attrs(bio, REQ_OP_WRITE, WRITE_ODIRECT); | + | + atomic_inc(&dio->ref); | + return submit_bio(bio); | +} | + | +static loff_t | +iomap_dio_actor(struct inode *inode, loff_t pos, loff_t length, | + void *data, struct iomap *iomap) | +{ | + struct iomap_dio *dio = data; | + unsigned blkbits = blksize_bits(bdev_logical_block_size(iomap->bdev)); | + unsigned fs_block_size = (1 << inode->i_blkbits), pad; | + struct iov_iter iter = *dio->submit.iter; | + struct bio *bio; | + bool may_zero = false; | + int nr_pages, ret; | + | + if ((pos | length | iov_iter_alignment(&iter)) & ((1 << blkbits) - 1)) | + return -EINVAL; | + | + switch (iomap->type) { | + case IOMAP_HOLE: | + /* | + * We return -ENOTBLK to fall back to buffered I/O for file | + * systems that can't fill holes from direct writes. | + */ | + if (dio->flags & IOMAP_DIO_WRITE) | + return -ENOTBLK; | + /*FALLTHRU*/ | + case IOMAP_UNWRITTEN: | + if (!(dio->flags & IOMAP_DIO_WRITE)) { | + iov_iter_zero(length, dio->submit.iter); | + dio->size += length; | + return length; | + } | + dio->flags |= IOMAP_DIO_UNWRITTEN; | + may_zero = true; | + break; | + case IOMAP_MAPPED: | + if (iomap->flags & IOMAP_F_SHARED) | + dio->flags |= IOMAP_DIO_COW; | + if (iomap->flags & IOMAP_F_NEW) | + may_zero = true; | + break; | + default: | + WARN_ON_ONCE(1); | + return -EIO; | + } | + | + iov_iter_truncate(&iter, length); | + nr_pages = iov_iter_npages(&iter, BIO_MAX_PAGES); | + if (nr_pages <= 0) | + return nr_pages; | + | + if (may_zero) { | + pad = pos & (fs_block_size - 1); | + if (pad) | + iomap_dio_zero(dio, iomap, pos - pad, pad); | + } | + | + do { | + if (dio->error) | + return 0; | + | + bio = bio_alloc(GFP_KERNEL, nr_pages); Same here. 
Also: the code that follows is nearly identical; do you want to make it a macro or inline function or something? Regards, Bob Peterson Red Hat File Systems -- To unsubscribe from this list: send the line "unsubscribe linux-block" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html