Implement writing into the cache and reading back from the cache inside cachefiles using asynchronous direct I/O from the specified iterator. The size and position of the request should be aligned to the reported dio_block_size. Errors and completion are reported by callback. Signed-off-by: David Howells <dhowells@xxxxxxxxxx> --- fs/cachefiles/interface.c | 20 ++- fs/cachefiles/internal.h | 2 fs/cachefiles/io.c | 270 ++++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 277 insertions(+), 15 deletions(-) diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c index 0e3d5b5ffc55..c14e2b4f5b24 100644 --- a/fs/cachefiles/interface.c +++ b/fs/cachefiles/interface.c @@ -12,8 +12,6 @@ #include "internal.h" static int cachefiles_attr_changed(struct cachefiles_object *object); -static void cachefiles_put_object(struct fscache_object *_object, - enum fscache_obj_ref_trace why); /* * Allocate an object record for a cookie lookup and prepare the lookup data. @@ -160,7 +158,8 @@ static void cachefiles_update_object(struct fscache_object *_object) struct cachefiles_object *object; struct cachefiles_cache *cache; const struct cred *saved_cred; - loff_t object_size; + struct inode *inode; + loff_t object_size, i_size; int ret; _enter("{OBJ%x}", _object->debug_id); @@ -172,12 +171,14 @@ static void cachefiles_update_object(struct fscache_object *_object) cachefiles_begin_secure(cache, &saved_cred); object_size = object->fscache.cookie->object_size; - if (i_size_read(d_inode(object->dentry)) > object_size) { + inode = d_inode(object->dentry); + i_size = i_size_read(inode); + if (i_size > object_size) { struct path path = { .mnt = cache->mnt, .dentry = object->dentry }; - _debug("trunc %llx -> %llx", i_size_read(d_inode(object->dentry)), object_size); + _debug("trunc %llx -> %llx", i_size, object_size); ret = vfs_truncate(&path, object_size); if (ret < 0) { cachefiles_io_error_obj(object, "Trunc-to-size failed"); @@ -186,8 +187,9 @@ static void cachefiles_update_object(struct fscache_object *_object) } object_size = round_up(object_size, CACHEFILES_DIO_BLOCK_SIZE); - _debug("trunc %llx -> %llx", i_size_read(d_inode(object->dentry)), object_size); - if (i_size_read(d_inode(object->dentry)) < object_size) { + i_size = i_size_read(inode); + _debug("trunc %llx -> %llx", i_size, object_size); + if (i_size < object_size) { ret = vfs_truncate(&path, object_size); if (ret < 0) { cachefiles_io_error_obj(object, "Trunc-to-dio-size failed"); @@ -283,8 +285,8 @@ static void cachefiles_drop_object(struct fscache_object *_object, /* * dispose of a reference to an object */ -static void cachefiles_put_object(struct fscache_object *_object, - enum fscache_obj_ref_trace why) +void cachefiles_put_object(struct fscache_object *_object, + enum fscache_obj_ref_trace why) { struct cachefiles_object *object; struct fscache_cache *cache; diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index e9f45d5053b1..24a8aed2600d 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -150,6 +150,8 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, extern const struct fscache_cache_ops cachefiles_cache_ops; extern struct fscache_object *cachefiles_grab_object(struct fscache_object *_object, enum fscache_obj_ref_trace why); +extern void cachefiles_put_object(struct fscache_object *_object, + enum fscache_obj_ref_trace why); /* * io.c diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c index bf1930699636..4c66b9183dd6 100644 --- a/fs/cachefiles/io.c +++ b/fs/cachefiles/io.c @@ -10,9 +10,52 @@ #include <linux/file.h> #include <linux/uio.h> #include <linux/xattr.h> +#include <linux/sched/mm.h> #include "internal.h" #include <trace/events/fscache.h> +struct cachefiles_kiocb { + struct kiocb iocb; + refcount_t ki_refcnt; + loff_t start; + union { + size_t skipped; + size_t len; + }; + struct cachefiles_object *object; + fscache_io_terminated_t term_func; + void *term_func_priv; +}; + +static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) +{ + if (refcount_dec_and_test(&ki->ki_refcnt)) { + cachefiles_put_object(&ki->object->fscache, fscache_obj_put_ioreq); + fput(ki->iocb.ki_filp); + kfree(ki); + } +} + +/* + * Handle completion of a read from the cache. + */ +static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2) +{ + struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + + _enter("%ld,%ld", ret, ret2); + + if (ki->term_func) { + if (ret < 0) + ki->term_func(ki->term_func_priv, ret); + else + ki->term_func(ki->term_func_priv, ki->skipped + ret); + } + + fscache_uncount_io_operation(ki->object->fscache.cookie); + cachefiles_put_kiocb(ki); +} + /* * Initiate a read from the cache. */ @@ -23,11 +66,142 @@ int cachefiles_read(struct fscache_op_resources *opr, fscache_io_terminated_t term_func, void *term_func_priv) { - fscache_wait_for_operation(opr, FSCACHE_WANT_READ); + struct cachefiles_object *object = + container_of(opr->object, struct cachefiles_object, fscache); + struct cachefiles_kiocb *ki; + struct file *file; + unsigned int old_nofs; + ssize_t ret = -ENOBUFS; + size_t len = iov_iter_count(iter), skipped = 0; + + spin_lock(&object->fscache.lock); + file = get_file(object->backing_file); + spin_unlock(&object->fscache.lock); + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start_pos, len, + i_size_read(file->f_inode)); + + __fscache_wait_for_operation(opr, FSCACHE_WANT_READ); fscache_count_io_operation(opr->object->cookie); + + /* If the caller asked us to seek for data before doing the read, then + * we should do that now. If we find a gap, we fill it with zeros. + */ + if (seek_data) { + loff_t off = start_pos, off2; + + off2 = vfs_llseek(file, off, SEEK_DATA); + if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) { + skipped = 0; + ret = off2; + goto presubmission_error; + } + + if (off2 == -ENXIO || off2 >= start_pos + len) { + /* The region is beyond the EOF or there's no more data + * in the region, so clear the rest of the buffer and + * return success. + */ + iov_iter_zero(len, iter); + skipped = len; + fscache_uncount_io_operation(object->fscache.cookie); + ret = 0; + goto presubmission_error; + } + + skipped = off2 - off; + iov_iter_zero(skipped, iter); + } + + ret = -ENOBUFS; + ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); + if (!ki) + goto presubmission_error; + + refcount_set(&ki->ki_refcnt, 2); + ki->iocb.ki_filp = file; + ki->iocb.ki_pos = start_pos + skipped; + ki->iocb.ki_flags = IOCB_DIRECT; + ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); + ki->iocb.ki_ioprio = get_current_ioprio(); + ki->skipped = skipped; + ki->object = object; + ki->term_func = term_func; + ki->term_func_priv = term_func_priv; + + if (ki->term_func) + ki->iocb.ki_complete = cachefiles_read_complete; + + ret = rw_verify_area(READ, file, &ki->iocb.ki_pos, len - skipped); + if (ret < 0) + goto presubmission_error_free; + + cachefiles_grab_object(&object->fscache, fscache_obj_get_ioreq); + + old_nofs = memalloc_nofs_save(); + ret = call_read_iter(file, &ki->iocb, iter); + memalloc_nofs_restore(old_nofs); + switch (ret) { + case -EIOCBQUEUED: + goto in_progress; + + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + ret = -EINTR; + fallthrough; + default: + cachefiles_read_complete(&ki->iocb, ret, 0); + if (ret > 0) + ret = 0; + break; + } + +in_progress: + cachefiles_put_kiocb(ki); + _leave(" = %zd", ret); + return ret; + +presubmission_error_free: + kfree(ki); +presubmission_error: + fput(file); if (term_func) - term_func(term_func_priv, -ENODATA); - return -ENODATA; + term_func(term_func_priv, ret < 0 ? ret : skipped); + return ret; +} + +/* + * Handle completion of a write to the cache. + */ +static void cachefiles_write_complete(struct kiocb *iocb, long ret, long ret2) +{ + struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); + struct inode *inode = file_inode(ki->iocb.ki_filp); + + _enter("%ld,%ld", ret, ret2); + + /* Tell lockdep we inherited freeze protection from submission thread */ + __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); + __sb_end_write(inode->i_sb, SB_FREEZE_WRITE); + + if (ret < 0) { + if (ki->term_func) + ki->term_func(ki->term_func_priv, ret); + } else { + if (ret == ki->len) + cachefiles_mark_content_map(ki->object, ki->start, ki->len); + if (ki->term_func) + ki->term_func(ki->term_func_priv, ret); + } + + fscache_uncount_io_operation(ki->object->fscache.cookie); + cachefiles_put_kiocb(ki); } /* @@ -39,11 +213,95 @@ int cachefiles_write(struct fscache_op_resources *opr, fscache_io_terminated_t term_func, void *term_func_priv) { - fscache_wait_for_operation(opr, FSCACHE_WANT_WRITE); + struct cachefiles_object *object = + container_of(opr->object, struct cachefiles_object, fscache); + struct cachefiles_kiocb *ki; + struct inode *inode; + struct file *file; + unsigned int old_nofs; + ssize_t ret = -ENOBUFS; + size_t len = iov_iter_count(iter); + + spin_lock(&object->fscache.lock); + file = get_file(object->backing_file); + spin_unlock(&object->fscache.lock); + + _enter("%pD,%li,%llx,%zx/%llx", + file, file_inode(file)->i_ino, start_pos, len, + i_size_read(file->f_inode)); + + __fscache_wait_for_operation(opr, FSCACHE_WANT_WRITE); fscache_count_io_operation(opr->object->cookie); + + ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); + if (!ki) + goto presubmission_error; + + refcount_set(&ki->ki_refcnt, 2); + ki->iocb.ki_filp = file; + ki->iocb.ki_pos = start_pos; + ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; + ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); + ki->iocb.ki_ioprio = get_current_ioprio(); + ki->start = start_pos; + ki->len = len; + ki->object = object; + ki->term_func = term_func; + ki->term_func_priv = term_func_priv; + + if (ki->term_func) + ki->iocb.ki_complete = cachefiles_write_complete; + + ret = rw_verify_area(WRITE, file, &ki->iocb.ki_pos, iov_iter_count(iter)); + if (ret < 0) + goto presubmission_error_free; + + /* Open-code file_start_write here to grab freeze protection, which + * will be released by another thread in aio_complete_rw(). Fool + * lockdep by telling it the lock got released so that it doesn't + * complain about the held lock when we return to userspace. + */ + inode = file_inode(file); + __sb_start_write(inode->i_sb, SB_FREEZE_WRITE); + __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); + + cachefiles_grab_object(&object->fscache, fscache_obj_get_ioreq); + + old_nofs = memalloc_nofs_save(); + ret = call_write_iter(file, &ki->iocb, iter); + memalloc_nofs_restore(old_nofs); + switch (ret) { + case -EIOCBQUEUED: + goto in_progress; + + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + ret = -EINTR; + /* Fall through */ + default: + cachefiles_write_complete(&ki->iocb, ret, 0); + if (ret > 0) + ret = 0; + break; + } + +in_progress: + cachefiles_put_kiocb(ki); + _leave(" = %zd", ret); + return ret; + +presubmission_error_free: + kfree(ki); +presubmission_error: + fput(file); if (term_func) - term_func(term_func_priv, -ENOBUFS); - return -ENOBUFS; + term_func(term_func_priv, -ENOMEM); + return -ENOMEM; } /*