On 11/10/2009 07:20 PM, Boaz Harrosh wrote: > > In anticipation of multi-device operations, we separate osd operations > into an abstract I/O API. Currently only one device is used but later > when adding more devices, we will drive all devices in parallel according > to a "data_map" that describes how data is arranged on multiple devices. > The file system level operates, like before, as if there is one object > (inode-number) and an i_size. The io engine will split this to the same > object-number but on multiple devices. > > At first we introduce Mirror (raid 1) layout. But at the final outcome > we intend to fully implement the pNFS-Objects data-map, including > raid 0,4,5,6 over mirrored devices, over multiple device-groups. And > more. See: http://tools.ietf.org/html/draft-ietf-nfsv4-pnfs-obj-12 > > * Define an io_state based API for accessing osd storage devices > in an abstract way. > Usage: > First a caller allocates an io state with: > exofs_get_io_state(struct exofs_sb_info *sbi, > struct exofs_io_state** ios); > > Then calls one of: > exofs_sbi_create(struct exofs_io_state *ios); > exofs_sbi_remove(struct exofs_io_state *ios); > exofs_sbi_write(struct exofs_io_state *ios); > exofs_sbi_read(struct exofs_io_state *ios); > exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len); > > And when done > exofs_put_io_state(struct exofs_io_state *ios); > > * Convert all source files to use this new API > * Convert from bio_alloc to bio_kmalloc > * In io engine we make use of the now fixed osd_req_decode_sense > > There are no functional changes or on disk additions after this patch. > > Signed-off-by: Boaz Harrosh <bharrosh@xxxxxxxxxxx> Putting on my mailing-list-reader hat, I found myself asking "What was that good for?" Below is the diff between the new posts and the old ones. 
(New as reply to originals) --- git diff --stat -p -M -R afb8f9c089c07711104bb26e224440415ba3d324 -- fs/exofs/ fs/exofs/inode.c | 14 ++++++-- fs/exofs/ios.c | 93 +++++++++++++---------------------------------------- 2 files changed, 33 insertions(+), 74 deletions(-) diff --git b/fs/exofs/inode.c a/fs/exofs/inode.c index f3f287a..698a863 100644 --- b/fs/exofs/inode.c +++ a/fs/exofs/inode.c @@ -444,7 +444,7 @@ static int exofs_readpage(struct file *file, struct page *page) return _readpage(page, false); } -/* Callback for osd_write. All writes are asynchronouse */ +/* Callback for osd_write. All writes are asynchronous */ static void writepages_done(struct exofs_io_state *ios, void *p) { struct page_collect *pcol = p; @@ -1029,9 +1029,15 @@ static void create_done(struct exofs_io_state *ios, void *p) if (unlikely(ret)) { EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx", _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid)); - make_bad_inode(inode); - } else - set_obj_created(oi); + /*TODO: When FS is corrupted creation can fail, object already + * exist. Get rid of this asynchronous creation, if exist + * increment the obj counter and try the next object. Until we + * succeed. 
All these dangling objects will be made into lost + * files by chkfs.exofs + */ + } + + set_obj_created(oi); atomic_dec(&inode->i_count); wake_up(&oi->i_wq); diff --git b/fs/exofs/ios.c a/fs/exofs/ios.c index 369c364..14b2600 100644 --- b/fs/exofs/ios.c +++ a/fs/exofs/ios.c @@ -125,8 +125,15 @@ static void _done_io(struct osd_request *or, void *p) static int exofs_io_execute(struct exofs_io_state *ios) { + DECLARE_COMPLETION_ONSTACK(wait); + bool sync = (ios->done == NULL); int i, ret; + if (sync) { + ios->done = _sync_done; + ios->private = &wait; + } + for (i = 0; i < ios->numdevs; i++) { struct osd_request *or = ios->per_dev[i].or; if (unlikely(!or)) @@ -140,6 +147,8 @@ static int exofs_io_execute(struct exofs_io_state *ios) } } + kref_init(&ios->kref); + for (i = 0; i < ios->numdevs; i++) { struct osd_request *or = ios->per_dev[i].or; if (unlikely(!or)) @@ -150,7 +159,13 @@ static int exofs_io_execute(struct exofs_io_state *ios) } kref_put(&ios->kref, _last_io); - return 0; + ret = 0; + + if (sync) { + wait_for_completion(&wait); + ret = exofs_check_io(ios, NULL); + } + return ret; } int exofs_check_io(struct exofs_io_state *ios, u64 *resid) @@ -192,16 +207,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid) int exofs_sbi_create(struct exofs_io_state *ios) { - DECLARE_COMPLETION_ONSTACK(wait); - bool sync = (ios->done == NULL); - int i, ret = 0; - - if (sync) { - ios->done = _sync_done; - ios->private = &wait; - } - - kref_init(&ios->kref); + int i, ret; for (i = 0; i < ios->sbi->s_numdevs; i++) { struct osd_request *or; @@ -217,12 +223,7 @@ int exofs_sbi_create(struct exofs_io_state *ios) osd_req_create_object(or, &ios->obj); } - ret = exofs_io_execute(ios); - if (sync && likely(!ret)) { - wait_for_completion(&wait); -/* ret = exofs_check_io(ios);*/ - } out: return ret; @@ -230,16 +231,7 @@ out: int exofs_sbi_remove(struct exofs_io_state *ios) { - DECLARE_COMPLETION_ONSTACK(wait); - bool sync = (ios->done == NULL); - int i, ret = 0; - - if (sync) { 
- ios->done = _sync_done; - ios->private = &wait; - } - - kref_init(&ios->kref); + int i, ret; for (i = 0; i < ios->sbi->s_numdevs; i++) { struct osd_request *or; @@ -256,10 +248,6 @@ int exofs_sbi_remove(struct exofs_io_state *ios) osd_req_remove_object(or, &ios->obj); } ret = exofs_io_execute(ios); - if (sync && likely(!ret)) { - wait_for_completion(&wait); -/* ret = exofs_check_io(ios);*/ - } out: return ret; @@ -267,16 +255,7 @@ out: int exofs_sbi_write(struct exofs_io_state *ios) { - DECLARE_COMPLETION_ONSTACK(wait); - bool sync = (ios->done == NULL); - int i, ret = 0; - - if (sync) { - ios->done = _sync_done; - ios->private = &wait; - } - - kref_init(&ios->kref); + int i, ret; for (i = 0; i < ios->sbi->s_numdevs; i++) { struct osd_request *or; @@ -296,8 +275,10 @@ int exofs_sbi_write(struct exofs_io_state *ios) if (i != 0) { bio = bio_kmalloc(GFP_KERNEL, ios->bio->bi_max_vecs); - if (!bio) + if (unlikely(!bio)) { + ret = -ENOMEM; goto out; + } __bio_clone(bio, ios->bio); bio->bi_bdev = NULL; @@ -328,10 +309,6 @@ int exofs_sbi_write(struct exofs_io_state *ios) ios->in_attr_len); } ret = exofs_io_execute(ios); - if (sync && likely(!ret)) { - wait_for_completion(&wait); - ret = exofs_check_io(ios, NULL); - } out: return ret; @@ -339,16 +316,7 @@ out: int exofs_sbi_read(struct exofs_io_state *ios) { - DECLARE_COMPLETION_ONSTACK(wait); - bool sync = (ios->done == NULL); - int i, ret = 0; - - if (sync) { - ios->done = _sync_done; - ios->private = &wait; - } - - kref_init(&ios->kref); + int i, ret; for (i = 0; i < 1; i++) { struct osd_request *or; @@ -385,10 +353,6 @@ int exofs_sbi_read(struct exofs_io_state *ios) ios->in_attr_len); } ret = exofs_io_execute(ios); - if (sync && likely(!ret)) { - wait_for_completion(&wait); - ret = exofs_check_io(ios, NULL); - } out: return ret; @@ -417,7 +381,6 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr) int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) { - DECLARE_COMPLETION_ONSTACK(wait); 
struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info; struct exofs_io_state *ios; struct osd_attr attr; @@ -434,11 +397,6 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) attr = g_attr_logical_length; attr.val_ptr = &newsize; - ios->done = _sync_done; - ios->private = &wait; - - kref_init(&ios->kref); - for (i = 0; i < sbi->s_numdevs; i++) { struct osd_request *or; @@ -455,11 +413,6 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size) osd_req_add_set_attr_list(or, &attr, 1); } ret = exofs_io_execute(ios); - if (unlikely(ret)) - goto out; - - wait_for_completion(&wait); - ret = exofs_check_io(ios, NULL); out: exofs_put_io_state(ios); -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html