Re: [osd-dev] [PATCH 2/3] exofs: Move all operations to an io_engine

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 11/10/2009 07:20 PM, Boaz Harrosh wrote:
> 
> In anticipation of multi-device operations, we separate osd operations
> into an abstract I/O API. Currently only one device is used but later
> when adding more devices, we will drive all devices in parallel according
> to a "data_map" that describes how data is arranged on multiple devices.
> The file system level operates, like before, as if there is one object
> (inode-number) and an i_size. The io engine will split this to the same
> object-number but on multiple devices.
> 
> At first we introduce Mirror (raid 1) layout. But at the final outcome
> we intend to fully implement the pNFS-Objects data-map, including
> raid 0,4,5,6 over mirrored devices, over multiple device-groups. And
> more. See: http://tools.ietf.org/html/draft-ietf-nfsv4-pnfs-obj-12
> 
> * Define an io_state based API for accessing osd storage devices
>   in an abstract way.
>   Usage:
> 	First a caller allocates an io state with:
> 		exofs_get_io_state(struct exofs_sb_info *sbi,
> 				   struct exofs_io_state** ios);
> 
> 	Then calls one of:
> 		exofs_sbi_create(struct exofs_io_state *ios);
> 		exofs_sbi_remove(struct exofs_io_state *ios);
> 		exofs_sbi_write(struct exofs_io_state *ios);
> 		exofs_sbi_read(struct exofs_io_state *ios);
> 		exofs_oi_truncate(struct exofs_i_info *oi, u64 new_len);
> 
> 	And when done
> 		exofs_put_io_state(struct exofs_io_state *ios);
> 
> * Convert all source files to use this new API
> * Convert from bio_alloc to bio_kmalloc
> * In io engine we make use of the now fixed osd_req_decode_sense
> 
> There are no functional changes or on disk additions after this patch.
> 
> Signed-off-by: Boaz Harrosh <bharrosh@xxxxxxxxxxx>

Putting on my mailing-list-reader hat produced some "What was that good for?" moments.

Below is the diff of the new posts against the old ones. (The new ones were sent as replies to the originals.)

---
git diff --stat -p -M -R afb8f9c089c07711104bb26e224440415ba3d324 -- fs/exofs/
 fs/exofs/inode.c |   14 ++++++--
 fs/exofs/ios.c   |   93 +++++++++++++----------------------------------------
 2 files changed, 33 insertions(+), 74 deletions(-)

diff --git b/fs/exofs/inode.c a/fs/exofs/inode.c
index f3f287a..698a863 100644
--- b/fs/exofs/inode.c
+++ a/fs/exofs/inode.c
@@ -444,7 +444,7 @@ static int exofs_readpage(struct file *file, struct page *page)
 	return _readpage(page, false);
 }
 
-/* Callback for osd_write. All writes are asynchronouse */
+/* Callback for osd_write. All writes are asynchronous */
 static void writepages_done(struct exofs_io_state *ios, void *p)
 {
 	struct page_collect *pcol = p;
@@ -1029,9 +1029,15 @@ static void create_done(struct exofs_io_state *ios, void *p)
 	if (unlikely(ret)) {
 		EXOFS_ERR("object=0x%llx creation faild in pid=0x%llx",
 			  _LLU(exofs_oi_objno(oi)), _LLU(sbi->s_pid));
-		make_bad_inode(inode);
-	} else
-		set_obj_created(oi);
+		/*TODO: When FS is corrupted creation can fail, object already
+		 * exist. Get rid of this asynchronous creation, if exist
+		 * increment the obj counter and try the next object. Until we
+		 * succeed. All these dangling objects will be made into lost
+		 * files by chkfs.exofs
+		 */
+	}
+
+	set_obj_created(oi);
 
 	atomic_dec(&inode->i_count);
 	wake_up(&oi->i_wq);
diff --git b/fs/exofs/ios.c a/fs/exofs/ios.c
index 369c364..14b2600 100644
--- b/fs/exofs/ios.c
+++ a/fs/exofs/ios.c
@@ -125,8 +125,15 @@ static void _done_io(struct osd_request *or, void *p)
 
 static int exofs_io_execute(struct exofs_io_state *ios)
 {
+	DECLARE_COMPLETION_ONSTACK(wait);
+	bool sync = (ios->done == NULL);
 	int i, ret;
 
+	if (sync) {
+		ios->done = _sync_done;
+		ios->private = &wait;
+	}
+
 	for (i = 0; i < ios->numdevs; i++) {
 		struct osd_request *or = ios->per_dev[i].or;
 		if (unlikely(!or))
@@ -140,6 +147,8 @@ static int exofs_io_execute(struct exofs_io_state *ios)
 		}
 	}
 
+	kref_init(&ios->kref);
+
 	for (i = 0; i < ios->numdevs; i++) {
 		struct osd_request *or = ios->per_dev[i].or;
 		if (unlikely(!or))
@@ -150,7 +159,13 @@ static int exofs_io_execute(struct exofs_io_state *ios)
 	}
 
 	kref_put(&ios->kref, _last_io);
-	return 0;
+	ret = 0;
+
+	if (sync) {
+		wait_for_completion(&wait);
+		ret = exofs_check_io(ios, NULL);
+	}
+	return ret;
 }
 
 int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
@@ -192,16 +207,7 @@ int exofs_check_io(struct exofs_io_state *ios, u64 *resid)
 
 int exofs_sbi_create(struct exofs_io_state *ios)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
-	bool sync = (ios->done == NULL);
-	int i, ret = 0;
-
-	if (sync) {
-		ios->done = _sync_done;
-		ios->private = &wait;
-	}
-
-	kref_init(&ios->kref);
+	int i, ret;
 
 	for (i = 0; i < ios->sbi->s_numdevs; i++) {
 		struct osd_request *or;
@@ -217,12 +223,7 @@ int exofs_sbi_create(struct exofs_io_state *ios)
 
 		osd_req_create_object(or, &ios->obj);
 	}
-
 	ret = exofs_io_execute(ios);
-	if (sync && likely(!ret)) {
-		wait_for_completion(&wait);
-/*		ret = exofs_check_io(ios);*/
-	}
 
 out:
 	return ret;
@@ -230,16 +231,7 @@ out:
 
 int exofs_sbi_remove(struct exofs_io_state *ios)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
-	bool sync = (ios->done == NULL);
-	int i, ret = 0;
-
-	if (sync) {
-		ios->done = _sync_done;
-		ios->private = &wait;
-	}
-
-	kref_init(&ios->kref);
+	int i, ret;
 
 	for (i = 0; i < ios->sbi->s_numdevs; i++) {
 		struct osd_request *or;
@@ -256,10 +248,6 @@ int exofs_sbi_remove(struct exofs_io_state *ios)
 		osd_req_remove_object(or, &ios->obj);
 	}
 	ret = exofs_io_execute(ios);
-	if (sync && likely(!ret)) {
-		wait_for_completion(&wait);
-/*		ret = exofs_check_io(ios);*/
-	}
 
 out:
 	return ret;
@@ -267,16 +255,7 @@ out:
 
 int exofs_sbi_write(struct exofs_io_state *ios)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
-	bool sync = (ios->done == NULL);
-	int i, ret = 0;
-
-	if (sync) {
-		ios->done = _sync_done;
-		ios->private = &wait;
-	}
-
-	kref_init(&ios->kref);
+	int i, ret;
 
 	for (i = 0; i < ios->sbi->s_numdevs; i++) {
 		struct osd_request *or;
@@ -296,8 +275,10 @@ int exofs_sbi_write(struct exofs_io_state *ios)
 			if (i != 0) {
 				bio = bio_kmalloc(GFP_KERNEL,
 						  ios->bio->bi_max_vecs);
-				if (!bio)
+				if (unlikely(!bio)) {
+					ret = -ENOMEM;
 					goto out;
+				}
 
 				__bio_clone(bio, ios->bio);
 				bio->bi_bdev = NULL;
@@ -328,10 +309,6 @@ int exofs_sbi_write(struct exofs_io_state *ios)
 						  ios->in_attr_len);
 	}
 	ret = exofs_io_execute(ios);
-	if (sync && likely(!ret)) {
-		wait_for_completion(&wait);
-		ret = exofs_check_io(ios, NULL);
-	}
 
 out:
 	return ret;
@@ -339,16 +316,7 @@ out:
 
 int exofs_sbi_read(struct exofs_io_state *ios)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
-	bool sync = (ios->done == NULL);
-	int i, ret = 0;
-
-	if (sync) {
-		ios->done = _sync_done;
-		ios->private = &wait;
-	}
-
-	kref_init(&ios->kref);
+	int i, ret;
 
 	for (i = 0; i < 1; i++) {
 		struct osd_request *or;
@@ -385,10 +353,6 @@ int exofs_sbi_read(struct exofs_io_state *ios)
 						  ios->in_attr_len);
 	}
 	ret = exofs_io_execute(ios);
-	if (sync && likely(!ret)) {
-		wait_for_completion(&wait);
-		ret = exofs_check_io(ios, NULL);
-	}
 
 out:
 	return ret;
@@ -417,7 +381,6 @@ int extract_attr_from_ios(struct exofs_io_state *ios, struct osd_attr *attr)
 
 int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
 {
-	DECLARE_COMPLETION_ONSTACK(wait);
 	struct exofs_sb_info *sbi = oi->vfs_inode.i_sb->s_fs_info;
 	struct exofs_io_state *ios;
 	struct osd_attr attr;
@@ -434,11 +397,6 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
 	attr = g_attr_logical_length;
 	attr.val_ptr = &newsize;
 
-	ios->done = _sync_done;
-	ios->private = &wait;
-
-	kref_init(&ios->kref);
-
 	for (i = 0; i < sbi->s_numdevs; i++) {
 		struct osd_request *or;
 
@@ -455,11 +413,6 @@ int exofs_oi_truncate(struct exofs_i_info *oi, u64 size)
 		osd_req_add_set_attr_list(or, &attr, 1);
 	}
 	ret = exofs_io_execute(ios);
-	if (unlikely(ret))
-		goto out;
-
-	wait_for_completion(&wait);
-	ret = exofs_check_io(ios, NULL);
 
 out:
 	exofs_put_io_state(ios);



--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux