+ aio-add-per-task-aio-wait-event-condition.patch added to -mm tree

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The patch titled
     aio: add per task aio wait event condition
has been added to the -mm tree.  Its filename is
     aio-add-per-task-aio-wait-event-condition.patch

See http://www.zip.com.au/~akpm/linux/patches/stuff/added-to-mm.txt to find
out what to do about this

------------------------------------------------------
Subject: aio: add per task aio wait event condition
From: "Chen, Kenneth W" <kenneth.w.chen@xxxxxxxxx>

The AIO wake-up notification from aio_complete() is really inefficient
in the current AIO implementation when a process is waiting in
io_getevents().

For example, if an app calls io_getevents() with min_nr > 1 and the aio
event queue doesn't have enough completed aio events, the process will
block in read_events().  However, aio_complete() will wake up the waiting
process for *each* completed I/O, even though the number of events the
app is waiting for is much larger than 1.  This causes excessive and
unnecessary context switches, because the waiting process will just reap
a single event and go back to sleep again.  It is much more efficient
to wake up the waiting process only when there are enough events for it
to reap.

This patch adds a wait condition to the wait queue and only wakes up the
process when that condition is met.  The condition is kept on a per-task
basis to handle multi-threaded apps that share a single ioctx.

To show the effect of this patch, here is vmstat output before and
after the patch.  The app does random O_DIRECT AIO on 60 disks.  Context
switches are reduced from 13 thousand+ down to just 40+, a significant
improvement.

Before:
procs -----------memory---------- ---swap-- -----io---- --system-- ----cpu----
 r  b   swpd   free   buff  cache   si   so    bi    bo   in    cs us sy id wa
 0  0      0 3972608   7056  31312    0    0 14000     0 7840 13715  0  2 98  0
 0  0      0 3972608   7056  31312    0    0 14300     0 7793 13641  0  2 98  0
 0  0      0 3972608   7056  31312    0    0 14100     0 7885 13747  0  2 98  0

After:
 0  0      0 3972608   7056  31312    0    0 14000     0 7840    49  0  2 98  0
 0  0      0 3972608   7056  31312    0    0 13800     0 7793    53  0  2 98  0
 0  0      0 3972608   7056  31312    0    0 13800     0 7885    42  0  2 98  0

Signed-off-by: Ken Chen <kenneth.w.chen@xxxxxxxxx>
Cc: Zach Brown <zach.brown@xxxxxxxxxx>
Cc: Suparna Bhattacharya <suparna@xxxxxxxxxx>
Cc: Benjamin LaHaise <bcrl@xxxxxxxxx>
Cc: Badari Pulavarty <pbadari@xxxxxxxxxx>
Signed-off-by: Andrew Morton <akpm@xxxxxxxx>
---

 fs/aio.c      |   43 ++++++++++++++++++++++++++++++++-----------
 fs/aio.c.orig |   20 +++++++++-----------
 2 files changed, 41 insertions(+), 22 deletions(-)

diff -puN fs/aio.c~aio-add-per-task-aio-wait-event-condition fs/aio.c
--- a/fs/aio.c~aio-add-per-task-aio-wait-event-condition
+++ a/fs/aio.c
@@ -193,6 +193,17 @@ static int aio_setup_ring(struct kioctx 
 	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK), km); \
 } while(0)
 
+struct aio_wait_queue {
+	int		nr_wait;	/* wake-up condition */
+	wait_queue_t	wait;
+};
+
+static inline void aio_init_wait(struct aio_wait_queue *wait)
+{
+	wait->nr_wait = 0;
+	init_wait(&wait->wait);
+}
+
 /* ioctx_alloc
  *	Allocates and initializes an ioctx.  Returns an ERR_PTR if it failed.
  */
@@ -295,13 +306,14 @@ static void aio_cancel_all(struct kioctx
 static void wait_for_all_aios(struct kioctx *ctx)
 {
 	struct task_struct *tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
+	struct aio_wait_queue wait;
 
 	spin_lock_irq(&ctx->ctx_lock);
 	if (!ctx->reqs_active)
 		goto out;
 
-	add_wait_queue(&ctx->wait, &wait);
+	aio_init_wait(&wait);
+	add_wait_queue(&ctx->wait, &wait.wait);
 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	while (ctx->reqs_active) {
 		spin_unlock_irq(&ctx->ctx_lock);
@@ -310,7 +322,7 @@ static void wait_for_all_aios(struct kio
 		spin_lock_irq(&ctx->ctx_lock);
 	}
 	__set_task_state(tsk, TASK_RUNNING);
-	remove_wait_queue(&ctx->wait, &wait);
+	remove_wait_queue(&ctx->wait, &wait.wait);
 
 out:
 	spin_unlock_irq(&ctx->ctx_lock);
@@ -930,6 +942,7 @@ int fastcall aio_complete(struct kiocb *
 	unsigned long	flags;
 	unsigned long	tail;
 	int		ret;
+	int		nr_evt = 0;
 
 	/*
 	 * Special case handling for sync iocbs:
@@ -990,6 +1003,9 @@ int fastcall aio_complete(struct kiocb *
 	info->tail = tail;
 	ring->tail = tail;
 
+	nr_evt = ring->tail - ring->head;
+	if (nr_evt < 0)
+		nr_evt += info->nr;
 	put_aio_ring_event(event, KM_IRQ0);
 	kunmap_atomic(ring, KM_IRQ1);
 
@@ -998,8 +1014,13 @@ put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
 
-	if (waitqueue_active(&ctx->wait))
-		wake_up(&ctx->wait);
+	if (waitqueue_active(&ctx->wait)) {
+		struct aio_wait_queue *wait;
+		wait = container_of(ctx->wait.task_list.next,
+				    struct aio_wait_queue, wait.task_list);
+		if (nr_evt >= wait->nr_wait)
+			wake_up(&ctx->wait);
+	}
 
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	return ret;
@@ -1092,7 +1113,7 @@ static int read_events(struct kioctx *ct
 {
 	long			start_jiffies = jiffies;
 	struct task_struct	*tsk = current;
-	DECLARE_WAITQUEUE(wait, tsk);
+	struct aio_wait_queue	wait;
 	int			ret;
 	int			i = 0;
 	struct io_event		ent;
@@ -1150,10 +1171,11 @@ retry:
 		set_timeout(start_jiffies, &to, &ts);
 	}
 
+	aio_init_wait(&wait);
 	while (likely(i < nr)) {
-		add_wait_queue_exclusive(&ctx->wait, &wait);
 		do {
-			set_task_state(tsk, TASK_INTERRUPTIBLE);
+			prepare_to_wait_exclusive(&ctx->wait, &wait.wait,
+						  TASK_INTERRUPTIBLE);
 			ret = aio_read_evt(ctx, &ent);
 			if (ret)
 				break;
@@ -1162,6 +1184,7 @@ retry:
 			ret = 0;
 			if (to.timed_out)	/* Only check after read evt */
 				break;
+			wait.nr_wait = min_nr - i;
 			schedule();
 			if (signal_pending(tsk)) {
 				ret = -EINTR;
@@ -1169,9 +1192,7 @@ retry:
 			}
 			/*ret = aio_read_evt(ctx, &ent);*/
 		} while (1) ;
-
-		set_task_state(tsk, TASK_RUNNING);
-		remove_wait_queue(&ctx->wait, &wait);
+		finish_wait(&ctx->wait, &wait.wait);
 
 		if (unlikely(ret <= 0))
 			break;
diff -puN fs/aio.c.orig~aio-add-per-task-aio-wait-event-condition fs/aio.c.orig
--- a/fs/aio.c.orig~aio-add-per-task-aio-wait-event-condition
+++ a/fs/aio.c.orig
@@ -297,17 +297,23 @@ static void wait_for_all_aios(struct kio
 	struct task_struct *tsk = current;
 	DECLARE_WAITQUEUE(wait, tsk);
 
+	spin_lock_irq(&ctx->ctx_lock);
 	if (!ctx->reqs_active)
-		return;
+		goto out;
 
 	add_wait_queue(&ctx->wait, &wait);
 	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
 	while (ctx->reqs_active) {
+		spin_unlock_irq(&ctx->ctx_lock);
 		schedule();
 		set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+		spin_lock_irq(&ctx->ctx_lock);
 	}
 	__set_task_state(tsk, TASK_RUNNING);
 	remove_wait_queue(&ctx->wait, &wait);
+
+out:
+	spin_unlock_irq(&ctx->ctx_lock);
 }
 
 /* wait_on_sync_kiocb:
@@ -423,7 +429,6 @@ static struct kiocb fastcall *__aio_get_
 	ring = kmap_atomic(ctx->ring_info.ring_pages[0], KM_USER0);
 	if (ctx->reqs_active < aio_ring_avail(&ctx->ring_info, ring)) {
 		list_add(&req->ki_list, &ctx->active_reqs);
-		get_ioctx(ctx);
 		ctx->reqs_active++;
 		okay = 1;
 	}
@@ -535,8 +540,6 @@ int fastcall aio_put_req(struct kiocb *r
 	spin_lock_irq(&ctx->ctx_lock);
 	ret = __aio_put_req(ctx, req);
 	spin_unlock_irq(&ctx->ctx_lock);
-	if (ret)
-		put_ioctx(ctx);
 	return ret;
 }
 
@@ -778,8 +781,7 @@ static int __aio_run_iocbs(struct kioctx
 		 */
 		iocb->ki_users++;       /* grab extra reference */
 		aio_run_iocb(iocb);
-		if (__aio_put_req(ctx, iocb))  /* drop extra ref */
-			put_ioctx(ctx);
+		__aio_put_req(ctx, iocb);
  	}
 	if (!list_empty(&ctx->run_list))
 		return 1;
@@ -996,14 +998,10 @@ put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
 
-	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);
 
-	if (ret)
-		put_ioctx(ctx);
-
+	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	return ret;
 }
 
_

Patches currently in -mm which might be from kenneth.w.chen@xxxxxxxxx are

ia64-alignment-bug-in-ldscript.patch
aio-fix-buggy-put_ioctx-call-in-aio_complete-v2.patch
aio-add-per-task-aio-wait-event-condition.patch
aio-streamline-read-events-after-woken-up.patch
aio-remove-spurious-ring-head-index-modulo-info-nr.patch
aio-make-aio_ring_info-nr_pages-an-unsigned-int.patch
mm-only-sched-add-a-few-scheduler-event-counters.patch

-
To unsubscribe from this list: send the line "unsubscribe mm-commits" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Index of Archives]     [Kernel Newbies FAQ]     [Kernel Archive]     [IETF Annouce]     [DCCP]     [Netdev]     [Networking]     [Security]     [Bugtraq]     [Photo]     [Yosemite]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux SCSI]

  Powered by Linux