[PATCH] per process request limits (was Re: IO scheduler, queue depth, nr_requests)

Hi Miquel,
Can you see if this patch helps you?
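
In short, the patch drops the old request batching heuristic and instead
counts outstanding requests per io_context. Paraphrasing the new
get_request() below (a sketch of the resulting rule, not part of the
patch itself):

	/*
	 * Allocation is refused (and the caller sleeps in
	 * get_request_wait) only when all three hold:
	 */
	if (blk_queue_full(q, rw)			/* queue is full */
			&& ioc->nr_requests >= BLKDEV_MIN_PROC_RQ /* 64 already in flight */
			&& !elv_may_queue(q, rw))	/* no elevator exemption */
		goto out_ioc;

So a process may keep allocating past a full queue until it has 64
requests of its own outstanding, and freed_request() wakes sleepers once
the queue drains back to within ~32 of q->nr_requests or the freeing
process drops 32 below its per-process limit.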

Even with this patch, it might still be a good idea to allow
pdflush to disregard the limits...
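
One simple way to do that (untested, and assuming pdflush still marks
itself with PF_FLUSHER as in mainline) would be an extra clause in the
new check in get_request(), something like:

	/*
	 * Hypothetical follow-up, not in this patch: let flusher threads
	 * bypass the per-process limit so dirty memory can keep draining.
	 */
	if (blk_queue_full(q, rw)
			&& ioc->nr_requests >= BLKDEV_MIN_PROC_RQ
			&& !(current->flags & PF_FLUSHER)
			&& !elv_may_queue(q, rw)) {
		spin_unlock_irq(q->queue_lock);
		goto out_ioc;
	}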

Nick

 linux-2.6-npiggin/drivers/block/ll_rw_blk.c |  113 ++++++++--------------------
 linux-2.6-npiggin/include/linux/blkdev.h    |    8 -
 2 files changed, 36 insertions(+), 85 deletions(-)

diff -puN include/linux/blkdev.h~blk-per-process-rqlim include/linux/blkdev.h
--- linux-2.6/include/linux/blkdev.h~blk-per-process-rqlim	2004-02-20 09:53:22.000000000 +1100
+++ linux-2.6-npiggin/include/linux/blkdev.h	2004-02-20 10:12:04.000000000 +1100
@@ -25,6 +25,7 @@ struct request_pm_state;
 
 #define BLKDEV_MIN_RQ	4
 #define BLKDEV_MAX_RQ	128	/* Default maximum */
+#define BLKDEV_MIN_PROC_RQ	64
 
 /*
  * This is the per-process anticipatory I/O scheduler state.
@@ -61,11 +62,7 @@ struct io_context {
 	atomic_t refcount;
 	pid_t pid;
 
-	/*
-	 * For request batching
-	 */
-	unsigned long last_waited; /* Time last woken after wait for request */
-	int nr_batch_requests;     /* Number of requests left in the batch */
+	unsigned int nr_requests;	/* Outstanding requests */
 
 	struct as_io_context *aic;
 };
@@ -141,6 +138,7 @@ struct request {
 	int ref_count;
 	request_queue_t *q;
 	struct request_list *rl;
+	struct io_context *ioc;
 
 	struct completion *waiting;
 	void *special;
diff -puN drivers/block/ll_rw_blk.c~blk-per-process-rqlim drivers/block/ll_rw_blk.c
--- linux-2.6/drivers/block/ll_rw_blk.c~blk-per-process-rqlim	2004-02-20 09:53:25.000000000 +1100
+++ linux-2.6-npiggin/drivers/block/ll_rw_blk.c	2004-02-20 10:40:43.000000000 +1100
@@ -55,12 +55,6 @@ unsigned long blk_max_low_pfn, blk_max_p
 EXPORT_SYMBOL(blk_max_low_pfn);
 EXPORT_SYMBOL(blk_max_pfn);
 
-/* Amount of time in which a process may batch requests */
-#define BLK_BATCH_TIME	(HZ/50UL)
-
-/* Number of requests a "batching" process may submit */
-#define BLK_BATCH_REQ	32
-
 /*
  * Return the threshold (number of used requests) at which the queue is
  * considered to be congested.  It include a little hysteresis to keep the
@@ -1495,57 +1489,27 @@ static inline struct request *blk_alloc_
 }
 
 /*
- * ioc_batching returns true if the ioc is a valid batching request and
- * should be given priority access to a request.
- */
-static inline int ioc_batching(struct io_context *ioc)
-{
-	if (!ioc)
-		return 0;
-
-	/*
-	 * Make sure the process is able to allocate at least 1 request
-	 * even if the batch times out, otherwise we could theoretically
-	 * lose wakeups.
-	 */
-	return ioc->nr_batch_requests == BLK_BATCH_REQ ||
-		(ioc->nr_batch_requests > 0
-		&& time_before(jiffies, ioc->last_waited + BLK_BATCH_TIME));
-}
-
-/*
- * ioc_set_batching sets ioc to be a new "batcher" if it is not one. This
- * will cause the process to be a "batcher" on all queues in the system. This
- * is the behaviour we want though - once it gets a wakeup it should be given
- * a nice run.
- */
-void ioc_set_batching(struct io_context *ioc)
-{
-	if (!ioc || ioc_batching(ioc))
-		return;
-
-	ioc->nr_batch_requests = BLK_BATCH_REQ;
-	ioc->last_waited = jiffies;
-}
-
-/*
  * A request has just been released.  Account for it, update the full and
  * congestion status, wake up any waiters.   Called under q->queue_lock.
  */
-static void freed_request(request_queue_t *q, int rw)
+static void freed_request(request_queue_t *q, struct io_context *ioc, int rw)
 {
 	struct request_list *rl = &q->rq;
 
+	ioc->nr_requests--;
 	rl->count[rw]--;
 	if (rl->count[rw] < queue_congestion_off_threshold(q))
 		clear_queue_congested(q, rw);
 	if (rl->count[rw]+1 <= q->nr_requests) {
-		smp_mb();
-		if (waitqueue_active(&rl->wait[rw]))
-			wake_up(&rl->wait[rw]);
 		if (!waitqueue_active(&rl->wait[rw]))
 			blk_clear_queue_full(q, rw);
 	}
+
+	if (rl->count[rw]-31 <= q->nr_requests
+			|| ioc->nr_requests <= BLKDEV_MIN_PROC_RQ - 32) {
+		if (waitqueue_active(&rl->wait[rw]))
+			wake_up(&rl->wait[rw]);
+	}
 }
 
 #define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist)
@@ -1556,32 +1520,34 @@ static struct request *get_request(reque
 {
 	struct request *rq = NULL;
 	struct request_list *rl = &q->rq;
-	struct io_context *ioc = get_io_context(gfp_mask);
+	struct io_context *ioc;
 
 	spin_lock_irq(q->queue_lock);
 	if (rl->count[rw]+1 >= q->nr_requests) {
 		/*
 		 * The queue will fill after this allocation, so set it as
-		 * full, and mark this process as "batching". This process
-		 * will be allowed to complete a batch of requests, others
-		 * will be blocked.
+		 * full.
 		 */
-		if (!blk_queue_full(q, rw)) {
-			ioc_set_batching(ioc);
+		if (!blk_queue_full(q, rw))
 			blk_set_queue_full(q, rw);
-		}
 	}
 
+	ioc = get_io_context(gfp_mask);
+	if (unlikely(ioc == NULL))
+		goto out;
+
 	if (blk_queue_full(q, rw)
-			&& !ioc_batching(ioc) && !elv_may_queue(q, rw)) {
+			&& ioc->nr_requests >= BLKDEV_MIN_PROC_RQ
+			&& !elv_may_queue(q, rw)) {
 		/*
-		 * The queue is full and the allocating process is not a
-		 * "batcher", and not exempted by the IO scheduler
+		 * The queue is full and the allocating process is over
+		 * its limit, and not exempted by the IO scheduler
 		 */
 		spin_unlock_irq(q->queue_lock);
-		goto out;
+		goto out_ioc;
 	}
 
+	ioc->nr_requests++;
 	rl->count[rw]++;
 	if (rl->count[rw] >= queue_congestion_on_threshold(q))
 		set_queue_congested(q, rw);
@@ -1597,14 +1563,11 @@ static struct request *get_request(reque
 		 * wait queue, but this is pretty rare.
 		 */
 		spin_lock_irq(q->queue_lock);
-		freed_request(q, rw);
+		freed_request(q, ioc, rw);
 		spin_unlock_irq(q->queue_lock);
-		goto out;
+		goto out_ioc;
 	}
 
-	if (ioc_batching(ioc))
-		ioc->nr_batch_requests--;
-	
 	INIT_LIST_HEAD(&rq->queuelist);
 
 	/*
@@ -1620,13 +1583,15 @@ static struct request *get_request(reque
 	rq->ref_count = 1;
 	rq->q = q;
 	rq->rl = rl;
+	rq->ioc = ioc;
 	rq->waiting = NULL;
 	rq->special = NULL;
 	rq->data = NULL;
 	rq->sense = NULL;
 
-out:
+out_ioc:
 	put_io_context(ioc);
+out:
 	return rq;
 }
 
@@ -1644,25 +1609,12 @@ static struct request *get_request_wait(
 		struct request_list *rl = &q->rq;
 
 		prepare_to_wait_exclusive(&rl->wait[rw], &wait,
-				TASK_UNINTERRUPTIBLE);
+						TASK_UNINTERRUPTIBLE);
 
 		rq = get_request(q, rw, GFP_NOIO);
-
-		if (!rq) {
-			struct io_context *ioc;
-
+		if (!rq)
 			io_schedule();
 
-			/*
-			 * After sleeping, we become a "batching" process and
-			 * will be able to allocate at least one request, and
-			 * up to a big batch of them for a small period time.
-			 * See ioc_batching, ioc_set_batching
-			 */
-			ioc = get_io_context(GFP_NOIO);
-			ioc_set_batching(ioc);
-			put_io_context(ioc);
-		}
 		finish_wait(&rl->wait[rw], &wait);
 	} while (!rq);
 
@@ -1854,6 +1806,7 @@ void __blk_put_request(request_queue_t *
 	 * it didn't come out of our reserved rq pools
 	 */
 	if (rl) {
+		struct io_context *ioc = req->ioc;
 		int rw = rq_data_dir(req);
 
 		elv_completed_request(q, req);
@@ -1861,7 +1814,8 @@ void __blk_put_request(request_queue_t *
 		BUG_ON(!list_empty(&req->queuelist));
 
 		blk_free_request(q, req);
-		freed_request(q, rw);
+		freed_request(q, ioc, rw);
+		put_io_context(ioc);
 	}
 }
 
@@ -2721,7 +2675,7 @@ int __init blk_dev_init(void)
  */
 void put_io_context(struct io_context *ioc)
 {
-	if (ioc == NULL)
+	if (unlikely(ioc == NULL))
 		return;
 
 	BUG_ON(atomic_read(&ioc->refcount) == 0);
@@ -2772,8 +2726,7 @@ struct io_context *get_io_context(int gf
 		if (ret) {
 			atomic_set(&ret->refcount, 1);
 			ret->pid = tsk->pid;
-			ret->last_waited = jiffies; /* doesn't matter... */
-			ret->nr_batch_requests = 0; /* because this is 0 */
+			ret->nr_requests = 0;
 			ret->aic = NULL;
 			tsk->io_context = ret;
 		}

_
