+
+ if (irq_comp)
+ local_irq_restore(flags);
+ else
+ preempt_enable();
+}
+
+static struct io_kiocb *io_alloc_req(struct io_ring_ctx *ctx, int opcode)
+{
+ struct io_kiocb *req;
+
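+ /*
+ * Grab a request from the per-cpu cache; if that fails, fall back to
+ * the ring's dedicated fallback request.
+ */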
+ req = io_req_cache_alloc(opcode);
+ if (req) {
+ req->opcode = opcode;
+ return req;
+ }
+
+ return io_get_fallback_req(ctx, opcode);
}
static inline void io_put_file(struct io_kiocb *req, struct file *file,
@@ -1345,7 +1449,8 @@ static void __io_req_aux_free(struct io_kiocb *req)
if (req->flags & REQ_F_NEED_CLEANUP)
io_cleanup_req(req);
- kfree(req->io);
+ if (req->io)
+ kfree(req->io);
if (req->file)
io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
if (req->task)
@@ -1371,28 +1476,21 @@ static void __io_free_req(struct io_kiocb *req)
percpu_ref_put(&req->ctx->refs);
if (likely(!io_is_fallback_req(req)))
- kmem_cache_free(req_cachep, req);
+ io_req_cache_free(req);
else
clear_bit_unlock(0, (unsigned long *) &req->ctx->fallback_req);
}
-struct req_batch {
- void *reqs[IO_IOPOLL_BATCH];
- int to_free;
- int need_iter;
-};
-
static void io_free_req_many(struct io_ring_ctx *ctx, struct req_batch *rb)
{
if (!rb->to_free)
return;
if (rb->need_iter) {
- int i, inflight = 0;
+ struct io_kiocb *req;
unsigned long flags;
+ int inflight = 0;
- for (i = 0; i < rb->to_free; i++) {
- struct io_kiocb *req = rb->reqs[i];
-
+ list_for_each_entry(req, &rb->list, list) {
if (req->flags & REQ_F_FIXED_FILE) {
req->file = NULL;
percpu_ref_put(req->fixed_file_refs);
@@ -1405,9 +1503,7 @@ static void io_free_req_many(struct io_ring_ctx *ctx, struct req_batch *rb)
goto do_free;
spin_lock_irqsave(&ctx->inflight_lock, flags);
- for (i = 0; i < rb->to_free; i++) {
- struct io_kiocb *req = rb->reqs[i];
-
+ list_for_each_entry(req, &rb->list, list) {
if (req->flags & REQ_F_INFLIGHT) {
list_del(&req->inflight_entry);
if (!--inflight)
@@ -1420,9 +1516,10 @@ static void io_free_req_many(struct io_ring_ctx *ctx, struct req_batch *rb)
wake_up(&ctx->inflight_wait);
}
do_free:
- kmem_cache_free_bulk(req_cachep, rb->to_free, rb->reqs);
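+ /* hand the whole batch on rb->list back through the request cache */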
+ io_req_cache_free_bulk(rb);
percpu_ref_put_many(&ctx->refs, rb->to_free);
- rb->to_free = rb->need_iter = 0;
+ rb->to_free = 0;
+ rb->need_iter = rb->irq_comp = false;
}
static bool io_link_cancel_timeout(struct io_kiocb *req)
@@ -1670,11 +1767,12 @@ static inline bool io_req_multi_free(struct req_batch *rb, struct io_kiocb *req)
return false;
if (!(req->flags & REQ_F_FIXED_FILE) || req->io)
- rb->need_iter++;
+ rb->need_iter = true;
+ if (io_op_defs[req->opcode].irq_comp)
+ rb->irq_comp = true;
- rb->reqs[rb->to_free++] = req;
- if (unlikely(rb->to_free == ARRAY_SIZE(rb->reqs)))
- io_free_req_many(req->ctx, rb);
+ list_add(&req->list, &rb->list);
+ rb->to_free++;
return true;
}
@@ -1697,10 +1795,14 @@ static int io_put_kbuf(struct io_kiocb *req)
static void io_iopoll_complete(struct io_ring_ctx *ctx, unsigned int *nr_events,
struct list_head *done)
{
- struct req_batch rb;
+ struct req_batch rb = {
+ .list = LIST_HEAD_INIT(rb.list),
+ .to_free = 0,
+ .need_iter = false,
+ .irq_comp = false
+ };
struct io_kiocb *req;
- rb.to_free = rb.need_iter = 0;
while (!list_empty(done)) {
int cflags = 0;
@@ -5703,8 +5805,6 @@ static void io_submit_state_end(struct io_submit_state *state)
{
blk_finish_plug(&state->plug);
io_file_put(state);
- if (state->free_reqs)
- kmem_cache_free_bulk(req_cachep, state->free_reqs, state->reqs);
}
/*
@@ -5714,7 +5814,6 @@ static void io_submit_state_start(struct io_submit_state *state,
unsigned int max_ios)
{
blk_start_plug(&state->plug);
- state->free_reqs = 0;
state->file = NULL;
state->ios_left = max_ios;
}
@@ -5784,7 +5883,6 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
* link list.
*/
req->sequence = ctx->cached_sq_head - ctx->cached_sq_dropped;
- req->opcode = READ_ONCE(sqe->opcode);
req->user_data = READ_ONCE(sqe->user_data);
req->io = NULL;
req->file = NULL;
@@ -5872,14 +5970,14 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr,
io_consume_sqe(ctx);
break;
}
- req = io_alloc_req(ctx, statep);
+ req = io_alloc_req(ctx, READ_ONCE(sqe->opcode));
if (unlikely(!req)) {
if (!submitted)
submitted = -EAGAIN;
break;
}
- err = io_init_req(ctx, req, sqe, statep, async);
+ err = io_init_req(ctx, req, sqe, NULL, async);
io_consume_sqe(ctx);
/* will complete beyond this point, count as submitted */
submitted++;
@@ -7626,6 +7724,17 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
req->task->task_works != NULL);
}
spin_unlock_irq(&ctx->completion_lock);
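+ /* dump per-cpu allocation cache occupancy */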
+ seq_printf(m, "AllocCache:\n");
+ for_each_possible_cpu(i) {
+ struct io_kiocb_cache *cache = per_cpu_ptr(alloc_cache, i);
+ int j;
+
+ for (j = 0; j < ARRAY_SIZE(cache->caches); j++) {
+ struct io_kiocb_cache_entry *ce = &cache->caches[j];
+
+ seq_printf(m, " cpu%d: irq=%d, nr_free=%d, nr_avail=%d\n", i, j, ce->nr_free, ce->nr_avail);
+ }
+ }
mutex_unlock(&ctx->uring_lock);
}
@@ -8101,8 +8210,130 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
return ret;
}
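+
+/*
+ * Shrinker hooks: count_objects() reports how many requests sit on the
+ * per-cpu free lists, scan_objects() releases some of them back to the
+ * slab allocator under memory pressure.
+ */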
+static unsigned long io_uring_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
+ unsigned long count = 0;
+ int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ struct io_kiocb_cache *cache;
+
+ cache = per_cpu_ptr(alloc_cache, cpu);
+ for (i = 0; i < ARRAY_SIZE(cache->caches); i++) {
+ struct io_kiocb_cache_entry *ce = &cache->caches[i];
+
+ count += ce->nr_free;
+ }
+ }
+
+ return count;
+}
+
+static unsigned long __io_uring_cache_shrink(struct io_kiocb_cache_entry *ce,
+ int irq_comp, int *nr_to_scan)
+{
+ unsigned long freed = 0;
+ struct io_kiocb *req;
+ LIST_HEAD(free_list);
+
+ if (!ce->nr_free)
+ return 0;
+
+ if (irq_comp)
+ spin_lock_irq(&ce->free_lock);
+ else
+ spin_lock(&ce->free_lock);
+
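+ /*
+ * Detach requests onto a private list while holding the lock; the
+ * actual kmem_cache_free() calls happen after the lock is dropped.
+ */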
+ while (!list_empty(&ce->free_list)) {
+ req = list_first_entry(&ce->free_list, struct io_kiocb, list);
+ list_move(&req->list, &free_list);
+ freed++;
+ if (!--(*nr_to_scan))
+ break;
+ }
+ /* keep the cached count in sync with the requests just detached */
+ ce->nr_free -= freed;
+
+ if (irq_comp)
+ spin_unlock_irq(&ce->free_lock);
+ else
+ spin_unlock(&ce->free_lock);
+
+ while (!list_empty(&free_list)) {
+ req = list_first_entry(&free_list, struct io_kiocb, list);
+ list_del(&req->list);
+ kmem_cache_free(req_cachep, req);
+ }
+
+ return freed;
+}
+
+static unsigned long io_uring_cache_shrink(int nr_to_scan)
+{
+ long freed = 0;
+ int cpu, i;
+
+ for_each_possible_cpu(cpu) {
+ struct io_kiocb_cache *cache = per_cpu_ptr(alloc_cache, cpu);
+
+ for (i = 0; i < ARRAY_SIZE(cache->caches); i++) {
+ struct io_kiocb_cache_entry *ce = &cache->caches[i];
+
+ freed += __io_uring_cache_shrink(ce, i, &nr_to_scan);
+ if (!nr_to_scan)
+ break;
+ }
+ if (!nr_to_scan)
+ break;
+ }
+
+ return freed ?: SHRINK_STOP;
+}
+
+static unsigned long io_uring_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
+{
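+ /* only attempt reclaim from a full GFP_KERNEL allocation context */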
+ if ((sc->gfp_mask & GFP_KERNEL) != GFP_KERNEL)
+ return SHRINK_STOP;
+
+ return io_uring_cache_shrink(sc->nr_to_scan);
+}
+
+static struct shrinker io_uring_shrinker = {
+ .count_objects = io_uring_cache_count,
+ .scan_objects = io_uring_cache_scan,
+ .seeks = DEFAULT_SEEKS,
+};
+
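+/*
+ * CPU hotplug: when a CPU goes offline, drop every request still parked
+ * in its allocation cache back to the slab.
+ */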
+static void io_uring_kill_ce(struct io_kiocb_cache_entry *ce, bool irq_comp)
+{
+ struct io_kiocb *req;
+
+ list_splice_init(&ce->alloc_list, &ce->free_list);
+
+ while (!list_empty(&ce->free_list)) {
+ req = list_first_entry(&ce->free_list, struct io_kiocb, list);
+ list_del(&req->list);
+ kmem_cache_free(req_cachep, req);
+ }
+
+ ce->nr_free = ce->nr_avail = 0;
+}
+
+static int io_uring_notify_dead(unsigned int cpu)
+{
+ struct io_kiocb_cache *cache = per_cpu_ptr(alloc_cache, cpu);
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cache->caches); i++)
+ io_uring_kill_ce(&cache->caches[i], i);
+
+ return 0;
+}
+
static int __init io_uring_init(void)
{
+ int cpu, i;
+
#define __BUILD_BUG_VERIFY_ELEMENT(stype, eoffset, etype, ename) do { \
BUILD_BUG_ON(offsetof(stype, ename) != eoffset); \
BUILD_BUG_ON(sizeof(etype) != sizeof_field(stype, ename)); \
@@ -8142,6 +8373,25 @@ static int __init io_uring_init(void)
BUILD_BUG_ON(ARRAY_SIZE(io_op_defs) != IORING_OP_LAST);
BUILD_BUG_ON(__REQ_F_LAST_BIT >= 8 * sizeof(int));
req_cachep = KMEM_CACHE(io_kiocb, SLAB_HWCACHE_ALIGN | SLAB_PANIC);
+
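+ /*
+ * Set up the per-cpu request caches and hook them up to CPU hotplug
+ * and the memory shrinker.
+ */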
+ alloc_cache = alloc_percpu(struct io_kiocb_cache);
+ for_each_possible_cpu(cpu) {
+ struct io_kiocb_cache *cache = per_cpu_ptr(alloc_cache, cpu);
+
+ for (i = 0; i < ARRAY_SIZE(cache->caches); i++) {
+ struct io_kiocb_cache_entry *ce = &cache->caches[i];
+
+ INIT_LIST_HEAD(&ce->alloc_list);
+ spin_lock_init(&ce->free_lock);
+ INIT_LIST_HEAD(&ce->free_list);
+ ce->nr_free = 0;
+ ce->nr_avail = 0;
+ }
+ }
+
+ cpuhp_setup_state_nocalls(CPUHP_IOURING_DEAD, "io_uring:dead", NULL,
+ io_uring_notify_dead);
+ WARN_ON(register_shrinker(&io_uring_shrinker));
return 0;
};
__initcall(io_uring_init);
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 77d70b633531..3b80556572a5 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -60,6 +60,7 @@ enum cpuhp_state {
CPUHP_LUSTRE_CFS_DEAD,
CPUHP_AP_ARM_CACHE_B15_RAC_DEAD,
CPUHP_PADATA_DEAD,
+ CPUHP_IOURING_DEAD,
CPUHP_WORKQUEUE_PREP,
CPUHP_POWER_NUMA_PREPARE,
CPUHP_HRTIMERS_PREPARE,