NeilBrown <neilb@xxxxxxxx> writes: > On Thu, Jul 05 2018, David Howells wrote: > >> From: kiran modukuri <kiran.modukuri@xxxxxxxxx> >> >> There is a potential race in fscache operation enqueuing for reading and >> copying multiple pages from cachefiles to netfs. >> Under some heavy load system, it will happen very often. >> >> If this race occurs, an oops similar to the following is seen: >> >> kernel BUG at fs/fscache/operation.c:69! >> invalid opcode: 0000 [#1] SMP >> ... >> #0 [ffff883fff0838d8] machine_kexec at ffffffff81051beb >> #1 [ffff883fff083938] crash_kexec at ffffffff810f2542 >> #2 [ffff883fff083a08] oops_end at ffffffff8163e1a8 >> #3 [ffff883fff083a30] die at ffffffff8101859b >> #4 [ffff883fff083a60] do_trap at ffffffff8163d860 >> #5 [ffff883fff083ab0] do_invalid_op at ffffffff81015204 >> #6 [ffff883fff083b60] invalid_op at ffffffff8164701e >> [exception RIP: fscache_enqueue_operation+246] >> RIP: ffffffffa0b793c6 RSP: ffff883fff083c18 RFLAGS: 00010046 >> RAX: 0000000000000019 RBX: ffff8832ed1a9ec0 RCX: 0000000000000006 >> RDX: 0000000000000000 RSI: 0000000000000046 RDI: 0000000000000046 >> RBP: ffff883fff083c20 R8: 0000000000000086 R9: 000000000000178f >> R10: ffffffff816aeb00 R11: ffff883fff08392e R12: ffff8802f0525620 >> R13: ffff88407ffc01d8 R14: 0000000000000000 R15: 0000000000000003 >> ORIG_RAX: ffffffffffffffff CS: 0010 SS: 0000 >> #7 [ffff883fff083c10] fscache_enqueue_operation at ffffffffa0b793c6 >> #8 [ffff883fff083c28] cachefiles_read_waiter at ffffffffa0b15a48 >> #9 [ffff883fff083c48] __wake_up_common at ffffffff810af028 >> >> Reported-by: Lei Xue <carmark.dlut@xxxxxxxxx> >> Reported-by: Vegard Nossum <vegard.nossum@xxxxxxxxx> >> Reported-by: Anthony DeRobertis <aderobertis@xxxxxxxxxxx> >> Reported-by: NeilBrown <neilb@xxxxxxxx> >> Reported-by: Daniel Axtens <dja@xxxxxxxxxx> >> Reported-by: KiranKumar Modukuri <kiran.modukuri@xxxxxxxxx> >> Signed-off-by: David Howells <dhowells@xxxxxxxxxx> >> --- >> >> fs/cachefiles/rdwr.c | 17 ++++++++++++----- >> fs/fscache/operation.c | 6 ++++-- >> 2 files changed, 16 insertions(+), 7 deletions(-) >> >> diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c >> index 5082c8a49686..40f7595aad10 100644 >> --- a/fs/cachefiles/rdwr.c >> +++ b/fs/cachefiles/rdwr.c >> @@ -27,6 +27,7 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, >> struct cachefiles_one_read *monitor = >> container_of(wait, struct cachefiles_one_read, monitor); >> struct cachefiles_object *object; >> + struct fscache_retrieval *op = monitor->op; >> struct wait_bit_key *key = _key; >> struct page *page = wait->private; >> >> @@ -51,16 +52,22 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, >> list_del(&wait->entry); >> >> /* move onto the action list and queue for FS-Cache thread pool */ >> - ASSERT(monitor->op); >> + ASSERT(op); >> >> - object = container_of(monitor->op->op.object, >> - struct cachefiles_object, fscache); >> + /* We need to temporarily bump the usage count as we don't own a ref >> + * here otherwise cachefiles_read_copier() may free the op between the >> + * monitor being enqueued on the op->to_do list and the op getting >> + * enqueued on the work queue. >> + */ >> + fscache_get_retrieval(op); >> >> + object = container_of(op->op.object, struct cachefiles_object, fscache); >> spin_lock(&object->work_lock); >> - list_add_tail(&monitor->op_link, &monitor->op->to_do); >> + list_add_tail(&monitor->op_link, &op->to_do); >> spin_unlock(&object->work_lock); >> >> - fscache_enqueue_retrieval(monitor->op); >> + fscache_enqueue_retrieval(op); >> + fscache_put_retrieval(op); >> return 0; >> } > > Thanks - I like this approach. Taking the extra reference makes it a > lot more clear what is happening and why. I personally preferred the other approach, but I looked at this approach in great detail and am confident that it is correct. Reviewed-by: Daniel Axtens <dja@xxxxxxxxxx> Regards, Daniel > > Thanks, > NeilBrown > > >> >> diff --git a/fs/fscache/operation.c b/fs/fscache/operation.c >> index e30c5975ea58..8d265790374c 100644 >> --- a/fs/fscache/operation.c >> +++ b/fs/fscache/operation.c >> @@ -70,7 +70,8 @@ void fscache_enqueue_operation(struct fscache_operation *op) >> ASSERT(op->processor != NULL); >> ASSERT(fscache_object_is_available(op->object)); >> ASSERTCMP(atomic_read(&op->usage), >, 0); >> - ASSERTCMP(op->state, ==, FSCACHE_OP_ST_IN_PROGRESS); >> + ASSERTIFCMP(op->state != FSCACHE_OP_ST_IN_PROGRESS, >> + op->state, ==, FSCACHE_OP_ST_CANCELLED); >> >> fscache_stat(&fscache_n_op_enqueue); >> switch (op->flags & FSCACHE_OP_TYPE) { >> @@ -499,7 +500,8 @@ void fscache_put_operation(struct fscache_operation *op) >> struct fscache_cache *cache; >> >> _enter("{OBJ%x OP%x,%d}", >> - op->object->debug_id, op->debug_id, atomic_read(&op->usage)); >> + op->object ? op->object->debug_id : 0, >> + op->debug_id, atomic_read(&op->usage)); >> >> ASSERTCMP(atomic_read(&op->usage), >, 0); >> -- Linux-cachefs mailing list Linux-cachefs@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/linux-cachefs