On Wed, 2020-12-09 at 00:26 +0100, Vitaly Wool wrote: > Hi Mike, > > On 2020-12-07 16:41, Mike Galbraith wrote: > > On Mon, 2020-12-07 at 16:21 +0100, Vitaly Wool wrote: > >> On Mon, Dec 7, 2020 at 1:34 PM Mike Galbraith <efault@xxxxxx> wrote: > >>> > >> > >>> Unfortunately, that made zero difference. > >> > >> Okay, I suggest that you submit the patch that changes read_lock() to > >> write_lock() in __release_z3fold_page() and I'll ack it then. > >> I would like to rewrite the code so that write_lock is not necessary > >> there but I don't want to hold you back and it isn't likely that I'll > >> complete this today. > > > > Nah, I'm in no rush... especially not to sign off on "Because the > > little voices in my head said this bit should look like that bit over > > yonder, and testing _seems_ to indicate they're right about that" :) > > > > -Mike > > > > okay, thanks. Would this make things better: Yup, z3fold became RT tolerant with this (un-munged and) applied. BTW, I don't think my write_lock() hacklet actually plugged the hole that leads to the below. I think it just reduced the odds of the two meeting enough to make it look ~solid in fairly limited test time. [ 3369.373023] kworker/-7413 4.....12 3368809247us : do_compact_page: zhdr: ffff95d93abd8000 zhdr->slots: ffff95d951f5df80 zhdr->slots->slot[0]: 0 [ 3369.373025] kworker/-7413 4.....12 3368809248us : do_compact_page: old_handle ffff95d951f5df98 *old_handle was ffff95d93abd804f now is ffff95da3ab8104c [ 3369.373027] kworker/-7413 4.....11 3368809249us : __release_z3fold_page.constprop.25: freed ffff95d951f5df80 [ 3369.373028] --------------------------------- [ 3369.373029] CR2: 0000000000000018 crash> p debug_handle | grep '\[2' [2]: ffff95dc1ecac0d0 crash> rd ffff95dc1ecac0d0 ffff95dc1ecac0d0: ffff95d951f5df98 ...Q.... crash> p debug_zhdr | grep '\[2' [2]: ffff95dc1ecac0c8 crash> rd ffff95dc1ecac0c8 ffff95dc1ecac0c8: ffff95da3ab81000 ...:.... <== kworker is not working on same zhdr as free_handle().. crash> p debug_slots | grep '\[2' [2]: ffff95dc1ecac0c0 crash> rd ffff95dc1ecac0c0 ffff95dc1ecac0c0: ffff95d951f5df80 ...Q.... <== ..but is the same slots, and frees it under free_handle(). crash> bt -sx leading to use after free+corruption+explosion 1us later. PID: 9334 TASK: ffff95d95a1eb3c0 CPU: 2 COMMAND: "swapoff" #0 [ffffb4248847f8f0] machine_kexec+0x16e at ffffffffa605f8ce #1 [ffffb4248847f938] __crash_kexec+0xd2 at ffffffffa614c162 #2 [ffffb4248847f9f8] crash_kexec+0x30 at ffffffffa614d350 #3 [ffffb4248847fa08] oops_end+0xca at ffffffffa602680a #4 [ffffb4248847fa28] no_context+0x14d at ffffffffa606d7cd #5 [ffffb4248847fa88] exc_page_fault+0x2b8 at ffffffffa68bdb88 #6 [ffffb4248847fae0] asm_exc_page_fault+0x1e at ffffffffa6a00ace #7 [ffffb4248847fb68] mark_wakeup_next_waiter+0x51 at ffffffffa60ea121 #8 [ffffb4248847fbd0] rt_mutex_futex_unlock+0x4f at ffffffffa68c93ef #9 [ffffb4248847fc10] z3fold_zpool_free+0x593 at ffffffffc0ecb663 [z3fold] #10 [ffffb4248847fc78] zswap_free_entry+0x43 at ffffffffa627c823 #11 [ffffb4248847fc88] zswap_frontswap_invalidate_page+0x8a at ffffffffa627c92a #12 [ffffb4248847fcb0] __frontswap_invalidate_page+0x48 at ffffffffa627c018 #13 [ffffb4248847fcd8] swapcache_free_entries+0x1ee at ffffffffa6276f5e #14 [ffffb4248847fd20] free_swap_slot+0x9f at ffffffffa627b8ff #15 [ffffb4248847fd40] delete_from_swap_cache+0x61 at ffffffffa6274621 #16 [ffffb4248847fd60] try_to_free_swap+0x70 at ffffffffa6277520 #17 [ffffb4248847fd70] unuse_vma+0x55c at ffffffffa627869c #18 [ffffb4248847fe90] try_to_unuse+0x139 at ffffffffa6278e89 #19 [ffffb4248847fee8] __x64_sys_swapoff+0x1eb at ffffffffa62798cb #20 [ffffb4248847ff40] do_syscall_64+0x33 at ffffffffa68b9ab3 #21 [ffffb4248847ff50] entry_SYSCALL_64_after_hwframe+0x44 at ffffffffa6a0007c RIP: 00007fbd835a5d17 RSP: 00007ffd60634458 RFLAGS: 00000202 RAX: ffffffffffffffda RBX: 0000559540e34b60 RCX: 00007fbd835a5d17 RDX: 0000000000000001 RSI: 0000000000000001 RDI: 0000559540e34b60 RBP: 0000000000000001 R8: 00007ffd606344c0 R9: 0000000000000003 R10: 0000559540e34721 R11: 0000000000000202 R12: 0000000000000001 R13: 0000000000000000 R14: 00007ffd606344c0 R15: 0000000000000000 ORIG_RAX: 00000000000000a8 CS: 0033 SS: 002b crash> > > diff --git a/mm/z3fold.c b/mm/z3fold.c > index 18feaa0bc537..340c38a5ffac 100644 > --- a/mm/z3fold.c > +++ b/mm/z3fold.c > @@ -303,10 +303,9 @@ static inline void put_z3fold_header(struct > z3fold_header *zhdr) > z3fold_page_unlock(zhdr); > } > > -static inline void free_handle(unsigned long handle) > +static inline void free_handle(unsigned long handle, struct > z3fold_header *zhdr) > { > struct z3fold_buddy_slots *slots; > - struct z3fold_header *zhdr; > int i; > bool is_free; > > @@ -316,22 +315,13 @@ static inline void free_handle(unsigned long handle) > if (WARN_ON(*(unsigned long *)handle == 0)) > return; > > - zhdr = handle_to_z3fold_header(handle); > slots = handle_to_slots(handle); > write_lock(&slots->lock); > *(unsigned long *)handle = 0; > - if (zhdr->slots == slots) { > - write_unlock(&slots->lock); > - return; /* simple case, nothing else to do */ > - } > + if (zhdr->slots != slots) > + zhdr->foreign_handles--; > > - /* we are freeing a foreign handle if we are here */ > - zhdr->foreign_handles--; > is_free = true; > - if (!test_bit(HANDLES_ORPHANED, &slots->pool)) { > - write_unlock(&slots->lock); > - return; > - } > for (i = 0; i <= BUDDY_MASK; i++) { > if (slots->slot[i]) { > is_free = false; > @@ -343,6 +333,8 @@ static inline void free_handle(unsigned long handle) > if (is_free) { > struct z3fold_pool *pool = slots_to_pool(slots); > > + if (zhdr->slots == slots) > + zhdr->slots = NULL; > kmem_cache_free(pool->c_handle, slots); > } > } > @@ -525,8 +517,6 @@ static void __release_z3fold_page(struct > z3fold_header *zhdr, bool locked) > { > struct page *page = virt_to_page(zhdr); > struct z3fold_pool *pool = zhdr_to_pool(zhdr); > - bool is_free = true; > - int i; > > WARN_ON(!list_empty(&zhdr->buddy)); > set_bit(PAGE_STALE, &page->private); > @@ -536,21 +526,6 @@ static void __release_z3fold_page(struct > z3fold_header *zhdr, bool locked) > list_del_init(&page->lru); > spin_unlock(&pool->lock); > > - /* If there are no foreign handles, free the handles array */ > - read_lock(&zhdr->slots->lock); > - for (i = 0; i <= BUDDY_MASK; i++) { > - if (zhdr->slots->slot[i]) { > - is_free = false; > - break; > - } > - } > - if (!is_free) > - set_bit(HANDLES_ORPHANED, &zhdr->slots->pool); > - read_unlock(&zhdr->slots->lock); > - > - if (is_free) > - kmem_cache_free(pool->c_handle, zhdr->slots); > - > if (locked) > z3fold_page_unlock(zhdr); > > @@ -973,6 +948,9 @@ static inline struct z3fold_header > *__z3fold_alloc(struct z3fold_pool *pool, > } > } > > + if (zhdr && !zhdr->slots) > + zhdr->slots = alloc_slots(pool, > + can_sleep ? GFP_NOIO : GFP_ATOMIC); > return zhdr; > } > > @@ -1270,7 +1248,7 @@ static void z3fold_free(struct z3fold_pool *pool, > unsigned long handle) > } > > if (!page_claimed) > - free_handle(handle); > + free_handle(handle, zhdr); > if (kref_put(&zhdr->refcount, release_z3fold_page_locked_list)) { > atomic64_dec(&pool->pages_nr); > return; > @@ -1429,19 +1407,19 @@ static int z3fold_reclaim_page(struct > z3fold_pool *pool, unsigned int retries) > ret = pool->ops->evict(pool, middle_handle); > if (ret) > goto next; > - free_handle(middle_handle); > + free_handle(middle_handle, zhdr); > } > if (first_handle) { > ret = pool->ops->evict(pool, first_handle); > if (ret) > goto next; > - free_handle(first_handle); > + free_handle(first_handle, zhdr); > } > if (last_handle) { > ret = pool->ops->evict(pool, last_handle); > if (ret) > goto next; > - free_handle(last_handle); > + free_handle(last_handle, zhdr); > } > next: > if (test_bit(PAGE_HEADLESS, &page->private)) { > > -- > > Best regards, > Vitaly