On Fri, Aug 23, 2024 at 01:51:41PM +0200, Linux regression tracking (Thorsten Leemhuis) wrote: > > [75864.693223] br0: port 1(enp5s0) entered blocking state > > [75864.693226] br0: port 1(enp5s0) entered forwarding state > > [86041.349844] ------------[ cut here ]------------ > > [86041.349850] kernel BUG at mm/zswap.c:1005! This is: BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait)); so crypto_acomp_decompress() returned an error, and zswap did not handle it. I wouldn't be surprised if this were dodgy RAM. That said, zswap could handle this better. There's no need to panic the entire machine over being unable to read a page from swap. Killing just the process that needed this page is sufficient. A suggested patch follows at the end, after the oops. > > [86041.349862] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI > > [86041.349867] CPU: 5 PID: 2798071 Comm: llvm-tblgen Not tainted 6.10.6-12 #1 349ceb515693b41153483eac7819a5fb2832d2bf > > [86041.349872] Hardware name: To Be Filled By O.E.M. 
B450M Pro4-F R2.0/B450M Pro4-F R2.0, BIOS P10.08 01/19/2024 > > [86041.349876] RIP: 0010:zswap_decompress+0x1ef/0x200 > > [86041.349884] Code: ef e8 95 2a ce ff 84 c0 0f 85 1f ff ff ff e9 fb fe ff ff 0f 0b 48 8d 7b 10 e8 0d a9 a4 00 c7 43 10 00 00 00 00 8b 43 30 eb 86 <0f> 0b 0f 0b e8 f8 9b a3 00 0f 1f 84 00 00 00 00 00 90 90 90 90 90 > > [86041.349889] RSP: 0000:ffffb98f823ebb90 EFLAGS: 00010282 > > [86041.349892] RAX: 00000000ffffffea RBX: ffff9bf22e8c1e08 RCX: ffff9bef137774ba > > [86041.349894] RDX: 0000000000000002 RSI: 0000000000000438 RDI: ffff9bf22e8b2af0 > > [86041.349897] RBP: ffff9bef58cd2b98 R08: ffff9bee8baf07e0 R09: ffff9bef13777080 > > [86041.349899] R10: 0000000000000022 R11: ffff9bee8baf1000 R12: fffff782422ebc00 > > [86041.349902] R13: ffff9bef13777080 R14: ffff9bef01e3d6e0 R15: ffff9bf22e8c1e48 > > [86041.349904] FS: 00007f4bda31d280(0000) GS:ffff9bf22e880000(0000) knlGS:0000000000000000 > > [86041.349908] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 > > [86041.349910] CR2: 000000001665d010 CR3: 0000000191a2c000 CR4: 0000000000350ef0 > > [86041.349914] Call Trace: > > [86041.349918] <TASK> > > [86041.349920] ? die+0x36/0x90 > > [86041.349925] ? do_trap+0xdd/0x100 > > [86041.349929] ? zswap_decompress+0x1ef/0x200 > > [86041.349932] ? do_error_trap+0x6a/0x90 > > [86041.349935] ? zswap_decompress+0x1ef/0x200 > > [86041.349938] ? exc_invalid_op+0x50/0x70 > > [86041.349943] ? zswap_decompress+0x1ef/0x200 > > [86041.349946] ? asm_exc_invalid_op+0x1a/0x20 > > [86041.349951] ? zswap_decompress+0x1ef/0x200 > > [86041.349955] zswap_load+0x109/0x120 > > [86041.349958] swap_read_folio+0x64/0x450 > > [86041.349963] swapin_readahead+0x463/0x4e0 > > [86041.349967] do_swap_page+0x436/0xd70 > > [86041.349972] ? __pte_offset_map+0x1b/0x180 > > [86041.349976] __handle_mm_fault+0x85d/0x1070 > > [86041.349979] ? 
sched_tick+0xee/0x2f0 > > [86041.349985] handle_mm_fault+0x18d/0x320 > > [86041.349988] do_user_addr_fault+0x177/0x6a0 > > [86041.349993] exc_page_fault+0x7e/0x180 > > [86041.349996] asm_exc_page_fault+0x26/0x30 > > [86041.350000] RIP: 0033:0x7453b9 > > [86041.350019] Code: 00 48 8d 0c 49 4c 8d 04 ca 48 8b 0f 4c 39 c2 75 19 e9 7f 00 00 00 66 66 2e 0f 1f 84 00 00 00 00 00 48 83 c2 18 49 39 d0 74 6b <48> 39 0a 75 f2 48 89 84 24 90 00 00 00 4c 39 73 10 0f 84 2f 02 00 > > [86041.350024] RSP: 002b:00007ffe67b93c80 EFLAGS: 00010206 > > [86041.350027] RAX: 0000000016659250 RBX: 00007ffe67b93db0 RCX: 000000000f1aad40 > > [86041.350030] RDX: 000000001665d010 RSI: 00007ffe67b93cd8 RDI: 00007ffe67b93cd0 > > [86041.350032] RBP: 0000000000000001 R08: 000000001665d088 R09: 0000000000000000 > > [86041.350035] R10: 00007f4bda030610 R11: 00007f4bda0d6200 R12: 0000000016661210 > > [86041.350038] R13: 00007ffe67b94a58 R14: 000000000ba280a8 R15: 0000000000000006 > > [86041.350041] </TASK> > > [86041.350043] Modules linked in: tls rpcsec_gss_krb5 nfsv4 dns_resolver nfs netfs rpcrdma rdma_cm iw_cm ib_cm ib_core br_netfilter iptable_filter xt_physdev tun bridge stp llc ext4 crc16 mbcache jbd2 amd_atl intel_rapl_msr intel_rapl_common cfg80211 edac_mce_amd kvm_amd rfkill kvm crct10dif_pclmul crc32_pclmul polyval_clmulni r8169 polyval_generic gf128mul ghash_clmulni_intel sha512_ssse3 realtek sha256_ssse3 sha1_ssse3 aesni_intel mdio_devres crypto_simd sp5100_tco k10temp gpio_amdpt cryptd wmi_bmof pcspkr ccp libphy i2c_piix4 acpi_cpufreq rapl zenpower ryzen_smu gpio_generic mac_hid nfsd auth_rpcgss nfs_acl lockd grace nct6775 nct6775_core hwmon_vid sg sunrpc crypto_user fuse dm_mod loop nfnetlink bpf_preload ip_tables x_tables xfs libcrc32c crc32c_generic drm_ttm_helper ttm video gpu_sched i2c_algo_bit drm_gpuvm drm_exec mxm_wmi nvme crc32c_intel drm_display_helper xhci_pci nvme_core xhci_pci_renesas wmi virtio_net net_failover failover dimlib virtio_blk virtio_pci virtio_pci_legacy_dev 
virtio_pci_modern_dev > > [86041.350106] [last unloaded: nouveau] > > [86041.350125] ---[ end trace 0000000000000000 ]--- diff --git a/mm/zswap.c b/mm/zswap.c index df66ab102d27..186aa4282c93 100644 --- a/mm/zswap.c +++ b/mm/zswap.c @@ -958,12 +958,13 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry) return comp_ret == 0 && alloc_ret == 0; } -static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) +static int zswap_decompress(struct zswap_entry *entry, struct folio *folio) { struct zpool *zpool = entry->pool->zpool; struct scatterlist input, output; struct crypto_acomp_ctx *acomp_ctx; u8 *src; + int err; acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx); mutex_lock(&acomp_ctx->mutex); @@ -989,12 +990,17 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio) sg_init_table(&output, 1); sg_set_folio(&output, folio, PAGE_SIZE, 0); acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE); - BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait)); - BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE); + err = crypto_acomp_decompress(acomp_ctx->req); + err = crypto_wait_req(err, &acomp_ctx->wait);; + if (WARN_ONCE(err, "Decompression error %d -- corrupted RAM?\n", err)) + return err; + if (acomp_ctx->req->dlen != PAGE_SIZE) + return -EIO; mutex_unlock(&acomp_ctx->mutex); if (src != acomp_ctx->buffer) zpool_unmap_handle(zpool, entry->handle); + return 0; } /********************************* @@ -1020,6 +1026,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry, struct folio *folio; struct mempolicy *mpol; bool folio_was_allocated; + int err; struct writeback_control wbc = { .sync_mode = WB_SYNC_NONE, }; @@ -1060,7 +1067,12 @@ static int zswap_writeback_entry(struct zswap_entry *entry, return -ENOMEM; } - zswap_decompress(entry, folio); + err = zswap_decompress(entry, folio); + if (err < 0) { + folio_unlock(folio); + folio_put(folio); + return err; + 
} count_vm_event(ZSWPWB); if (entry->objcg) @@ -1601,6 +1613,7 @@ bool zswap_load(struct folio *folio) bool swapcache = folio_test_swapcache(folio); struct xarray *tree = swap_zswap_tree(swp); struct zswap_entry *entry; + int err; VM_WARN_ON_ONCE(!folio_test_locked(folio)); @@ -1638,10 +1651,13 @@ bool zswap_load(struct folio *folio) if (!entry) return false; - if (entry->length) - zswap_decompress(entry, folio); - else + if (entry->length) { + err = zswap_decompress(entry, folio); + if (err) + return false; + } else { zswap_fill_folio(folio, entry->value); + } count_vm_event(ZSWPIN); if (entry->objcg)