Re: [regression] oops on heavy compilations ("kernel BUG at mm/zswap.c:1005!" and "Oops: invalid opcode: 0000")

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Aug 23, 2024 at 01:51:41PM +0200, Linux regression tracking (Thorsten Leemhuis) wrote:
> > [75864.693223] br0: port 1(enp5s0) entered blocking state
> > [75864.693226] br0: port 1(enp5s0) entered forwarding state
> > [86041.349844] ------------[ cut here ]------------
> > [86041.349850] kernel BUG at mm/zswap.c:1005!

This is:

        BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));

so crypto_acomp_decompress() returned an error, and zswap did not handle
it.  I wouldn't be surprised if this were dodgy RAM.

That said, zswap could handle this better.  There's no need to panic the
entire machine over being unable to read a page from swap.  Killing just
the process that needed this page is sufficient.

A suggested patch follows at the end, after the oops.

> > [86041.349862] Oops: invalid opcode: 0000 [#1] PREEMPT SMP NOPTI
> > [86041.349867] CPU: 5 PID: 2798071 Comm: llvm-tblgen Not tainted 6.10.6-12 #1 349ceb515693b41153483eac7819a5fb2832d2bf
> > [86041.349872] Hardware name: To Be Filled By O.E.M. B450M Pro4-F R2.0/B450M Pro4-F R2.0, BIOS P10.08 01/19/2024
> > [86041.349876] RIP: 0010:zswap_decompress+0x1ef/0x200
> > [86041.349884] Code: ef e8 95 2a ce ff 84 c0 0f 85 1f ff ff ff e9 fb fe ff ff 0f 0b 48 8d 7b 10 e8 0d a9 a4 00 c7 43 10 00 00 00 00 8b 43 30 eb 86 <0f> 0b 0f 0b e8 f8 9b a3 00 0f 1f 84 00 00 00 00 00 90 90 90 90 90
> > [86041.349889] RSP: 0000:ffffb98f823ebb90 EFLAGS: 00010282
> > [86041.349892] RAX: 00000000ffffffea RBX: ffff9bf22e8c1e08 RCX: ffff9bef137774ba
> > [86041.349894] RDX: 0000000000000002 RSI: 0000000000000438 RDI: ffff9bf22e8b2af0
> > [86041.349897] RBP: ffff9bef58cd2b98 R08: ffff9bee8baf07e0 R09: ffff9bef13777080
> > [86041.349899] R10: 0000000000000022 R11: ffff9bee8baf1000 R12: fffff782422ebc00
> > [86041.349902] R13: ffff9bef13777080 R14: ffff9bef01e3d6e0 R15: ffff9bf22e8c1e48
> > [86041.349904] FS:  00007f4bda31d280(0000) GS:ffff9bf22e880000(0000) knlGS:0000000000000000
> > [86041.349908] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> > [86041.349910] CR2: 000000001665d010 CR3: 0000000191a2c000 CR4: 0000000000350ef0
> > [86041.349914] Call Trace:
> > [86041.349918]  <TASK>
> > [86041.349920]  ? die+0x36/0x90
> > [86041.349925]  ? do_trap+0xdd/0x100
> > [86041.349929]  ? zswap_decompress+0x1ef/0x200
> > [86041.349932]  ? do_error_trap+0x6a/0x90
> > [86041.349935]  ? zswap_decompress+0x1ef/0x200
> > [86041.349938]  ? exc_invalid_op+0x50/0x70
> > [86041.349943]  ? zswap_decompress+0x1ef/0x200
> > [86041.349946]  ? asm_exc_invalid_op+0x1a/0x20
> > [86041.349951]  ? zswap_decompress+0x1ef/0x200
> > [86041.349955]  zswap_load+0x109/0x120
> > [86041.349958]  swap_read_folio+0x64/0x450
> > [86041.349963]  swapin_readahead+0x463/0x4e0
> > [86041.349967]  do_swap_page+0x436/0xd70
> > [86041.349972]  ? __pte_offset_map+0x1b/0x180
> > [86041.349976]  __handle_mm_fault+0x85d/0x1070
> > [86041.349979]  ? sched_tick+0xee/0x2f0
> > [86041.349985]  handle_mm_fault+0x18d/0x320
> > [86041.349988]  do_user_addr_fault+0x177/0x6a0
> > [86041.349993]  exc_page_fault+0x7e/0x180
> > [86041.349996]  asm_exc_page_fault+0x26/0x30
> > [86041.350000] RIP: 0033:0x7453b9
> > [86041.350019] Code: 00 48 8d 0c 49 4c 8d 04 ca 48 8b 0f 4c 39 c2 75 19 e9 7f 00 00 00 66 66 2e 0f 1f 84 00 00 00 00 00 48 83 c2 18 49 39 d0 74 6b <48> 39 0a 75 f2 48 89 84 24 90 00 00 00 4c 39 73 10 0f 84 2f 02 00
> > [86041.350024] RSP: 002b:00007ffe67b93c80 EFLAGS: 00010206
> > [86041.350027] RAX: 0000000016659250 RBX: 00007ffe67b93db0 RCX: 000000000f1aad40
> > [86041.350030] RDX: 000000001665d010 RSI: 00007ffe67b93cd8 RDI: 00007ffe67b93cd0
> > [86041.350032] RBP: 0000000000000001 R08: 000000001665d088 R09: 0000000000000000
> > [86041.350035] R10: 00007f4bda030610 R11: 00007f4bda0d6200 R12: 0000000016661210
> > [86041.350038] R13: 00007ffe67b94a58 R14: 000000000ba280a8 R15: 0000000000000006
> > [86041.350041]  </TASK>
> > [86041.350043] Modules linked in: tls rpcsec_gss_krb5 nfsv4 dns_resolver nfs netfs rpcrdma rdma_cm iw_cm ib_cm ib_core br_netfilter iptable_filter xt_physdev tun bridge stp llc ext4 crc16 mbcache jbd2 amd_atl intel_rapl_msr intel_rapl_common cfg80211 edac_mce_amd kvm_amd rfkill kvm crct10dif_pclmul crc32_pclmul polyval_clmulni r8169 polyval_generic gf128mul ghash_clmulni_intel sha512_ssse3 realtek sha256_ssse3 sha1_ssse3 aesni_intel mdio_devres crypto_simd sp5100_tco k10temp gpio_amdpt cryptd wmi_bmof pcspkr ccp libphy i2c_piix4 acpi_cpufreq rapl zenpower ryzen_smu gpio_generic mac_hid nfsd auth_rpcgss nfs_acl lockd grace nct6775 nct6775_core hwmon_vid sg sunrpc crypto_user fuse dm_mod loop nfnetlink bpf_preload ip_tables x_tables xfs libcrc32c crc32c_generic drm_ttm_helper ttm video gpu_sched i2c_algo_bit drm_gpuvm drm_exec mxm_wmi nvme crc32c_intel drm_display_helper xhci_pci nvme_core xhci_pci_renesas wmi virtio_net net_failover failover dimlib virtio_blk virtio_pci virtio_pci_legacy_dev virtio_pci_modern_dev
> > [86041.350106]  [last unloaded: nouveau]
> > [86041.350125] ---[ end trace 0000000000000000 ]---

diff --git a/mm/zswap.c b/mm/zswap.c
index df66ab102d27..186aa4282c93 100644
--- a/mm/zswap.c
+++ b/mm/zswap.c
@@ -958,12 +958,13 @@ static bool zswap_compress(struct folio *folio, struct zswap_entry *entry)
 	return comp_ret == 0 && alloc_ret == 0;
 }
 
-static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
+static int zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 {
 	struct zpool *zpool = entry->pool->zpool;
 	struct scatterlist input, output;
 	struct crypto_acomp_ctx *acomp_ctx;
 	u8 *src;
+	int err;
 
 	acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
 	mutex_lock(&acomp_ctx->mutex);
@@ -989,12 +990,17 @@ static void zswap_decompress(struct zswap_entry *entry, struct folio *folio)
 	sg_init_table(&output, 1);
 	sg_set_folio(&output, folio, PAGE_SIZE, 0);
 	acomp_request_set_params(acomp_ctx->req, &input, &output, entry->length, PAGE_SIZE);
-	BUG_ON(crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait));
-	BUG_ON(acomp_ctx->req->dlen != PAGE_SIZE);
+	err = crypto_acomp_decompress(acomp_ctx->req);
+	err = crypto_wait_req(err, &acomp_ctx->wait);;
+	if (WARN_ONCE(err, "Decompression error %d -- corrupted RAM?\n", err))
+		return err;
+	if (acomp_ctx->req->dlen != PAGE_SIZE)
+		return -EIO;
 	mutex_unlock(&acomp_ctx->mutex);
 
 	if (src != acomp_ctx->buffer)
 		zpool_unmap_handle(zpool, entry->handle);
+	return 0;
 }
 
 /*********************************
@@ -1020,6 +1026,7 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 	struct folio *folio;
 	struct mempolicy *mpol;
 	bool folio_was_allocated;
+	int err;
 	struct writeback_control wbc = {
 		.sync_mode = WB_SYNC_NONE,
 	};
@@ -1060,7 +1067,12 @@ static int zswap_writeback_entry(struct zswap_entry *entry,
 		return -ENOMEM;
 	}
 
-	zswap_decompress(entry, folio);
+	err = zswap_decompress(entry, folio);
+	if (err < 0) {
+		folio_unlock(folio);
+		folio_put(folio);
+		return err;
+	}
 
 	count_vm_event(ZSWPWB);
 	if (entry->objcg)
@@ -1601,6 +1613,7 @@ bool zswap_load(struct folio *folio)
 	bool swapcache = folio_test_swapcache(folio);
 	struct xarray *tree = swap_zswap_tree(swp);
 	struct zswap_entry *entry;
+	int err;
 
 	VM_WARN_ON_ONCE(!folio_test_locked(folio));
 
@@ -1638,10 +1651,13 @@ bool zswap_load(struct folio *folio)
 	if (!entry)
 		return false;
 
-	if (entry->length)
-		zswap_decompress(entry, folio);
-	else
+	if (entry->length) {
+		err = zswap_decompress(entry, folio);
+		if (err)
+			return false;
+	} else {
 		zswap_fill_folio(folio, entry->value);
+	}
 
 	count_vm_event(ZSWPIN);
 	if (entry->objcg)





[Index of Archives]     [Linux ARM Kernel]     [Linux ARM]     [Linux Omap]     [Fedora ARM]     [IETF Announce]     [Bugtraq]     [Linux OMAP]     [Linux MIPS]     [eCos]     [Asterisk Internet PBX]     [Linux API]

  Powered by Linux