[PATCH -RT] net: xfrm: fix compress vs decompress serialization

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



The following splat was seen when running ltp's 'tcp4_ipsec06' stresser on v4.x
based RT kernels:

[   82.523064] BUG: unable to handle kernel paging request at ffffbfbec0c0bf64
[   82.523085] IP: deflate_slow+0x32/0x400
[   82.523086] PGD 3e10d067 P4D 3e10d067 PUD 3e10e067 PMD 3a5ea067 PTE 0
[   82.523091] Oops: 0000 [#1] PREEMPT SMP PTI
[   82.523101] CPU: 0 PID: 5883 Comm: netstress Not tainted 4.12.14-14.26-rt #1 SLE15-SP1
[   82.523102] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.12.0-0-ga698c89-rebuilt.opensuse.org 04/01/2014
[   82.523105] task: ffff9c703731cc40 task.stack: ffffbfbec0fc4000
[   82.523108] RIP: 0010:deflate_slow+0x32/0x400
[   82.523112] RSP: 0018:ffffbfbec0fc7a48 EFLAGS: 00010202
[   82.523114] RAX: 000000000008581c RBX: ffffbfbec0b85000 RCX: 0000000000000005
[   82.523115] RDX: ffffbfbec0b86748 RSI: 000000000000001a RDI: 000000000008c660
[   82.523116] RBP: 0000000000000005 R08: 0000000000000027 R09: 0000000000000027
[   82.523117] R10: ffffbfbec0b86748 R11: 0000000000000053 R12: 00000000000005c5
[   82.523118] R13: 00000000000005c5 R14: ffffbfbec0b85000 R15: 00000000000005c8
[   82.523120] FS:  00007fc9da6ec700(0000) GS:ffff9c703fc00000(0000) knlGS:0000000000000000
[   82.523121] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   82.523122] CR2: ffffbfbec0c0bf64 CR3: 000000002fd16000 CR4: 00000000000006f0
[   82.523126] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   82.523127] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[   82.523127] Call Trace:
[   82.523168]  zlib_deflate+0xae/0x3b0
[   82.523181]  deflate_compress+0x5d/0x90 [deflate]
[   82.523186]  ipcomp_output+0xf0/0x1b0 [xfrm_ipcomp]
[   82.523191]  xfrm_output_resume+0x391/0x480
[   82.523199]  ? skb_mac_gso_segment+0xad/0x110
[   82.523202]  xfrm_output+0xd4/0x1d0
[   82.523206]  xfrm4_output+0x2c/0xc0
[   82.523209]  ? xfrm4_udp_encap_rcv+0x1a0/0x1a0
[   82.523213]  ip_queue_xmit+0x145/0x3e0
[   82.523217]  __tcp_transmit_skb+0x513/0x9c0
[   82.523220]  tcp_write_xmit+0x1ba/0xf00
[   82.523223]  __tcp_push_pending_frames+0x31/0xd0
[   82.523225]  tcp_sendmsg_locked+0x395/0xbd0
[   82.523228]  tcp_sendmsg+0x27/0x40
[   82.523231]  sock_sendmsg+0x36/0x40
[   82.523237]  SYSC_sendto+0x10e/0x140
[   82.523240]  ? sock_setsockopt+0x2aa/0xa30
[   82.523245]  ? kvm_clock_read+0x21/0x50
[   82.523249]  ? ktime_get_ts64+0x4c/0xe0
[   82.523253]  ? SyS_poll+0x70/0x100
[   82.523257]  do_syscall_64+0x74/0x150
[   82.523267]  entry_SYSCALL_64_after_hwframe+0x59/0xbe
[   82.523279] RIP: 0033:0x7fc9db4c348a
[   82.523280] RSP: 002b:00007fc9da6cbd00 EFLAGS: 00000246 ORIG_RAX: 000000000000002c
[   82.523282] RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 00007fc9db4c348a
[   82.523283] RDX: 000000000000ffff RSI: 00007fc9da6dbd90 RDI: 0000000000000007
[   82.523284] RBP: 00007fc9da6dbd90 R08: 0000000000000000 R09: 0000000000000000
[   82.523285] R10: 0000000000004000 R11: 0000000000000246 R12: 000000000000ffff
[   82.523286] R13: 0000556d7d69fca4 R14: 0000000000000259 R15: 0000000000000001
[   82.523287] Code: 55 53 89 f5 8b 87 9c 00 00 00 48 89 fb 3d 05 01 00 00 0f 86 04 01 00 00 8b b3 94 00 00 00 48 8b 53 48 8d 46 02 8b 4b 78 23 73 40 <0f> b6 04 02 8b 53 68 d3 e2 31 d0 23 43 74 48 8b 53 60 89 43 68
[   82.523314] Modules linked in: ipcomp xfrm_ipcomp deflate authenc echainiv esp4 des3_ede_x86_64 des_generic xfrm4_mode_tunnel ah4 xfrm4_mode_transport ip6table_mangle ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_filter ip6_tables iptable_mangle iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack libcrc32c iptable_filter ip_tables x_tables xfrm_user xfrm_algo veth af_packet iscsi_ibft iscsi_boot_sysfs snd_hda_codec_generic ledtrig_audio snd_hda_intel ppdev snd_hda_codec snd_hda_core snd_hwdep pcspkr snd_pcm snd_timer joydev virtio_net snd net_failover failover parport_pc soundcore parport qemu_fw_cfg button i2c_piix4 nfsd auth_rpcgss nfs_acl lockd grace sunrpc btrfs xor zstd_decompress zstd_compress xxhash raid6_pq virtio_blk virtio_console virtio_scsi
[   82.523355]  hid_generic usbhid sr_mod cdrom ata_generic bochs_drm drm_kms_helper ata_piix ahci syscopyarea sysfillrect sysimgblt fb_sys_fops ehci_pci libahci ttm ehci_hcd drm libata serio_raw usbcore floppy virtio_pci virtio_ring virtio drm_panel_orientation_quirks sg dm_multipath dm_mod scsi_dh_rdac scsi_dh_emc scsi_dh_alua scsi_mod autofs4
[   82.523376] Supported: Yes
[   82.523379] CR2: ffffbfbec0c0bf64
[   82.582750] ---[ end trace 0000000000000002 ]---
[   82.582763] RIP: 0010:deflate_slow+0x32/0x400
[   82.582764] RSP: 0018:ffffbfbec0fc7a48 EFLAGS: 00010202
[   82.582765] RAX: 000000000008581c RBX: ffffbfbec0b85000 RCX: 0000000000000005
[   82.582765] RDX: ffffbfbec0b86748 RSI: 000000000000001a RDI: 000000000008c660
[   82.582766] RBP: 0000000000000005 R08: 0000000000000027 R09: 0000000000000027
[   82.582766] R10: ffffbfbec0b86748 R11: 0000000000000053 R12: 00000000000005c5
[   82.582767] R13: 00000000000005c5 R14: ffffbfbec0b85000 R15: 00000000000005c8
[   82.582768] FS:  00007fc9da6ec700(0000) GS:ffff9c703fc00000(0000) knlGS:0000000000000000
[   82.582769] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   82.582770] CR2: ffffbfbec0c0bf64 CR3: 000000002fd16000 CR4: 00000000000006f0
[   82.582772] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[   82.582773] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400

ipcomp_compress() serializes access to the ipcomp_scratches percpu buffer by
disabling BH, which prevents a softirq from coming in and running
ipcomp_decompress(), which is never called from process context. This of course
won't work on RT and the buffer can get corrupted; there have been similar issues
in the past with such assumptions, e.g. ebf255ed6c44 (net: add back the missing
serialization in ip_send_unicast_reply()).

Similarly, this patch addresses the issue with locallocks, which allow RT to have
a percpu spinlock and do the correct serialization.

Addressing such races on an individual basis seemed like a game of whack-a-mole
until, afaict, local_bh_enable() was reworked to use locallocks in 96fac673174
(softirq: Add preemptible softirq), which is why the BUG is not seen in newer kernels.

Signed-off-by: Davidlohr Bueso <dbueso@xxxxxxx>
---

This applies on top of all v4.x based -rt kernels.

 net/xfrm/xfrm_ipcomp.c | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c
index ccfdc7115a83..f13871de6e0d 100644
--- a/net/xfrm/xfrm_ipcomp.c
+++ b/net/xfrm/xfrm_ipcomp.c
@@ -20,6 +20,7 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
+#include <linux/locallock.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
 #include <linux/smp.h>
@@ -36,6 +37,7 @@ struct ipcomp_tfms {
 
 static DEFINE_MUTEX(ipcomp_resource_mutex);
 static void * __percpu *ipcomp_scratches;
+static DEFINE_LOCAL_IRQ_LOCK(ipcomp_scratches_lock);
 static int ipcomp_scratch_users;
 static LIST_HEAD(ipcomp_tfms_list);
 
@@ -45,12 +47,15 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 	const int plen = skb->len;
 	int dlen = IPCOMP_SCRATCH_SIZE;
 	const u8 *start = skb->data;
-	const int cpu = get_cpu();
-	u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
-	struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
-	int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
-	int len;
+	u8 *scratch;
+	struct crypto_comp *tfm;
+	int err, len;
+
+	local_lock(ipcomp_scratches_lock);
 
+	scratch = *this_cpu_ptr(ipcomp_scratches);
+	tfm = *this_cpu_ptr(ipcd->tfms);
+	err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
 	if (err)
 		goto out;
 
@@ -103,7 +108,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
 	err = 0;
 
 out:
-	put_cpu();
+	local_unlock(ipcomp_scratches_lock);
 	return err;
 }
 
@@ -146,6 +151,8 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
 	int err;
 
 	local_bh_disable();
+	local_lock(ipcomp_scratches_lock);
+
 	scratch = *this_cpu_ptr(ipcomp_scratches);
 	tfm = *this_cpu_ptr(ipcd->tfms);
 	err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
@@ -158,12 +165,14 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
 	}
 
 	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
+	local_unlock(ipcomp_scratches_lock);
 	local_bh_enable();
 
 	pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
 	return 0;
 
 out:
+	local_unlock(ipcomp_scratches_lock);
 	local_bh_enable();
 	return err;
 }
-- 
2.26.2




[Index of Archives]     [RT Stable]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]

  Powered by Linux