[PATCH for-rc 3/4] IB/hfi1: Fix AIP early init panic

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



From: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxxxxxxxxxxxxx>

An early failure in hfi1_ipoib_setup_rn() can lead to the following
panic:

[  355.625765] BUG: unable to handle kernel NULL pointer dereference at 00000000000001b0
[  355.634188] PGD 0 P4D 0
[  355.636731] Oops: 0002 [#1] SMP NOPTI
[  355.659994] Workqueue: events work_for_cpu_fn
[  355.664371] RIP: 0010:try_to_grab_pending+0x2b/0x140
[  355.669361] Code: 1f 44 00 00 41 55 41 54 55 48 89 d5 53 48 89 fb 9c 58 0f 1f 44 00 00 48 89 c2 fa 66 0f 1f 44 00 00 48 89 55 00 40 84 f6 75 77 <f0> 48 0f ba 2b 00 72 09 31 c0 5b 5d 41 5c 41 5d c3 48 89 df e8 6c
[  355.688238] RSP: 0018:ffffb6b3cf7cfa48 EFLAGS: 00010046
[  355.693491] RAX: 0000000000000246 RBX: 00000000000001b0 RCX: 0000000000000000
[  355.700664] RDX: 0000000000000246 RSI: 0000000000000000 RDI: 00000000000001b0
[  355.707836] RBP: ffffb6b3cf7cfa70 R08: 0000000000000f09 R09: 0000000000000001
[  355.715007] R10: 0000000000000000 R11: 0000000000000001 R12: 0000000000000000
[  355.722178] R13: ffffb6b3cf7cfa90 R14: ffffffff9b2fbfc0 R15: ffff8a4fdf244690
[  355.729351] FS:  0000000000000000(0000) GS:ffff8a527f400000(0000) knlGS:0000000000000000
[  355.737485] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  355.743260] CR2: 00000000000001b0 CR3: 00000017e2410003 CR4: 00000000007706f0
[  355.750434] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[  355.757607] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[  355.764780] PKRU: 55555554
[  355.767497] Call Trace:
[  355.769954]  __cancel_work_timer+0x42/0x190
[  355.774159]  ? dev_printk_emit+0x4e/0x70
[  355.778115]  iowait_cancel_work+0x15/0x30 [hfi1]
[  355.782768]  hfi1_ipoib_txreq_deinit+0x5a/0x220 [hfi1]
[  355.787933]  ? dev_err+0x6c/0x90
[  355.791188]  hfi1_ipoib_netdev_dtor+0x15/0x30 [hfi1]
[  355.796188]  hfi1_ipoib_setup_rn+0x10e/0x150 [hfi1]
[  355.801094]  rdma_init_netdev+0x5a/0x80 [ib_core]
[  355.805832]  ? hfi1_ipoib_free_rdma_netdev+0x20/0x20 [hfi1]
[  355.811434]  ipoib_intf_init+0x6c/0x350 [ib_ipoib]
[  355.816251]  ipoib_intf_alloc+0x5c/0xc0 [ib_ipoib]
[  355.821068]  ipoib_add_one+0xbe/0x300 [ib_ipoib]
[  355.825712]  add_client_context+0x12c/0x1a0 [ib_core]
[  355.830794]  enable_device_and_get+0xdc/0x1d0 [ib_core]
[  355.836049]  ib_register_device+0x572/0x6b0 [ib_core]
[  355.841128]  rvt_register_device+0x11b/0x220 [rdmavt]
[  355.846219]  hfi1_register_ib_device+0x6b4/0x770 [hfi1]
[  355.851486]  do_init_one.isra.20+0x3e3/0x680 [hfi1]
[  355.856389]  local_pci_probe+0x41/0x90
[  355.860154]  work_for_cpu_fn+0x16/0x20
[  355.863921]  process_one_work+0x1a7/0x360
[  355.867948]  ? create_worker+0x1a0/0x1a0
[  355.871888]  worker_thread+0x1cf/0x390
[  355.875655]  ? create_worker+0x1a0/0x1a0
[  355.879594]  kthread+0x116/0x130
[  355.882838]  ? kthread_flush_work_fn+0x10/0x10
[  355.887302]  ret_from_fork+0x1f/0x40
[  355.890893] Modules linked in: rpcrdma sunrpc rdma_ucm ib_srpt ib_isert acpi_cpufreq(-) iscsi_target_mod target_core_mod ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib intel_rapl_msr intel_rapl_
common isst_if_common x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass mlx5_core crct10dif_pclmul crc32_pclmul hfi1(OE+) tls ghash_clmulni_intel rdmavt(OE) mgag200 drm_kms_helper mlxfw mei_me syscopyarea sysfill
rect ib_uverbs sysimgblt fb_sys_fops rapl ioatdma intel_cstate tg3 i2c_algo_bit mei hpwdt ses drm ib_core pci_hyperv_intf uas enclosure hpilo pcspkr intel_uncore wmi lpc_ich dca acpi_tad ipmi_ssif acpi_power_meter binfmt_misc xpmem(O
) numatools(O) fuse ip_tables dm_mod xfs libcrc32c vfat fat ext4 mbcache jbd2 sd_mod t10_pi sg smartpqi ipmi_si scsi_transport_sas usb_storage ipmi_devintf ipmi_msghandler crc32c_intel [last unloaded: mlxfw]
[  355.970226] CR2: 00000000000001b0
[  355.973583]

The panic happens in hfi1_ipoib_txreq_deinit() because there is a NULL
deref when hfi1_ipoib_netdev_dtor() is called in this error case.

hfi1_ipoib_txreq_init() and hfi1_ipoib_rxq_init() are self-unwinding, so
fix this by adjusting the error paths accordingly.

Other changes:
- hfi1_ipoib_free_rdma_netdev() is deleted, including its free_netdev()
  call, since the netdev core code calls free_netdev()
- The switch to the accelerated entrances is moved to the success path.

Fixes: d99dc602e2a5 ("IB/hfi1: Add functions to transmit datagram ipoib packets")
Cc: stable@xxxxxxxxxxxxxxx
Reviewed-by: Dennis Dalessandro <dennis.dalessandro@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Mike Marciniszyn <mike.marciniszyn@xxxxxxxxxxxxxxxxxxxx>
---
 drivers/infiniband/hw/hfi1/ipoib_main.c | 13 +++----------
 1 file changed, 3 insertions(+), 10 deletions(-)

diff --git a/drivers/infiniband/hw/hfi1/ipoib_main.c b/drivers/infiniband/hw/hfi1/ipoib_main.c
index e1a2b02..8306ed5 100644
--- a/drivers/infiniband/hw/hfi1/ipoib_main.c
+++ b/drivers/infiniband/hw/hfi1/ipoib_main.c
@@ -168,12 +168,6 @@ static void hfi1_ipoib_netdev_dtor(struct net_device *dev)
 	free_percpu(dev->tstats);
 }
 
-static void hfi1_ipoib_free_rdma_netdev(struct net_device *dev)
-{
-	hfi1_ipoib_netdev_dtor(dev);
-	free_netdev(dev);
-}
-
 static void hfi1_ipoib_set_id(struct net_device *dev, int id)
 {
 	struct hfi1_ipoib_dev_priv *priv = hfi1_ipoib_priv(dev);
@@ -211,24 +205,23 @@ static int hfi1_ipoib_setup_rn(struct ib_device *device,
 	priv->port_num = port_num;
 	priv->netdev_ops = netdev->netdev_ops;
 
-	netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
-
 	ib_query_pkey(device, port_num, priv->pkey_index, &priv->pkey);
 
 	rc = hfi1_ipoib_txreq_init(priv);
 	if (rc) {
 		dd_dev_err(dd, "IPoIB netdev TX init - failed(%d)\n", rc);
-		hfi1_ipoib_free_rdma_netdev(netdev);
 		return rc;
 	}
 
 	rc = hfi1_ipoib_rxq_init(netdev);
 	if (rc) {
 		dd_dev_err(dd, "IPoIB netdev RX init - failed(%d)\n", rc);
-		hfi1_ipoib_free_rdma_netdev(netdev);
+		hfi1_ipoib_txreq_deinit(priv);
 		return rc;
 	}
 
+	netdev->netdev_ops = &hfi1_ipoib_netdev_ops;
+
 	netdev->priv_destructor = hfi1_ipoib_netdev_dtor;
 	netdev->needs_free_netdev = true;
 
-- 
1.8.3.1




[Index of Archives]     [Linux Kernel]     [Kernel Development Newbies]     [Linux USB Devel]     [Video for Linux]     [Linux Audio Users]     [Yosemite Hiking]     [Linux Kernel]     [Linux SCSI]

  Powered by Linux