On Thu, 2011-08-11 at 14:06 -0400, Neil Horman wrote: > This oops was reported recently: > d:mon> e > cpu 0xd: Vector: 300 (Data Access) at [c0000000fd4c7120] > pc: d00000000076f194: .t3_l2t_get+0x44/0x524 [cxgb3] > lr: d000000000b02108: .init_act_open+0x150/0x3d4 [cxgb3i] > sp: c0000000fd4c73a0 > msr: 8000000000009032 > dar: 0 > dsisr: 40000000 > current = 0xc0000000fd640d40 > paca = 0xc00000000054ff80 > pid = 5085, comm = iscsid > d:mon> t > [c0000000fd4c7450] d000000000b02108 .init_act_open+0x150/0x3d4 [cxgb3i] > [c0000000fd4c7500] d000000000e45378 .cxgbi_ep_connect+0x784/0x8e8 [libcxgbi] > [c0000000fd4c7650] d000000000db33f0 .iscsi_if_rx+0x71c/0xb18 > [scsi_transport_iscsi2] > [c0000000fd4c7740] c000000000370c9c .netlink_data_ready+0x40/0xa4 > [c0000000fd4c77c0] c00000000036f010 .netlink_sendskb+0x4c/0x9c > [c0000000fd4c7850] c000000000370c18 .netlink_sendmsg+0x358/0x39c > [c0000000fd4c7950] c00000000033be24 .sock_sendmsg+0x114/0x1b8 > [c0000000fd4c7b50] c00000000033d208 .sys_sendmsg+0x218/0x2ac > [c0000000fd4c7d70] c00000000033f55c .sys_socketcall+0x228/0x27c > [c0000000fd4c7e30] c0000000000086a4 syscall_exit+0x0/0x40 > --- Exception: c01 (System Call) at 00000080da560cfc > > The root cause was an EEH error, which sent us down the offload_close path in > the cxgb3 driver, which in turn sets cdev->lldev to NULL, without regard for > upper layer driver (like the cxgbi drivers) which might have execution contexts > in the middle of its use. The result is the oops above, when t3_l2t_get attempts > to dereference cdev->lldev right after the EEH error handler sets it to NULL. > > The fix is to reference count the cdev structure. When an EEH error occurs, the > shutdown path: > t3_adapter_error->offload_close->cxgb3i_remove_clients->cxgb3i_dev_close > will now block until such time as the cdev pointer has a use count of zero. > This coupled with the fact that lookups will now skip finding any registered > cdev's in cxgbi_device_find_by_[lldev|netdev] with the CXGBI_FLAG_ADAPTER_RESET > bit set ensures that on an EEH, the setting of lldev to NULL in offload_close > will only happen after there are no longer any active users of the data > structure. > > This has been tested by the reporter and shown to fix the reproted oops > > Change notes: > > v2) Resent because of some build warnings with the first version on request of > JBottomley > > v3) Modified to use kref refcounting api. It doesn't do much for this code, > since the purpose of refcounting is primarily to serialize the clearing of a > pointer in another data structure in a different module, but I think it does > look a bit cleaner. I've built this code, but not had the chance to test it > yet, given that I'm about to leave on vacation. I wanted to get it posted for > review before I left. It should work fine, since its effectively a replacement > of the raw atomic with struct krefs. Karen, if you could run this on some > cxgb3i equipment to make sure I didn't inadvertently do anything stupid, I'd > aprpeciate it. Thanks! With any of these updates, doesn't cxgb4i also need updating? > Signed-off-by: Neil Horman <nhorman@xxxxxxxxxxxxx> > CC: Divy Le Ray <divy@xxxxxxxxxxx> > CC: Steve Wise <swise@xxxxxxxxxxx> > CC: Karen Xie <kxie@xxxxxxxxxxx> > CC: Mike Christie <michaelc@xxxxxxxxxxx> > CC: "James E.J. Bottomley" <JBottomley@xxxxxxxxxxxxx> > CC: stable-commits@xxxxxxxxxxxxxxx > --- > drivers/scsi/cxgbi/cxgb3i/cxgb3i.c | 8 +++++++- > drivers/scsi/cxgbi/libcxgbi.c | 21 +++++++++++++++++---- > drivers/scsi/cxgbi/libcxgbi.h | 5 +++++ > 3 files changed, 29 insertions(+), 5 deletions(-) > > diff --git a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c > index bd22041..855ee23 100644 > --- a/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c > +++ b/drivers/scsi/cxgbi/cxgb3i/cxgb3i.c > @@ -1303,9 +1303,13 @@ static void cxgb3i_dev_close(struct t3cdev *t3dev) > > if (!cdev || cdev->flags & CXGBI_FLAG_ADAPTER_RESET) { > pr_info("0x%p close, f 0x%x.\n", cdev, cdev ? cdev->flags : 0); > + if (cdev) > + cdev_put(cdev); > + while (cdev && atomic_read(&cdev->use_count.refcount) > 1) > + msleep(1); OK, so this was what leapt out the last time as a big no-no. Firstly, do you really have to wait for destruction? If so, use a completion. > return; > } > - > + cdev_put(cdev); > cxgbi_device_unregister(cdev); > } > > @@ -1320,6 +1324,7 @@ static void cxgb3i_dev_open(struct t3cdev *t3dev) > int i, err; > > if (cdev) { > + cdev_put(cdev); > pr_info("0x%p, updating.\n", cdev); Other way around, otherwise it's use after free. > return; > } > @@ -1392,6 +1397,7 @@ static void cxgb3i_dev_event_handler(struct t3cdev *t3dev, u32 event, u32 port) > cdev->flags &= ~CXGBI_FLAG_ADAPTER_RESET; > break; > } > + cdev_put(cdev); > } > > /** > diff --git a/drivers/scsi/cxgbi/libcxgbi.c b/drivers/scsi/cxgbi/libcxgbi.c > index 77ac217..297efaa 100644 > --- a/drivers/scsi/cxgbi/libcxgbi.c > +++ b/drivers/scsi/cxgbi/libcxgbi.c > @@ -95,8 +95,10 @@ void cxgbi_device_portmap_cleanup(struct cxgbi_device *cdev) > } > EXPORT_SYMBOL_GPL(cxgbi_device_portmap_cleanup); > > -static inline void cxgbi_device_destroy(struct cxgbi_device *cdev) > +void cxgbi_device_destroy(struct kref *kr) > { > + struct cxgbi_device *cdev = container_of(kr, struct cxgbi_device, use_count); > + > log_debug(1 << CXGBI_DBG_DEV, > "cdev 0x%p, p# %u.\n", cdev, cdev->nports); > cxgbi_hbas_remove(cdev); > @@ -111,6 +113,7 @@ static inline void cxgbi_device_destroy(struct cxgbi_device *cdev) > cxgbi_free_big_mem(cdev->pmap.port_csk); > kfree(cdev); > } > +EXPORT_SYMBOL_GPL(cxgbi_device_destroy); There's no cross module use, is there? so no need of an export. > struct cxgbi_device *cxgbi_device_register(unsigned int extra, > unsigned int nports) > @@ -125,6 +128,7 @@ struct cxgbi_device *cxgbi_device_register(unsigned int extra, > pr_warn("nport %d, OOM.\n", nports); > return NULL; > } > + kref_init(&cdev->use_count); > cdev->ports = (struct net_device **)(cdev + 1); > cdev->hbas = (struct cxgbi_hba **)(((char*)cdev->ports) + nports * > sizeof(struct net_device *)); > @@ -151,7 +155,7 @@ void cxgbi_device_unregister(struct cxgbi_device *cdev) > mutex_lock(&cdev_mutex); > list_del(&cdev->list_head); > mutex_unlock(&cdev_mutex); > - cxgbi_device_destroy(cdev); > + cdev_put(cdev); > } > EXPORT_SYMBOL_GPL(cxgbi_device_unregister); > > @@ -167,7 +171,7 @@ void cxgbi_device_unregister_all(unsigned int flag) > cdev, cdev->nports, cdev->nports ? > cdev->ports[0]->name : ""); > list_del(&cdev->list_head); > - cxgbi_device_destroy(cdev); > + cdev_put(cdev); > } > } > mutex_unlock(&cdev_mutex); > @@ -181,6 +185,9 @@ struct cxgbi_device *cxgbi_device_find_by_lldev(void *lldev) > mutex_lock(&cdev_mutex); > list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) { > if (cdev->lldev == lldev) { > + if (cdev->flags & CXGBI_FLAG_ADAPTER_RESET) > + continue; > + cdev_hold(cdev); > mutex_unlock(&cdev_mutex); > return cdev; > } > @@ -210,7 +217,10 @@ static struct cxgbi_device *cxgbi_device_find_by_netdev(struct net_device *ndev, > list_for_each_entry_safe(cdev, tmp, &cdev_list, list_head) { > for (i = 0; i < cdev->nports; i++) { > if (ndev == cdev->ports[i]) { > + if (cdev->flags & CXGBI_FLAG_ADAPTER_RESET) > + continue; > cdev->hbas[i]->vdev = vdev; > + cdev_hold(cdev); > mutex_unlock(&cdev_mutex); > if (port) > *port = i; > @@ -469,7 +479,7 @@ static struct cxgbi_sock *cxgbi_check_route(struct sockaddr *dst_addr) > struct sockaddr_in *daddr = (struct sockaddr_in *)dst_addr; > struct dst_entry *dst; > struct net_device *ndev; > - struct cxgbi_device *cdev; > + struct cxgbi_device *cdev = NULL; > struct rtable *rt = NULL; > struct flowi4 fl4; > struct cxgbi_sock *csk = NULL; > @@ -542,6 +552,8 @@ rel_rt: > if (csk) > cxgbi_sock_closed(csk); > err_out: > + if (cdev) > + cdev_put(cdev); > return ERR_PTR(err); > } > > @@ -2491,6 +2503,7 @@ struct iscsi_endpoint *cxgbi_ep_connect(struct Scsi_Host *shost, > return ep; > > release_conn: > + cdev_put(csk->cdev); This isn't right. an embedded device reference should be released in the csk release function (cxgbi_sock_free). > cxgbi_sock_put(csk); > cxgbi_sock_closed(csk); > err_out: > diff --git a/drivers/scsi/cxgbi/libcxgbi.h b/drivers/scsi/cxgbi/libcxgbi.h > index 9267844..08788ac 100644 > --- a/drivers/scsi/cxgbi/libcxgbi.h > +++ b/drivers/scsi/cxgbi/libcxgbi.h > @@ -24,6 +24,7 @@ > #include <linux/scatterlist.h> > #include <linux/skbuff.h> > #include <linux/vmalloc.h> > +#include <linux/kref.h> > #include <scsi/scsi_device.h> > #include <scsi/libiscsi_tcp.h> > > @@ -514,6 +515,7 @@ struct cxgbi_device { > unsigned int flags; > struct net_device **ports; > void *lldev; > + struct kref use_count; > struct cxgbi_hba **hbas; > const unsigned short *mtus; > unsigned char nmtus; > @@ -557,6 +559,8 @@ struct cxgbi_device { > void *dd_data; > }; > #define cxgbi_cdev_priv(cdev) ((cdev)->dd_data) > +#define cdev_hold(x) do {kref_get(&(x)->use_count);} while(0) hold is non standard, use put instead. > +#define cdev_put(x) do {kref_put(&(x)->use_count, cxgbi_device_destroy);} while(0) > > struct cxgbi_conn { > struct cxgbi_endpoint *cep; > @@ -691,6 +695,7 @@ static inline __be32 cxgbi_get_iscsi_ipv4(struct cxgbi_hba *chba) > struct cxgbi_device *cxgbi_device_register(unsigned int, unsigned int); > void cxgbi_device_unregister(struct cxgbi_device *); > void cxgbi_device_unregister_all(unsigned int flag); > +void cxgbi_device_destroy(struct kref *kr); > struct cxgbi_device *cxgbi_device_find_by_lldev(void *); > int cxgbi_hbas_add(struct cxgbi_device *, unsigned int, unsigned int, > struct scsi_host_template *, ��.n��������+%������w��{.n�����{������ܨ}���Ơz�j:+v�����w����ޙ��&�)ߡ�a����z�ޗ���ݢj��w�f