>>>> diff --git a/drivers/infiniband/core/roce_gid_mgmt.c b/drivers/infiniband/core/roce_gid_mgmt.c >>>> index 68197e576433..063dbe72b7c2 100644 >>>> --- a/drivers/infiniband/core/roce_gid_mgmt.c >>>> +++ b/drivers/infiniband/core/roce_gid_mgmt.c >>>> @@ -621,6 +621,7 @@ static void netdevice_event_work_handler(struct work_struct *_work) >>>> { >>>> struct netdev_event_work *work = >>>> container_of(_work, struct netdev_event_work, work); >>>> + struct net_device *real_dev; >>>> unsigned int i; >>>> >>>> for (i = 0; i < ARRAY_SIZE(work->cmds) && work->cmds[i].cb; i++) { >>>> @@ -628,6 +629,12 @@ static void netdevice_event_work_handler(struct work_struct *_work) >>>> work->cmds[i].filter_ndev, >>>> work->cmds[i].cb, >>>> work->cmds[i].ndev); >>>> + real_dev = rdma_vlan_dev_real_dev(work->cmds[i].ndev); >>>> + if (real_dev) >>>> + dev_put(real_dev); >>>> + real_dev = rdma_vlan_dev_real_dev(work->cmds[i].filter_ndev); >>>> + if (real_dev) >>>> + dev_put(real_dev); >>>> dev_put(work->cmds[i].ndev); >>>> dev_put(work->cmds[i].filter_ndev); >>>> } >>>> @@ -638,9 +645,10 @@ static void netdevice_event_work_handler(struct work_struct *_work) >>>> static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, >>>> struct net_device *ndev) >>>> { >>>> - unsigned int i; >>>> struct netdev_event_work *ndev_work = >>>> kmalloc(sizeof(*ndev_work), GFP_KERNEL); >>>> + struct net_device *real_dev; >>>> + unsigned int i; >>>> >>>> if (!ndev_work) >>>> return NOTIFY_DONE; >>>> @@ -653,6 +661,12 @@ static int netdevice_queue_work(struct netdev_event_work_cmd *cmds, >>>> ndev_work->cmds[i].filter_ndev = ndev; >>>> dev_hold(ndev_work->cmds[i].ndev); >>>> dev_hold(ndev_work->cmds[i].filter_ndev); >>>> + real_dev = rdma_vlan_dev_real_dev(ndev_work->cmds[i].ndev); >>>> + if (real_dev) >>>> + dev_hold(real_dev); >>>> + real_dev = rdma_vlan_dev_real_dev(ndev_work->cmds[i].filter_ndev); >>>> + if (real_dev) >>>> + dev_hold(real_dev); >>>> } >>>> INIT_WORK(&ndev_work->work, 
netdevice_event_work_handler); >>> >>> Probably, this is the right change, but I don't know well enough that >>> part of code. What prevents from "real_dev" to disappear right after >>> your call to rdma_vlan_dev_real_dev()? >>> >> >> It is known that free the net_device until its dev_refcnt is one. The >> detail realization see netdev_run_todo().The real_dev's dev_refcnt of >> a vlan net_device will reach one after unregister_netdevice(&real_dev) >> and unregister_vlan_dev(&vlan_ndev, ...) but the dev_refcnt of the vlan >> net_device is bigger than one because netdevice_queue_work() will hold >> the vlan net_device. So my solution is hold the real_dev too in >> netdevice_queue_work(). > > dev_hold(ndev_work->cmds[i].filter_ndev); > + real_dev = rdma_vlan_dev_real_dev(ndev_work->cmds[i].ndev); > + if (real_dev) > <------------ real_dev is released here. > + dev_hold(real_dev); At first, I thought the real_dev's dev_refcnt would still be bigger than one when the NETDEV_UNREGISTER notifier event of the vlan net_device is delivered, because unregister_vlan_dev() calls dev_put(real_dev) only after calling unregister_netdevice_queue(dev, head). I assumed that unregister_netdevice_queue() would issue the NETDEV_UNREGISTER notifier event for the vlan net_device, so that I could hold the real_dev in the NETDEV_UNREGISTER notifier event handler path, netdevice_queue_work(). But I read unregister_vlan_dev() again and found that unregister_netdevice_queue() in unregister_vlan_dev() just moves the vlan net_device onto a list to be unregistered later. So it is possible that the real_dev has already been freed when we access it in netdevice_queue_work(), although the probability is very small. So the patch needs to be improved, for example by setting vlan->real_dev = NULL after dev_put(real_dev) in unregister_vlan_dev(), as proposed by Jason Gunthorpe. Do you have any other good ideas? Thank you!