> From: Haiyang Zhang <haiyangz@xxxxxxxxxxxxx> > Sent: Saturday, October 30, 2021 8:55 AM > > > > @@ -1844,44 +1845,70 @@ int mana_probe(struct gdma_dev *gd) > > if (err) > > return err; > > > > - ac = kzalloc(sizeof(*ac), GFP_KERNEL); > > - if (!ac) > > - return -ENOMEM; > > + if (!resuming) { > > + ac = kzalloc(sizeof(*ac), GFP_KERNEL); > > + if (!ac) > > + return -ENOMEM; > > > > - ac->gdma_dev = gd; > > - ac->num_ports = 1; > > - gd->driver_data = ac; > > + ac->gdma_dev = gd; > > + gd->driver_data = ac; > > + } > > > > err = mana_create_eq(ac); > > if (err) > > goto out; > > > > err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, > > MANA_MINOR_VERSION, > > - MANA_MICRO_VERSION, &ac->num_ports); > > + MANA_MICRO_VERSION, &num_ports); > > if (err) > > goto out; > > > > + if (!resuming) { > > + ac->num_ports = num_ports; > > + } else { > > + if (ac->num_ports != num_ports) { > > + dev_err(dev, "The number of vPorts changed: %d->%d\n", > > + ac->num_ports, num_ports); > > + err = -EPROTO; > > + goto out; > > + } > > + } > > + > > + if (ac->num_ports == 0) > > + dev_err(dev, "Failed to detect any vPort\n"); > > + > > if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) > > ac->num_ports = MAX_PORTS_IN_MANA_DEV; > > > > - for (i = 0; i < ac->num_ports; i++) { > > - err = mana_probe_port(ac, i, &ac->ports[i]); > > - if (err) > > - break; > > + if (!resuming) { > > + for (i = 0; i < ac->num_ports; i++) { > > + err = mana_probe_port(ac, i, &ac->ports[i]); > > + if (err) > > + break; > > + } > > + } else { > > + for (i = 0; i < ac->num_ports; i++) { > > + rtnl_lock(); > > + err = mana_attach(ac->ports[i]); > > + rtnl_unlock(); > > + if (err) > > + break; > > + } > > } > > out: > > if (err) > > - mana_remove(gd); > > + mana_remove(gd, false); > > The "goto out" can happen in both resuming true/false cases, > should the error handling path deal with the two cases > differently? Let me make the below change in v2. Please let me know if any further change is needed: --- a/drivers/net/ethernet/microsoft/mana/mana_en.c +++ b/drivers/net/ethernet/microsoft/mana/mana_en.c @@ -1850,8 +1850,10 @@ int mana_probe(struct gdma_dev *gd, bool resuming) if (!resuming) { ac = kzalloc(sizeof(*ac), GFP_KERNEL); - if (!ac) - return -ENOMEM; + if (!ac) { + err = -ENOMEM; + goto out; + } ac->gdma_dev = gd; gd->driver_data = ac; @@ -1872,8 +1874,8 @@ int mana_probe(struct gdma_dev *gd, bool resuming) if (ac->num_ports != num_ports) { dev_err(dev, "The number of vPorts changed: %d->%d\n", ac->num_ports, num_ports); - err = -EPROTO; - goto out; + /* It's unsafe to proceed. */ + return -EPROTO; } } @@ -1886,22 +1888,36 @@ int mana_probe(struct gdma_dev *gd, bool resuming) if (!resuming) { for (i = 0; i < ac->num_ports; i++) { err = mana_probe_port(ac, i, &ac->ports[i]); - if (err) - break; + if (err) { + dev_err(dev, "Failed to probe vPort %u: %d\n", + i, err); + goto out; + } } } else { for (i = 0; i < ac->num_ports; i++) { rtnl_lock(); err = mana_attach(ac->ports[i]); rtnl_unlock(); - if (err) - break; + + if (err) { + netdev_err(ac->ports[i], + "Failed to resume vPort %u: %d\n", + i, err); + return err; + } } } + + return 0; out: - if (err) - mana_remove(gd, false); + /* In the resuming path, it's safer to leave the device in the failed + * state than try to invoke mana_detach(). + */ + if (resuming) + return err; + mana_remove(gd, false); return err; } @@ -1919,7 +1935,7 @@ void mana_remove(struct gdma_dev *gd, bool suspending) if (!ndev) { if (i == 0) dev_err(dev, "No net device to remove\n"); - goto out; + break; } /* All cleanup actions should stay after rtnl_lock(), otherwise For your easy reviewing, the new code of the function in v2 will be: int mana_probe(struct gdma_dev *gd, bool resuming) { struct gdma_context *gc = gd->gdma_context; struct mana_context *ac = gd->driver_data; struct device *dev = gc->dev; u16 num_ports = 0; int err; int i; dev_info(dev, "Microsoft Azure Network Adapter protocol version: %d.%d.%d\n", MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION); err = mana_gd_register_device(gd); if (err) return err; if (!resuming) { ac = kzalloc(sizeof(*ac), GFP_KERNEL); if (!ac) { err = -ENOMEM; goto out; } ac->gdma_dev = gd; gd->driver_data = ac; } err = mana_create_eq(ac); if (err) goto out; err = mana_query_device_cfg(ac, MANA_MAJOR_VERSION, MANA_MINOR_VERSION, MANA_MICRO_VERSION, &num_ports); if (err) goto out; if (!resuming) { ac->num_ports = num_ports; } else { if (ac->num_ports != num_ports) { dev_err(dev, "The number of vPorts changed: %d->%d\n", ac->num_ports, num_ports); /* It's unsafe to proceed. */ return -EPROTO; } } if (ac->num_ports == 0) dev_err(dev, "Failed to detect any vPort\n"); if (ac->num_ports > MAX_PORTS_IN_MANA_DEV) ac->num_ports = MAX_PORTS_IN_MANA_DEV; if (!resuming) { for (i = 0; i < ac->num_ports; i++) { err = mana_probe_port(ac, i, &ac->ports[i]); if (err) { dev_err(dev, "Failed to probe vPort %u: %d\n", i, err); goto out; } } } else { for (i = 0; i < ac->num_ports; i++) { rtnl_lock(); err = mana_attach(ac->ports[i]); rtnl_unlock(); if (err) { netdev_err(ac->ports[i], "Failed to resume vPort %u: %d\n", i, err); return err; } } } return 0; out: /* In the resuming path, it's safer to leave the device in the failed * state than try to invoke mana_detach(). */ if (resuming) return err; mana_remove(gd, false); return err; }