Fri, Apr 27, 2018 at 07:06:58PM CEST, sridhar.samudrala@xxxxxxxxx wrote: >This provides a generic interface for paravirtual drivers to listen >for netdev register/unregister/link change events from pci ethernet >devices with the same MAC and takeover their datapath. The notifier and >event handling code is based on the existing netvsc implementation. > >It exposes 2 sets of interfaces to the paravirtual drivers. >1. For paravirtual drivers like virtio_net that use 3 netdev model, the > the failover module provides interfaces to create/destroy additional > master netdev and all the slave events are managed internally. > net_failover_create() > net_failover_destroy() > A failover netdev is created that acts a master device and controls 2 > slave devices. The original virtio_net netdev is registered as 'standby' > netdev and a passthru/vf device with the same MAC gets registered as > 'primary' netdev. Both 'standby' and 'primary' netdevs are associated > with the same 'pci' device. The user accesses the network interface via > 'failover' netdev. The 'failover' netdev chooses 'primary' netdev as > default for transmits when it is available with link up and running. >2. For existing netvsc driver that uses 2 netdev model, no master netdev > is created. The paravirtual driver registers each instance of netvsc > as a 'failover' netdev along with a set of ops to manage the slave > events. There is no 'standby' netdev in this model. A passthru/vf device > with the same MAC gets registered as 'primary' netdev. > net_failover_register() > net_failover_unregister() > First of all, I like this v9 very much. Nice progress! Couple of notes inlined. >Signed-off-by: Sridhar Samudrala <sridhar.samudrala@xxxxxxxxx> >--- > include/linux/netdevice.h | 16 + > include/net/net_failover.h | 62 ++++ > net/Kconfig | 10 + > net/core/Makefile | 1 + > net/core/net_failover.c | 892 +++++++++++++++++++++++++++++++++++++++++++++ > 5 files changed, 981 insertions(+) > create mode 100644 include/net/net_failover.h > create mode 100644 net/core/net_failover.c [...] >+static int net_failover_slave_register(struct net_device *slave_dev) >+{ >+ struct net_failover_info *nfo_info; >+ struct net_failover_ops *nfo_ops; >+ struct net_device *failover_dev; >+ bool slave_is_standby; >+ u32 orig_mtu; >+ int err; >+ >+ ASSERT_RTNL(); >+ >+ failover_dev = net_failover_get_bymac(slave_dev->perm_addr, &nfo_ops); >+ if (!failover_dev) >+ goto done; >+ >+ if (failover_dev->type != slave_dev->type) >+ goto done; >+ >+ if (nfo_ops && nfo_ops->slave_register) >+ return nfo_ops->slave_register(slave_dev, failover_dev); >+ >+ nfo_info = netdev_priv(failover_dev); >+ slave_is_standby = (slave_dev->dev.parent == failover_dev->dev.parent); No parentheses needed. >+ if (slave_is_standby ? rtnl_dereference(nfo_info->standby_dev) : >+ rtnl_dereference(nfo_info->primary_dev)) { >+ netdev_err(failover_dev, "%s attempting to register as slave dev when %s already present\n", >+ slave_dev->name, >+ slave_is_standby ? "standby" : "primary"); >+ goto done; >+ } >+ >+ /* We want to allow only a direct attached VF device as a primary >+ * netdev. As there is no easy way to check for a VF device, restrict >+ * this to a pci device. >+ */ >+ if (!slave_is_standby && (!slave_dev->dev.parent || >+ !dev_is_pci(slave_dev->dev.parent))) Yeah, this is good for now. >+ goto done; >+ >+ if (failover_dev->features & NETIF_F_VLAN_CHALLENGED && >+ vlan_uses_dev(failover_dev)) { >+ netdev_err(failover_dev, "Device %s is VLAN challenged and failover device has VLAN set up\n", >+ failover_dev->name); >+ goto done; >+ } >+ >+ /* Align MTU of slave with failover dev */ >+ orig_mtu = slave_dev->mtu; >+ err = dev_set_mtu(slave_dev, failover_dev->mtu); >+ if (err) { >+ netdev_err(failover_dev, "unable to change mtu of %s to %u register failed\n", >+ slave_dev->name, failover_dev->mtu); >+ goto done; >+ } >+ >+ dev_hold(slave_dev); >+ >+ if (netif_running(failover_dev)) { >+ err = dev_open(slave_dev); >+ if (err && (err != -EBUSY)) { >+ netdev_err(failover_dev, "Opening slave %s failed err:%d\n", >+ slave_dev->name, err); >+ goto err_dev_open; >+ } >+ } >+ >+ netif_addr_lock_bh(failover_dev); >+ dev_uc_sync_multiple(slave_dev, failover_dev); >+ dev_uc_sync_multiple(slave_dev, failover_dev); >+ netif_addr_unlock_bh(failover_dev); >+ >+ err = vlan_vids_add_by_dev(slave_dev, failover_dev); >+ if (err) { >+ netdev_err(failover_dev, "Failed to add vlan ids to device %s err:%d\n", >+ slave_dev->name, err); >+ goto err_vlan_add; >+ } >+ >+ err = netdev_rx_handler_register(slave_dev, net_failover_handle_frame, >+ failover_dev); >+ if (err) { >+ netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n", >+ err); >+ goto err_handler_register; >+ } >+ >+ err = netdev_upper_dev_link(slave_dev, failover_dev, NULL); Please use netdev_master_upper_dev_link(). >+ if (err) { >+ netdev_err(slave_dev, "can not set failover device %s (err = %d)\n", >+ failover_dev->name, err); >+ goto err_upper_link; >+ } >+ >+ slave_dev->priv_flags |= IFF_FAILOVER_SLAVE; >+ >+ if (slave_is_standby) { >+ rcu_assign_pointer(nfo_info->standby_dev, slave_dev); >+ dev_get_stats(nfo_info->standby_dev, &nfo_info->standby_stats); >+ } else { >+ rcu_assign_pointer(nfo_info->primary_dev, slave_dev); >+ dev_get_stats(nfo_info->primary_dev, &nfo_info->primary_stats); >+ failover_dev->min_mtu = slave_dev->min_mtu; >+ failover_dev->max_mtu = slave_dev->max_mtu; >+ } >+ >+ net_failover_compute_features(failover_dev); >+ >+ call_netdevice_notifiers(NETDEV_JOIN, slave_dev); >+ >+ netdev_info(failover_dev, "failover %s slave:%s registered\n", >+ slave_is_standby ? "standby" : "primary", slave_dev->name); I wonder if noise like this is needed in dmesg... >+ >+ goto done; >+ >+err_upper_link: >+ netdev_rx_handler_unregister(slave_dev); >+err_handler_register: >+ vlan_vids_del_by_dev(slave_dev, failover_dev); >+err_vlan_add: >+ dev_uc_unsync(slave_dev, failover_dev); >+ dev_mc_unsync(slave_dev, failover_dev); >+ dev_close(slave_dev); >+err_dev_open: >+ dev_put(slave_dev); >+ dev_set_mtu(slave_dev, orig_mtu); >+done: >+ return NOTIFY_DONE; >+} >+ >+int net_failover_slave_unregister(struct net_device *slave_dev) >+{ >+ struct net_device *standby_dev, *primary_dev; >+ struct net_failover_info *nfo_info; >+ struct net_failover_ops *nfo_ops; >+ struct net_device *failover_dev; >+ bool slave_is_standby; >+ >+ if (!netif_is_failover_slave(slave_dev)) >+ goto done; >+ >+ ASSERT_RTNL(); >+ >+ failover_dev = net_failover_get_bymac(slave_dev->perm_addr, &nfo_ops); >+ if (!failover_dev) >+ goto done; >+ >+ if (nfo_ops && nfo_ops->slave_unregister) >+ return nfo_ops->slave_unregister(slave_dev, failover_dev); >+ >+ nfo_info = netdev_priv(failover_dev); >+ primary_dev = rtnl_dereference(nfo_info->primary_dev); >+ standby_dev = rtnl_dereference(nfo_info->standby_dev); >+ >+ if (slave_dev != primary_dev && slave_dev != standby_dev) >+ goto done; >+ >+ slave_is_standby = (slave_dev->dev.parent == failover_dev->dev.parent); >+ >+ netdev_rx_handler_unregister(slave_dev); >+ netdev_upper_dev_unlink(slave_dev, failover_dev); >+ vlan_vids_del_by_dev(slave_dev, failover_dev); >+ dev_uc_unsync(slave_dev, failover_dev); >+ dev_mc_unsync(slave_dev, failover_dev); >+ dev_close(slave_dev); >+ slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE; >+ >+ nfo_info = netdev_priv(failover_dev); >+ net_failover_get_stats(failover_dev, &nfo_info->failover_stats); >+ >+ if (slave_is_standby) { >+ RCU_INIT_POINTER(nfo_info->standby_dev, NULL); >+ } else { >+ RCU_INIT_POINTER(nfo_info->primary_dev, NULL); >+ if (standby_dev) { >+ failover_dev->min_mtu = standby_dev->min_mtu; >+ failover_dev->max_mtu = standby_dev->max_mtu; >+ } >+ } >+ >+ dev_put(slave_dev); >+ >+ net_failover_compute_features(failover_dev); >+ >+ netdev_info(failover_dev, "failover %s slave:%s unregistered\n", >+ slave_is_standby ? "standby" : "primary", slave_dev->name); >+ >+done: >+ return NOTIFY_DONE; >+} >+EXPORT_SYMBOL_GPL(net_failover_slave_unregister); >+ >+static int net_failover_slave_link_change(struct net_device *slave_dev) >+{ >+ struct net_device *failover_dev, *primary_dev, *standby_dev; >+ struct net_failover_info *nfo_info; >+ struct net_failover_ops *nfo_ops; >+ >+ if (!netif_is_failover_slave(slave_dev)) >+ goto done; >+ >+ ASSERT_RTNL(); >+ >+ failover_dev = net_failover_get_bymac(slave_dev->perm_addr, &nfo_ops); >+ if (!failover_dev) >+ goto done; >+ >+ if (nfo_ops && nfo_ops->slave_link_change) >+ return nfo_ops->slave_link_change(slave_dev, failover_dev); >+ >+ if (!netif_running(failover_dev)) >+ return 0; >+ >+ nfo_info = netdev_priv(failover_dev); >+ >+ primary_dev = rtnl_dereference(nfo_info->primary_dev); >+ standby_dev = rtnl_dereference(nfo_info->standby_dev); >+ >+ if (slave_dev != primary_dev && slave_dev != standby_dev) >+ goto done; >+ >+ if ((primary_dev && net_failover_xmit_ready(primary_dev)) || >+ (standby_dev && net_failover_xmit_ready(standby_dev))) { >+ netif_carrier_on(failover_dev); >+ netif_tx_wake_all_queues(failover_dev); >+ } else { >+ net_failover_get_stats(failover_dev, &nfo_info->failover_stats); >+ netif_carrier_off(failover_dev); >+ netif_tx_stop_all_queues(failover_dev); >+ } >+ >+done: >+ return NOTIFY_DONE; >+} >+ >+static int >+net_failover_event(struct notifier_block *this, unsigned long event, void *ptr) >+{ >+ struct net_device *event_dev = netdev_notifier_info_to_dev(ptr); >+ >+ /* Skip parent events */ >+ if (netif_is_failover(event_dev)) >+ return NOTIFY_DONE; >+ >+ switch (event) { >+ case NETDEV_REGISTER: >+ return net_failover_slave_register(event_dev); >+ case NETDEV_UNREGISTER: >+ return net_failover_slave_unregister(event_dev); >+ case NETDEV_UP: >+ case NETDEV_DOWN: >+ case NETDEV_CHANGE: >+ return net_failover_slave_link_change(event_dev); >+ default: >+ return NOTIFY_DONE; >+ } >+} >+ >+static struct notifier_block net_failover_notifier = { >+ .notifier_call = net_failover_event, >+}; >+ >+static void nfo_register_existing_slave(struct net_device *failover_dev) Please maintain the same function prefixes withing the whole code. Also, to be consistent with the rest of the code, have "_register" as a suffix. >+{ >+ struct net *net = dev_net(failover_dev); >+ struct net_device *dev; >+ >+ rtnl_lock(); >+ for_each_netdev(net, dev) { >+ if (netif_is_failover(dev)) >+ continue; >+ if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr)) >+ net_failover_slave_register(dev); >+ } >+ rtnl_unlock(); >+} >+ For every exported function, please provide documentation in format: /** * net_failover_register - Register net failover device * * @dev: netdevice the failover is registerd for * @ops: failover ops * * Describe what the function does, what are expected inputs and * outputs, etc. Don't hesistate to be verbose. Mention the 2/3netdev * model here. Then you don't need the comment in the header file * for there functions. */ >+int net_failover_register(struct net_device *dev, struct net_failover_ops *ops, >+ struct net_failover **pfailover) Just return "struct net_failover *" instead of arg ** and use ERR_PTR macro to propagate an error. >+{ >+ struct net_failover *failover; >+ >+ failover = kzalloc(sizeof(*failover), GFP_KERNEL); >+ if (!failover) >+ return -ENOMEM; >+ >+ rcu_assign_pointer(failover->ops, ops); >+ dev_hold(dev); >+ dev->priv_flags |= IFF_FAILOVER; >+ rcu_assign_pointer(failover->failover_dev, dev); >+ >+ spin_lock(&net_failover_lock); >+ list_add_tail(&failover->list, &net_failover_list); >+ spin_unlock(&net_failover_lock); >+ >+ netdev_info(dev, "failover master:%s registered\n", dev->name); >+ >+ nfo_register_existing_slave(dev); >+ >+ *pfailover = failover; >+ >+ return 0; >+} >+EXPORT_SYMBOL_GPL(net_failover_register); >+ >+void net_failover_unregister(struct net_failover *failover) >+{ >+ struct net_device *failover_dev; >+ >+ failover_dev = rcu_dereference(failover->failover_dev); >+ >+ netdev_info(failover_dev, "failover master:%s unregistered\n", >+ failover_dev->name); >+ >+ failover_dev->priv_flags &= ~IFF_FAILOVER; >+ dev_put(failover_dev); >+ >+ spin_lock(&net_failover_lock); >+ list_del(&failover->list); >+ spin_unlock(&net_failover_lock); >+ >+ kfree(failover); >+} >+EXPORT_SYMBOL_GPL(net_failover_unregister); >+ >+int net_failover_create(struct net_device *standby_dev, >+ struct net_failover **pfailover) Same here, just return "struct net_failover *" >+{ >+ struct device *dev = standby_dev->dev.parent; >+ struct net_device *failover_dev; >+ int err; >+ >+ /* Alloc at least 2 queues, for now we are going with 16 assuming >+ * that VF devices being enslaved won't have too many queues. >+ */ >+ failover_dev = alloc_etherdev_mq(sizeof(struct net_failover_info), 16); >+ if (!failover_dev) { >+ dev_err(dev, "Unable to allocate failover_netdev!\n"); >+ return -ENOMEM; >+ } >+ >+ dev_net_set(failover_dev, dev_net(standby_dev)); >+ SET_NETDEV_DEV(failover_dev, dev); >+ >+ failover_dev->netdev_ops = &failover_dev_ops; >+ failover_dev->ethtool_ops = &failover_ethtool_ops; >+ >+ /* Initialize the device options */ >+ failover_dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE; >+ failover_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE | >+ IFF_TX_SKB_SHARING); >+ >+ /* don't acquire failover netdev's netif_tx_lock when transmitting */ >+ failover_dev->features |= NETIF_F_LLTX; >+ >+ /* Don't allow failover devices to change network namespaces. */ >+ failover_dev->features |= NETIF_F_NETNS_LOCAL; >+ >+ failover_dev->hw_features = FAILOVER_VLAN_FEATURES | >+ NETIF_F_HW_VLAN_CTAG_TX | >+ NETIF_F_HW_VLAN_CTAG_RX | >+ NETIF_F_HW_VLAN_CTAG_FILTER; >+ >+ failover_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL; >+ failover_dev->features |= failover_dev->hw_features; >+ >+ memcpy(failover_dev->dev_addr, standby_dev->dev_addr, >+ failover_dev->addr_len); >+ >+ failover_dev->min_mtu = standby_dev->min_mtu; >+ failover_dev->max_mtu = standby_dev->max_mtu; >+ >+ err = register_netdev(failover_dev); >+ if (err < 0) { if (err) is enough >+ dev_err(dev, "Unable to register failover_dev!\n"); >+ goto err_register_netdev; >+ } >+ >+ netif_carrier_off(failover_dev); >+ >+ err = net_failover_register(failover_dev, NULL, pfailover); >+ if (err < 0) if (err) is enough >+ goto err_failover_register; >+ >+ return 0; >+ >+err_failover_register: >+ unregister_netdev(failover_dev); >+err_register_netdev: >+ free_netdev(failover_dev); >+ >+ return err; >+} >+EXPORT_SYMBOL_GPL(net_failover_create); [...] _______________________________________________ Virtualization mailing list Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linuxfoundation.org/mailman/listinfo/virtualization