Re: [PATCH net-next v9 2/4] net: Introduce generic failover module

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Fri, Apr 27, 2018 at 07:06:58PM CEST, sridhar.samudrala@xxxxxxxxx wrote:
>This provides a generic interface for paravirtual drivers to listen
>for netdev register/unregister/link change events from pci ethernet
>devices with the same MAC and takeover their datapath. The notifier and
>event handling code is based on the existing netvsc implementation.
>
>It exposes 2 sets of interfaces to the paravirtual drivers.
>1. For paravirtual drivers like virtio_net that use 3 netdev model, the
>   the failover module provides interfaces to create/destroy additional
>   master netdev and all the slave events are managed internally.
>        net_failover_create()
>        net_failover_destroy()
>   A failover netdev is created that acts a master device and controls 2
>   slave devices. The original virtio_net netdev is registered as 'standby'
>   netdev and a passthru/vf device with the same MAC gets registered as
>   'primary' netdev. Both 'standby' and 'primary' netdevs are associated
>   with the same 'pci' device.  The user accesses the network interface via
>   'failover' netdev. The 'failover' netdev chooses 'primary' netdev as
>   default for transmits when it is available with link up and running.
>2. For existing netvsc driver that uses 2 netdev model, no master netdev
>   is created. The paravirtual driver registers each instance of netvsc
>   as a 'failover' netdev  along with a set of ops to manage the slave
>   events. There is no 'standby' netdev in this model. A passthru/vf device
>   with the same MAC gets registered as 'primary' netdev.
>        net_failover_register()
>        net_failover_unregister()
>

First of all, I like this v9 very much. Nice progress!
Couple of notes inlined.


>Signed-off-by: Sridhar Samudrala <sridhar.samudrala@xxxxxxxxx>
>---
> include/linux/netdevice.h  |  16 +
> include/net/net_failover.h |  62 ++++
> net/Kconfig                |  10 +
> net/core/Makefile          |   1 +
> net/core/net_failover.c    | 892 +++++++++++++++++++++++++++++++++++++++++++++
> 5 files changed, 981 insertions(+)
> create mode 100644 include/net/net_failover.h
> create mode 100644 net/core/net_failover.c

[...]


>+static int net_failover_slave_register(struct net_device *slave_dev)
>+{
>+	struct net_failover_info *nfo_info;
>+	struct net_failover_ops *nfo_ops;
>+	struct net_device *failover_dev;
>+	bool slave_is_standby;
>+	u32 orig_mtu;
>+	int err;
>+
>+	ASSERT_RTNL();
>+
>+	failover_dev = net_failover_get_bymac(slave_dev->perm_addr, &nfo_ops);
>+	if (!failover_dev)
>+		goto done;
>+
>+	if (failover_dev->type != slave_dev->type)
>+		goto done;
>+
>+	if (nfo_ops && nfo_ops->slave_register)
>+		return nfo_ops->slave_register(slave_dev, failover_dev);
>+
>+	nfo_info = netdev_priv(failover_dev);
>+	slave_is_standby = (slave_dev->dev.parent == failover_dev->dev.parent);

No parentheses needed.


>+	if (slave_is_standby ? rtnl_dereference(nfo_info->standby_dev) :
>+			rtnl_dereference(nfo_info->primary_dev)) {
>+		netdev_err(failover_dev, "%s attempting to register as slave dev when %s already present\n",
>+			   slave_dev->name,
>+			   slave_is_standby ? "standby" : "primary");
>+		goto done;
>+	}
>+
>+	/* We want to allow only a direct attached VF device as a primary
>+	 * netdev. As there is no easy way to check for a VF device, restrict
>+	 * this to a pci device.
>+	 */
>+	if (!slave_is_standby && (!slave_dev->dev.parent ||
>+				  !dev_is_pci(slave_dev->dev.parent)))

Yeah, this is good for now.


>+		goto done;
>+
>+	if (failover_dev->features & NETIF_F_VLAN_CHALLENGED &&
>+	    vlan_uses_dev(failover_dev)) {
>+		netdev_err(failover_dev, "Device %s is VLAN challenged and failover device has VLAN set up\n",
>+			   failover_dev->name);
>+		goto done;
>+	}
>+
>+	/* Align MTU of slave with failover dev */
>+	orig_mtu = slave_dev->mtu;
>+	err = dev_set_mtu(slave_dev, failover_dev->mtu);
>+	if (err) {
>+		netdev_err(failover_dev, "unable to change mtu of %s to %u register failed\n",
>+			   slave_dev->name, failover_dev->mtu);
>+		goto done;
>+	}
>+
>+	dev_hold(slave_dev);
>+
>+	if (netif_running(failover_dev)) {
>+		err = dev_open(slave_dev);
>+		if (err && (err != -EBUSY)) {
>+			netdev_err(failover_dev, "Opening slave %s failed err:%d\n",
>+				   slave_dev->name, err);
>+			goto err_dev_open;
>+		}
>+	}
>+
>+	netif_addr_lock_bh(failover_dev);
>+	dev_uc_sync_multiple(slave_dev, failover_dev);
>+	dev_uc_sync_multiple(slave_dev, failover_dev);
>+	netif_addr_unlock_bh(failover_dev);
>+
>+	err = vlan_vids_add_by_dev(slave_dev, failover_dev);
>+	if (err) {
>+		netdev_err(failover_dev, "Failed to add vlan ids to device %s err:%d\n",
>+			   slave_dev->name, err);
>+		goto err_vlan_add;
>+	}
>+
>+	err = netdev_rx_handler_register(slave_dev, net_failover_handle_frame,
>+					 failover_dev);
>+	if (err) {
>+		netdev_err(slave_dev, "can not register failover rx handler (err = %d)\n",
>+			   err);
>+		goto err_handler_register;
>+	}
>+
>+	err = netdev_upper_dev_link(slave_dev, failover_dev, NULL);

Please use netdev_master_upper_dev_link().



>+	if (err) {
>+		netdev_err(slave_dev, "can not set failover device %s (err = %d)\n",
>+			   failover_dev->name, err);
>+		goto err_upper_link;
>+	}
>+
>+	slave_dev->priv_flags |= IFF_FAILOVER_SLAVE;
>+
>+	if (slave_is_standby) {
>+		rcu_assign_pointer(nfo_info->standby_dev, slave_dev);
>+		dev_get_stats(nfo_info->standby_dev, &nfo_info->standby_stats);
>+	} else {
>+		rcu_assign_pointer(nfo_info->primary_dev, slave_dev);
>+		dev_get_stats(nfo_info->primary_dev, &nfo_info->primary_stats);
>+		failover_dev->min_mtu = slave_dev->min_mtu;
>+		failover_dev->max_mtu = slave_dev->max_mtu;
>+	}
>+
>+	net_failover_compute_features(failover_dev);
>+
>+	call_netdevice_notifiers(NETDEV_JOIN, slave_dev);
>+
>+	netdev_info(failover_dev, "failover %s slave:%s registered\n",
>+		    slave_is_standby ? "standby" : "primary", slave_dev->name);

I wonder if noise like this is needed in dmesg...


>+
>+	goto done;
>+
>+err_upper_link:
>+	netdev_rx_handler_unregister(slave_dev);
>+err_handler_register:
>+	vlan_vids_del_by_dev(slave_dev, failover_dev);
>+err_vlan_add:
>+	dev_uc_unsync(slave_dev, failover_dev);
>+	dev_mc_unsync(slave_dev, failover_dev);
>+	dev_close(slave_dev);
>+err_dev_open:
>+	dev_put(slave_dev);
>+	dev_set_mtu(slave_dev, orig_mtu);
>+done:
>+	return NOTIFY_DONE;
>+}
>+
>+int net_failover_slave_unregister(struct net_device *slave_dev)
>+{
>+	struct net_device *standby_dev, *primary_dev;
>+	struct net_failover_info *nfo_info;
>+	struct net_failover_ops *nfo_ops;
>+	struct net_device *failover_dev;
>+	bool slave_is_standby;
>+
>+	if (!netif_is_failover_slave(slave_dev))
>+		goto done;
>+
>+	ASSERT_RTNL();
>+
>+	failover_dev = net_failover_get_bymac(slave_dev->perm_addr, &nfo_ops);
>+	if (!failover_dev)
>+		goto done;
>+
>+	if (nfo_ops && nfo_ops->slave_unregister)
>+		return nfo_ops->slave_unregister(slave_dev, failover_dev);
>+
>+	nfo_info = netdev_priv(failover_dev);
>+	primary_dev = rtnl_dereference(nfo_info->primary_dev);
>+	standby_dev = rtnl_dereference(nfo_info->standby_dev);
>+
>+	if (slave_dev != primary_dev && slave_dev != standby_dev)
>+		goto done;
>+
>+	slave_is_standby = (slave_dev->dev.parent == failover_dev->dev.parent);
>+
>+	netdev_rx_handler_unregister(slave_dev);
>+	netdev_upper_dev_unlink(slave_dev, failover_dev);
>+	vlan_vids_del_by_dev(slave_dev, failover_dev);
>+	dev_uc_unsync(slave_dev, failover_dev);
>+	dev_mc_unsync(slave_dev, failover_dev);
>+	dev_close(slave_dev);
>+	slave_dev->priv_flags &= ~IFF_FAILOVER_SLAVE;
>+
>+	nfo_info = netdev_priv(failover_dev);
>+	net_failover_get_stats(failover_dev, &nfo_info->failover_stats);
>+
>+	if (slave_is_standby) {
>+		RCU_INIT_POINTER(nfo_info->standby_dev, NULL);
>+	} else {
>+		RCU_INIT_POINTER(nfo_info->primary_dev, NULL);
>+		if (standby_dev) {
>+			failover_dev->min_mtu = standby_dev->min_mtu;
>+			failover_dev->max_mtu = standby_dev->max_mtu;
>+		}
>+	}
>+
>+	dev_put(slave_dev);
>+
>+	net_failover_compute_features(failover_dev);
>+
>+	netdev_info(failover_dev, "failover %s slave:%s unregistered\n",
>+		    slave_is_standby ? "standby" : "primary", slave_dev->name);
>+
>+done:
>+	return NOTIFY_DONE;
>+}
>+EXPORT_SYMBOL_GPL(net_failover_slave_unregister);
>+
>+static int net_failover_slave_link_change(struct net_device *slave_dev)
>+{
>+	struct net_device *failover_dev, *primary_dev, *standby_dev;
>+	struct net_failover_info *nfo_info;
>+	struct net_failover_ops *nfo_ops;
>+
>+	if (!netif_is_failover_slave(slave_dev))
>+		goto done;
>+
>+	ASSERT_RTNL();
>+
>+	failover_dev = net_failover_get_bymac(slave_dev->perm_addr, &nfo_ops);
>+	if (!failover_dev)
>+		goto done;
>+
>+	if (nfo_ops && nfo_ops->slave_link_change)
>+		return nfo_ops->slave_link_change(slave_dev, failover_dev);
>+
>+	if (!netif_running(failover_dev))
>+		return 0;
>+
>+	nfo_info = netdev_priv(failover_dev);
>+
>+	primary_dev = rtnl_dereference(nfo_info->primary_dev);
>+	standby_dev = rtnl_dereference(nfo_info->standby_dev);
>+
>+	if (slave_dev != primary_dev && slave_dev != standby_dev)
>+		goto done;
>+
>+	if ((primary_dev && net_failover_xmit_ready(primary_dev)) ||
>+	    (standby_dev && net_failover_xmit_ready(standby_dev))) {
>+		netif_carrier_on(failover_dev);
>+		netif_tx_wake_all_queues(failover_dev);
>+	} else {
>+		net_failover_get_stats(failover_dev, &nfo_info->failover_stats);
>+		netif_carrier_off(failover_dev);
>+		netif_tx_stop_all_queues(failover_dev);
>+	}
>+
>+done:
>+	return NOTIFY_DONE;
>+}
>+
>+static int
>+net_failover_event(struct notifier_block *this, unsigned long event, void *ptr)
>+{
>+	struct net_device *event_dev = netdev_notifier_info_to_dev(ptr);
>+
>+	/* Skip parent events */
>+	if (netif_is_failover(event_dev))
>+		return NOTIFY_DONE;
>+
>+	switch (event) {
>+	case NETDEV_REGISTER:
>+		return net_failover_slave_register(event_dev);
>+	case NETDEV_UNREGISTER:
>+		return net_failover_slave_unregister(event_dev);
>+	case NETDEV_UP:
>+	case NETDEV_DOWN:
>+	case NETDEV_CHANGE:
>+		return net_failover_slave_link_change(event_dev);
>+	default:
>+		return NOTIFY_DONE;
>+	}
>+}
>+
>+static struct notifier_block net_failover_notifier = {
>+	.notifier_call = net_failover_event,
>+};
>+
>+static void nfo_register_existing_slave(struct net_device *failover_dev)

Please maintain the same function prefixes withing the whole code.

Also, to be consistent with the rest of the code, have "_register" as a
suffix.


>+{
>+	struct net *net = dev_net(failover_dev);
>+	struct net_device *dev;
>+
>+	rtnl_lock();
>+	for_each_netdev(net, dev) {
>+		if (netif_is_failover(dev))
>+			continue;
>+		if (ether_addr_equal(failover_dev->perm_addr, dev->perm_addr))
>+			net_failover_slave_register(dev);
>+	}
>+	rtnl_unlock();
>+}
>+


For every exported function, please provide documentation in format:

/**
 *	net_failover_register - Register net failover device
 *
 *	@dev: netdevice the failover is registerd for
 *	@ops: failover ops
 *
 *	Describe what the function does, what are expected inputs and
 *	outputs, etc. Don't hesistate to be verbose. Mention the 2/3netdev
 *	model here. Then you don't need the comment in the header file
 *	for there functions.
 */

>+int net_failover_register(struct net_device *dev, struct net_failover_ops *ops,
>+			  struct net_failover **pfailover)

Just return "struct net_failover *" instead of arg ** and use ERR_PTR
macro to propagate an error.


>+{
>+	struct net_failover *failover;
>+
>+	failover = kzalloc(sizeof(*failover), GFP_KERNEL);
>+	if (!failover)
>+		return -ENOMEM;
>+
>+	rcu_assign_pointer(failover->ops, ops);
>+	dev_hold(dev);
>+	dev->priv_flags |= IFF_FAILOVER;
>+	rcu_assign_pointer(failover->failover_dev, dev);
>+
>+	spin_lock(&net_failover_lock);
>+	list_add_tail(&failover->list, &net_failover_list);
>+	spin_unlock(&net_failover_lock);
>+
>+	netdev_info(dev, "failover master:%s registered\n", dev->name);
>+
>+	nfo_register_existing_slave(dev);
>+
>+	*pfailover = failover;
>+
>+	return 0;
>+}
>+EXPORT_SYMBOL_GPL(net_failover_register);
>+
>+void net_failover_unregister(struct net_failover *failover)
>+{
>+	struct net_device *failover_dev;
>+
>+	failover_dev = rcu_dereference(failover->failover_dev);
>+
>+	netdev_info(failover_dev, "failover master:%s unregistered\n",
>+		    failover_dev->name);
>+
>+	failover_dev->priv_flags &= ~IFF_FAILOVER;
>+	dev_put(failover_dev);
>+
>+	spin_lock(&net_failover_lock);
>+	list_del(&failover->list);
>+	spin_unlock(&net_failover_lock);
>+
>+	kfree(failover);
>+}
>+EXPORT_SYMBOL_GPL(net_failover_unregister);
>+
>+int net_failover_create(struct net_device *standby_dev,
>+			struct net_failover **pfailover)

Same here, just return "struct net_failover *"


>+{
>+	struct device *dev = standby_dev->dev.parent;
>+	struct net_device *failover_dev;
>+	int err;
>+
>+	/* Alloc at least 2 queues, for now we are going with 16 assuming
>+	 * that VF devices being enslaved won't have too many queues.
>+	 */
>+	failover_dev = alloc_etherdev_mq(sizeof(struct net_failover_info), 16);
>+	if (!failover_dev) {
>+		dev_err(dev, "Unable to allocate failover_netdev!\n");
>+		return -ENOMEM;
>+	}
>+
>+	dev_net_set(failover_dev, dev_net(standby_dev));
>+	SET_NETDEV_DEV(failover_dev, dev);
>+
>+	failover_dev->netdev_ops = &failover_dev_ops;
>+	failover_dev->ethtool_ops = &failover_ethtool_ops;
>+
>+	/* Initialize the device options */
>+	failover_dev->priv_flags |= IFF_UNICAST_FLT | IFF_NO_QUEUE;
>+	failover_dev->priv_flags &= ~(IFF_XMIT_DST_RELEASE |
>+				       IFF_TX_SKB_SHARING);
>+
>+	/* don't acquire failover netdev's netif_tx_lock when transmitting */
>+	failover_dev->features |= NETIF_F_LLTX;
>+
>+	/* Don't allow failover devices to change network namespaces. */
>+	failover_dev->features |= NETIF_F_NETNS_LOCAL;
>+
>+	failover_dev->hw_features = FAILOVER_VLAN_FEATURES |
>+				    NETIF_F_HW_VLAN_CTAG_TX |
>+				    NETIF_F_HW_VLAN_CTAG_RX |
>+				    NETIF_F_HW_VLAN_CTAG_FILTER;
>+
>+	failover_dev->hw_features |= NETIF_F_GSO_ENCAP_ALL;
>+	failover_dev->features |= failover_dev->hw_features;
>+
>+	memcpy(failover_dev->dev_addr, standby_dev->dev_addr,
>+	       failover_dev->addr_len);
>+
>+	failover_dev->min_mtu = standby_dev->min_mtu;
>+	failover_dev->max_mtu = standby_dev->max_mtu;
>+
>+	err = register_netdev(failover_dev);
>+	if (err < 0) {

if (err)
is enough


>+		dev_err(dev, "Unable to register failover_dev!\n");
>+		goto err_register_netdev;
>+	}
>+
>+	netif_carrier_off(failover_dev);
>+
>+	err = net_failover_register(failover_dev, NULL, pfailover);
>+	if (err < 0)

if (err)
is enough


>+		goto err_failover_register;
>+
>+	return 0;
>+
>+err_failover_register:
>+	unregister_netdev(failover_dev);
>+err_register_netdev:
>+	free_netdev(failover_dev);
>+
>+	return err;
>+}
>+EXPORT_SYMBOL_GPL(net_failover_create);

[...]

_______________________________________________
Virtualization mailing list
Virtualization@xxxxxxxxxxxxxxxxxxxxxxxxxx
https://lists.linuxfoundation.org/mailman/listinfo/virtualization



[Index of Archives]     [KVM Development]     [Libvirt Development]     [Libvirt Users]     [CentOS Virtualization]     [Netdev]     [Ethernet Bridging]     [Linux Wireless]     [Kernel Newbies]     [Security]     [Linux for Hams]     [Netfilter]     [Bugtraq]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux RAID]     [Linux Admin]     [Samba]

  Powered by Linux