Added per-namespace network loopback device Signed-off-by: Dmitry Mishin <dim at openvz.org> --- drivers/net/Space.c | 5 +- drivers/net/loopback.c | 103 ++++++++++++++++++++++++++---------------- include/linux/net_namespace.h | 2 include/linux/netdevice.h | 14 +++++ net/core/dev.c | 3 - net/core/net_namespace.c | 12 ++++ net/ipv4/devinet.c | 2 net/ipv6/addrconf.c | 2 net/ipv6/route.c | 6 +- 9 files changed, 102 insertions(+), 47 deletions(-) --- linux-2.6.19-rc6-mm2.orig/drivers/net/Space.c +++ linux-2.6.19-rc6-mm2/drivers/net/Space.c @@ -33,6 +33,7 @@ #include <linux/errno.h> #include <linux/init.h> #include <linux/netlink.h> +#include <linux/net_namespace.h> /* A unified ethernet device probe. This is the easiest way to have every ethernet adaptor have the name "eth[0123...]". @@ -353,14 +354,14 @@ static void __init trif_probe2(int unit) * The loopback device is global so it can be directly referenced * by the network code. Also, it must be first on device list. */ -extern int loopback_init(void); +extern int loopback_init(struct net_namespace *); /* Statically configured drivers -- order matters here. */ static int __init net_olddevs_init(void) { int num; - if (loopback_init()) { + if (loopback_init(current_net_ns)) { printk(KERN_ERR "Network loopback device setup failed\n"); } --- linux-2.6.19-rc6-mm2.orig/drivers/net/loopback.c +++ linux-2.6.19-rc6-mm2/drivers/net/loopback.c @@ -58,12 +58,6 @@ #include <linux/tcp.h> #include <linux/percpu.h> -struct pcpu_lstats { - unsigned long packets; - unsigned long bytes; -}; -static DEFINE_PER_CPU(struct pcpu_lstats, pcpu_lstats); - #define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16) /* KISS: just allocate small chunks and copy bits. @@ -154,7 +148,7 @@ static int loopback_xmit(struct sk_buff dev->last_rx = jiffies; /* it's OK to use __get_cpu_var() because BHs are off */ - lb_stats = &__get_cpu_var(pcpu_lstats); + lb_stats = per_cpu_ptr(pcpu_lstats_ptr, smp_processor_id()); lb_stats->bytes += skb->len; lb_stats->packets++; @@ -163,11 +157,9 @@ static int loopback_xmit(struct sk_buff return 0; } -static struct net_device_stats loopback_stats; - static struct net_device_stats *get_stats(struct net_device *dev) { - struct net_device_stats *stats = &loopback_stats; + struct net_device_stats *stats = netdev_priv(dev); unsigned long bytes = 0; unsigned long packets = 0; int i; @@ -175,7 +167,7 @@ static struct net_device_stats *get_stat for_each_possible_cpu(i) { const struct pcpu_lstats *lb_stats; - lb_stats = &per_cpu(pcpu_lstats, i); + lb_stats = per_cpu_ptr(pcpu_lstats_ptr, i); bytes += lb_stats->bytes; packets += lb_stats->packets; } @@ -200,38 +192,73 @@ static const struct ethtool_ops loopback .get_rx_csum = always_on, }; -/* - * The loopback device is special. There is only one instance and - * it is statically allocated. Don't do this for other devices. - */ -struct net_device loopback_dev = { - .name = "lo", - .get_stats = &get_stats, - .priv = &loopback_stats, - .mtu = (16 * 1024) + 20 + 20 + 12, - .hard_start_xmit = loopback_xmit, - .hard_header = eth_header, - .hard_header_cache = eth_header_cache, - .header_cache_update = eth_header_cache_update, - .hard_header_len = ETH_HLEN, /* 14 */ - .addr_len = ETH_ALEN, /* 6 */ - .tx_queue_len = 0, - .type = ARPHRD_LOOPBACK, /* 0x0001*/ - .rebuild_header = eth_rebuild_header, - .flags = IFF_LOOPBACK, - .features = NETIF_F_SG | NETIF_F_FRAGLIST +struct net_device *loopback_dev_p; +EXPORT_SYMBOL(loopback_dev_p); + +struct pcpu_lstats *pcpu_lstats_p; +EXPORT_SYMBOL(pcpu_lstats_p); + + +void loopback_dev_dtor(struct net_device *dev) +{ + free_percpu(dev->net_ns->pcpu_lstats_p); + free_netdev(dev); +} + +void loopback_dev_ctor(struct net_device *dev) +{ + dev->mtu = (16 * 1024) + 20 + 20 + 12; + dev->hard_start_xmit = loopback_xmit; + dev->hard_header = eth_header; + dev->hard_header_cache = eth_header_cache; + dev->header_cache_update = eth_header_cache_update; + dev->hard_header_len = ETH_HLEN; /* 14 */ + dev->addr_len = ETH_ALEN; /* 6 */ + dev->tx_queue_len = 0; + dev->type = ARPHRD_LOOPBACK; /* 0x0001*/ + dev->rebuild_header = eth_rebuild_header; + dev->flags = IFF_LOOPBACK; + dev->features = NETIF_F_SG | NETIF_F_FRAGLIST #ifdef LOOPBACK_TSO | NETIF_F_TSO #endif | NETIF_F_NO_CSUM | NETIF_F_HIGHDMA - | NETIF_F_LLTX, - .ethtool_ops = &loopback_ethtool_ops, -}; + | NETIF_F_LLTX; + dev->ethtool_ops = &loopback_ethtool_ops; + dev->get_stats = &get_stats; + dev->destructor = loopback_dev_dtor; +} /* Setup and register the loopback device. */ -int __init loopback_init(void) +int loopback_init(struct net_namespace *ns) { - return register_netdev(&loopback_dev); -}; + int err; + + err = -ENOMEM; + ns->loopback_dev_p = alloc_netdev(sizeof(struct net_device_stats), "lo", + loopback_dev_ctor); + if (ns->loopback_dev_p == NULL) + goto out; + ns->pcpu_lstats_p = alloc_percpu(struct pcpu_lstats); + if (ns->pcpu_lstats_p == NULL) + goto out_stats; + /* + * loopback device doesn't hold active reference: it doesn't prevent + * stopping of net_namespace. So, just put old namespace. + */ + put_net_ns(ns->loopback_dev_p->net_ns); + ns->loopback_dev_p->net_ns = ns; + err = register_netdev(ns->loopback_dev_p); + if (err) + goto out_register; + return 0; -EXPORT_SYMBOL(loopback_dev); +out_register: + free_percpu(ns->pcpu_lstats_p); + /* in order to avoid put in free_netdev() */ + ns->loopback_dev_p->net_ns = NULL; +out_stats: + free_netdev(ns->loopback_dev_p); +out: + return err; +} --- linux-2.6.19-rc6-mm2.orig/include/linux/net_namespace.h +++ linux-2.6.19-rc6-mm2/include/linux/net_namespace.h @@ -9,6 +9,8 @@ struct net_namespace { struct kref kref; struct nsproxy *ns; struct net_device *dev_base_p, **dev_tail_p; + struct net_device *loopback_dev_p; + struct pcpu_lstats *pcpu_lstats_p; unsigned int hash; }; --- linux-2.6.19-rc6-mm2.orig/include/linux/netdevice.h +++ linux-2.6.19-rc6-mm2/include/linux/netdevice.h @@ -562,16 +562,28 @@ struct packet_type { #include <linux/notifier.h> #include <linux/net_namespace.h> -extern struct net_device loopback_dev; /* The loopback */ +struct pcpu_lstats { + unsigned long packets; + unsigned long bytes; +}; + +extern struct net_device *loopback_dev_p; +extern struct pcpu_lstats *pcpu_lstats_p; #ifndef CONFIG_NET_NS +#define loopback_dev_ptr loopback_dev_p +#define pcpu_lstats_ptr pcpu_lstats_p extern struct net_device *dev_base; /* All devices */ #define dev_base_ns(dev) dev_base #else +#define loopback_dev_ptr (current_net_ns->loopback_dev_p) +#define pcpu_lstats_ptr (current_net_ns->pcpu_lstats_p) #define dev_base (current_net_ns->dev_base_p) #define dev_base_ns(dev) (dev->net_ns->dev_base_p) #endif +#define loopback_dev (*loopback_dev_ptr) /* The loopback */ extern rwlock_t dev_base_lock; /* Device list lock */ +extern int loopback_init(struct net_namespace *); extern int netdev_boot_setup_check(struct net_device *dev); extern unsigned long netdev_boot_base(const char *prefix, int unit); extern struct net_device *dev_getbyhwaddr(unsigned short type, char *hwaddr); --- linux-2.6.19-rc6-mm2.orig/net/core/dev.c +++ linux-2.6.19-rc6-mm2/net/core/dev.c @@ -3254,7 +3254,8 @@ void free_netdev(struct net_device *dev) ns = dev->net_ns; if (ns != NULL) { - put_net_ns(ns); + if (dev != ns->loopback_dev_p) + put_net_ns(ns); dev->net_ns = NULL; } #endif --- linux-2.6.19-rc6-mm2.orig/net/core/net_namespace.c +++ linux-2.6.19-rc6-mm2/net/core/net_namespace.c @@ -10,6 +10,7 @@ #include <linux/nsproxy.h> #include <linux/net_namespace.h> #include <linux/net.h> +#include <linux/netdevice.h> struct net_namespace init_net_ns = { .kref = { @@ -18,6 +19,8 @@ struct net_namespace init_net_ns = { .ns = &init_nsproxy, .dev_base_p = NULL, .dev_tail_p = &init_net_ns.dev_base_p, + .loopback_dev_p = NULL, + .pcpu_lstats_p = NULL, }; #ifdef CONFIG_NET_NS @@ -40,7 +43,15 @@ static struct net_namespace *clone_net_n ns->dev_base_p = NULL; ns->dev_tail_p = &ns->dev_base_p; ns->hash = net_random(); + + if (loopback_init(ns)) + goto out_loopback; return ns; + +out_loopback: + BUG_ON(atomic_read(&ns->kref.refcount) != 1); + kfree(ns); + return NULL; } /* @@ -78,6 +89,7 @@ void free_net_ns(struct kref *kref) struct net_namespace *ns; ns = container_of(kref, struct net_namespace, kref); + unregister_netdev(ns->loopback_dev_p); if (ns->dev_base_p != NULL) { printk("NET_NS: BUG: namespace %p has devices! ref %d\n", ns, atomic_read(&ns->kref.refcount)); --- linux-2.6.19-rc6-mm2.orig/net/ipv4/devinet.c +++ linux-2.6.19-rc6-mm2/net/ipv4/devinet.c @@ -197,7 +197,7 @@ static void inetdev_destroy(struct in_de ASSERT_RTNL(); dev = in_dev->dev; - if (dev == &loopback_dev) + if (dev == loopback_dev_p) return; in_dev->dead = 1; --- linux-2.6.19-rc6-mm2.orig/net/ipv6/addrconf.c +++ linux-2.6.19-rc6-mm2/net/ipv6/addrconf.c @@ -2361,7 +2361,7 @@ static int addrconf_ifdown(struct net_de ASSERT_RTNL(); - if (dev == &loopback_dev && how == 1) + if (dev == loopback_dev_p && how == 1) how = 0; rt6_ifdown(dev); --- linux-2.6.19-rc6-mm2.orig/net/ipv6/route.c +++ linux-2.6.19-rc6-mm2/net/ipv6/route.c @@ -124,7 +124,6 @@ struct rt6_info ip6_null_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, .obsolete = -1, .error = -ENETUNREACH, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -150,7 +149,6 @@ struct rt6_info ip6_prohibit_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, .obsolete = -1, .error = -EACCES, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -170,7 +168,6 @@ struct rt6_info ip6_blk_hole_entry = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .dev = &loopback_dev, .obsolete = -1, .error = -EINVAL, .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, @@ -2469,7 +2466,10 @@ void __init ip6_route_init(void) #endif #ifdef CONFIG_IPV6_MULTIPLE_TABLES fib6_rules_init(); + ip6_prohibit_entry.u.dst.dev = &loopback_dev; + ip6_blk_hole_entry.u.dst.dev = &loopback_dev; #endif + ip6_null_entry.u.dst.dev = &loopback_dev; } void ip6_route_cleanup(void)