Quoting Dan Smith (danms@xxxxxxxxxx): > Guilt dropped the new checkpoint_dev.c file when I switched to the > newer branch. Sorry about that. Updated patch included below. (Just a few comments on a cursory look. Will take a closer look later) > +int ckpt_netdev_inet_addrs(struct in_device *indev, > + struct ckpt_netdev_addr *_abuf[]) > +{ > + struct ckpt_netdev_addr *abuf = NULL; > + struct in_ifaddr *addr = indev->ifa_list; > + int pages = 0; > + int addrs = 0; > + int max; > + > + read_lock(&dev_base_lock); > + retry: > + if (++pages > 4) { > + addrs = -ENOMEM; > + goto out; > + } > + > + *_abuf = krealloc(abuf, PAGE_SIZE * pages, GFP_KERNEL); rw_lockt is effectively a spinlock, so I don't think you can sleep here. > + if (*_abuf == NULL) { > + addrs = -ENOMEM; > + goto out; > + } > + abuf = *_abuf; > + > + max = (pages * PAGE_SIZE) / sizeof(*abuf); > + while (addr) { > + abuf[addrs].type = CKPT_NETDEV_ADDR_IPV4; /* Only IPv4 now */ > + abuf[addrs].inet4_local = addr->ifa_local; > + abuf[addrs].inet4_address = addr->ifa_address; > + abuf[addrs].inet4_mask = addr->ifa_mask; > + abuf[addrs].inet4_broadcast = addr->ifa_broadcast; > + > + addr = addr->ifa_next; > + if (++addrs >= max) > + goto retry; > + } > + > + out: > + read_unlock(&dev_base_lock); > + > + if (addrs < 0) { > + kfree(abuf); > + *_abuf = NULL; > + } > + > + return addrs; > +} > + > +struct ckpt_hdr_netdev *ckpt_netdev_base(struct ckpt_ctx *ctx, > + struct net_device *dev, > + struct ckpt_netdev_addr *addrs[]) > +{ > + struct ckpt_hdr_netdev *h; > + int ret; > + > + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_NETDEV); > + if (!h) > + return ERR_PTR(-ENOMEM); > + > + ret = ckpt_netdev_hwaddr(dev, h); > + if (ret < 0) > + goto out; > + > + *addrs = NULL; > + ret = h->inet_addrs = ckpt_netdev_inet_addrs(dev->ip_ptr, addrs); > + if (ret < 0) > + goto out; > + > + ret = h->netns_ref = checkpoint_obj(ctx, dev->nd_net, CKPT_OBJ_NET_NS); > + out: > + if (ret < 0) { > + ckpt_hdr_put(ctx, h); > + h = ERR_PTR(ret); > + if (*addrs) > + kfree(*addrs); > + } > + > + return h; > +} > + > +int checkpoint_netdev(struct ckpt_ctx *ctx, void *ptr) > +{ > + struct net_device *dev = (struct net_device *)ptr; > + > + if (!dev->netdev_ops->ndo_checkpoint) > + return -EINVAL; > + > + ckpt_debug("checkpointing netdev %s\n", dev->name); > + > + return dev->netdev_ops->ndo_checkpoint(ctx, dev); > +} > + > +int checkpoint_netns(struct ckpt_ctx *ctx, void *ptr) > +{ > + struct net *net = ptr; > + struct net_device *dev; > + struct ckpt_hdr_netns *h; > + int ret; > + > + h = ckpt_hdr_get_type(ctx, sizeof(*h), CKPT_HDR_NET_NS); > + if (!h) > + return -ENOMEM; > + > + h->this_ref = ckpt_obj_lookup(ctx, net, CKPT_OBJ_NET_NS); > + BUG_ON(h->this_ref == 0); > + > + ret = ckpt_write_obj(ctx, (struct ckpt_hdr *) h); > + if (ret < 0) > + goto out; > + > + for_each_netdev(net, dev) { > + if (!dev->netdev_ops->ndo_checkpoint) > + continue; Won't the checkpoint_obj() call checkpoint_netdev(), which will return -EINVAL if ndo_checkpoint is not defined? But here you skip the checkpoint_obj() call (which seems wrong to me). Which do you want to have happen? > + ret = checkpoint_obj(ctx, dev, CKPT_OBJ_NETDEV); > + if (ret < 0) > + break; > + } > + out: > + ckpt_hdr_put(ctx, h); > + > + return ret; > +} > + > +static int restore_in_addrs(struct ckpt_ctx *ctx, > + __u32 naddrs, > + struct net *net, > + struct net_device *dev) > +{ > + __u32 i; > + int ret = 0; > + int len = naddrs * sizeof(struct ckpt_netdev_addr); > + struct ckpt_netdev_addr *addrs = NULL; > + > + addrs = kmalloc(len, GFP_KERNEL); > + if (!addrs) > + return -ENOMEM; > + > + ret = _ckpt_read_buffer(ctx, addrs, len); > + if (ret < 0) > + goto out; > + > + for (i = 0; i < naddrs; i++) { > + struct ckpt_netdev_addr *addr = &addrs[i]; > + struct ifreq req; > + struct sockaddr_in *inaddr; > + > + if (addr->type != CKPT_NETDEV_ADDR_IPV4) { > + ret = -EINVAL; > + ckpt_err(ctx, ret, "Unsupported netdev addr type %i\n", > + addr->type); > + break; > + } > + > + ckpt_debug("restoring %s: %x/%x/%x\n", dev->name, > + addr->inet4_address, > + addr->inet4_mask, > + addr->inet4_broadcast); > + > + memcpy(req.ifr_name, dev->name, IFNAMSIZ); > + > + inaddr = (struct sockaddr_in *)&req.ifr_addr; > + inaddr->sin_addr.s_addr = addr->inet4_address; > + inaddr->sin_family = AF_INET; > + ret = __kern_devinet_ioctl(net, SIOCSIFADDR, &req); > + if (ret < 0) { > + ckpt_err(ctx, ret, "Failed to set address\n"); > + break; > + } > + > + inaddr = (struct sockaddr_in *)&req.ifr_addr; > + inaddr->sin_addr.s_addr = addr->inet4_mask; > + inaddr->sin_family = AF_INET; > + ret = __kern_devinet_ioctl(net, SIOCSIFNETMASK, &req); > + if (ret < 0) { > + ckpt_err(ctx, ret, "Failed to set netmask\n"); > + break; > + } > + > + inaddr = (struct sockaddr_in *)&req.ifr_addr; > + inaddr->sin_addr.s_addr = addr->inet4_broadcast; > + inaddr->sin_family = AF_INET; > + ret = __kern_devinet_ioctl(net, SIOCSIFBRDADDR, &req); > + if (ret < 0) { > + ckpt_err(ctx, ret, "Failed to set broadcast\n"); > + break; > + } > + } > + > + out: > + kfree(addrs); > + > + return ret; > +} > + > +static int veth_peer_data(struct sk_buff *skb, char *peer_name) > +{ > + struct nlattr *linkdata; > + struct ifinfomsg ifm; > + > + linkdata = nla_nest_start(skb, IFLA_INFO_DATA); > + if (!linkdata) > + return -ENOMEM; > + > + nla_put(skb, VETH_INFO_PEER, sizeof(ifm), &ifm); > + nla_put_string(skb, IFLA_IFNAME, peer_name); > + > + nla_nest_end(skb, linkdata); > + > + return 0; > +} > + > +static struct sk_buff *new_link_message(char *this_name, char *peer_name) > +{ > + int ret = -ENOMEM; > + int flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_ACK; > + struct nlmsghdr *nlh; > + struct sk_buff *skb; > + struct ifinfomsg *ifm; > + struct nlattr *linkinfo; > + > + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); > + if (!skb) > + goto out; > + > + nlh = nlmsg_put(skb, 0, 0, RTM_NEWLINK, sizeof(*ifm), flags); > + if (!nlh) > + goto out; > + > + ifm = nlmsg_data(nlh); > + memset(ifm, 0, sizeof(*ifm)); > + > + ret = nla_put_string(skb, IFLA_IFNAME, this_name); > + if (ret) > + goto out; > + > + ret = -ENOMEM; > + > + linkinfo = nla_nest_start(skb, IFLA_LINKINFO); > + if (!linkinfo) > + goto out; > + > + if (nla_put_string(skb, IFLA_INFO_KIND, "veth") < 0) > + goto out; > + > + ret = veth_peer_data(skb, peer_name); By hard-coding veth stuff into generic-sounding functions in net/checkpoint_dev.c you seem to be assuming that only veth will ever be supported for checkpoint/restart? what about macvlan? (Not to mention that eventually we intend to support moving physical nics into containers) -serge _______________________________________________ Containers mailing list Containers@xxxxxxxxxxxxxxxxxxxxxxxxxx https://lists.linux-foundation.org/mailman/listinfo/containers