This is a rework of the patch Dirk sent out last week. The attached patch addresses the problem that when a PASSTHROUGH mode DIRECT NIC connection is made the MAC address of the NIC is not automatically set and reset to the configured VM MAC and back again. The patch fixes this problem by setting and resetting the MAC while remembering the previous setting while the VM is running. This also works if libvirtd is restarted while the VM is running. The patch passes make syntax-check Signed-off-by: Dirk Herrendoerfer <d.herrendoerfer at herrendoerfer.name> Signed-off-by: Gerhard Stenzel <gerhard.stenzel@xxxxxxxxxx> --- src/libvirt_macvtap.syms | 2 + src/qemu/qemu_command.c | 48 +++++++++++++++ src/qemu/qemu_process.c | 45 ++++++++++++++ src/util/macvtap.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++ src/util/macvtap.h | 6 ++ 5 files changed, 250 insertions(+), 0 deletions(-) Index: libvirt/src/libvirt_macvtap.syms =================================================================== --- libvirt.orig/src/libvirt_macvtap.syms +++ libvirt/src/libvirt_macvtap.syms @@ -5,6 +5,8 @@ # macvtap.h delMacvtap; +getMacaddr; openMacvtapTap; +setMacaddr; vpAssociatePortProfileId; vpDisassociatePortProfileId; Index: libvirt/src/qemu/qemu_command.c =================================================================== --- libvirt.orig/src/qemu/qemu_command.c +++ libvirt/src/qemu/qemu_command.c @@ -125,6 +125,77 @@ qemuPhysIfaceConnect(virDomainDefPtr def net->model && STREQ(net->model, "virtio")) vnet_hdr = 1; + /** Note: When using PASSTHROUGH mode with MACVTAP devices the link + * device's MAC address must be set to the VMs MAC address. In + * order to not confuse the first switch or bridge in line this MAC + * address must be reset when the VM is shut down. + * This is especially important when using SRIOV capable cards that + * emulate their switch in firmware. + */ + if (net->data.direct.mode == VIR_DOMAIN_NETDEV_MACVTAP_MODE_PASSTHRU) { + unsigned char oldmac[6]; + rc = getMacaddr(&oldmac, net->data.direct.linkdev); + if (rc) { + virReportSystemError(rc, + _("Getting MAC address from '%s' " + "to '%02x:%02x:%02x:%02x:%02x:%02x' failed."), + net->data.direct.linkdev, + oldmac[0], oldmac[1], oldmac[2], + oldmac[3], oldmac[4], oldmac[5]); + } else { + char *oldmacpath = NULL; + char *oldmacname = NULL; + char *newmacname = NULL; + char *basedir = "/var/run/libvirt/network"; + + if (virAsprintf(&oldmacname,"%02x:%02x:%02x:%02x:%02x:%02x", + oldmac[0], oldmac[1], oldmac[2], + oldmac[3], oldmac[4], oldmac[5]) < 0) { + virReportOOMError(); + } + if (virAsprintf(&oldmacpath,"%s/%s", basedir, oldmacname) < 0) { + virReportOOMError(); + } + + if (virAsprintf(&newmacname, "%s/%s@%02x:%02x:%02x:%02x:%02x:%02x", + basedir, + net->data.direct.linkdev, + net->mac[0],net->mac[1],net->mac[2], + net->mac[3],net->mac[4],net->mac[5]) < 0) { + virReportOOMError(); + } + + rc = symlink (oldmacname, newmacname); + if (rc) { + virReportSystemError(errno, + _("MAC link file creation failed for %s."), + net->data.direct.linkdev); + } + rc = creat(oldmacpath, S_IWUSR); + if (rc) { + virReportSystemError(errno, + _("MAC link file creation failed for %s."), + oldmacpath); + } + rc = VIR_CLOSE(rc); + if (rc) { + virReportSystemError(errno, + _("MAC link file closing failed for %s."), + oldmacpath); + } + } + + rc = setMacaddr(net->mac, net->data.direct.linkdev); + if (rc) { + virReportSystemError(errno, + _("Setting MAC address on '%s' to " + "'%02x:%02x:%02x:%02x:%02x:%02x' failed."), + net->data.direct.linkdev, + net->mac[0], net->mac[1], net->mac[2], + net->mac[3], net->mac[4], net->mac[5]); + } + } + rc = openMacvtapTap(net->ifname, net->mac, net->data.direct.linkdev, net->data.direct.mode, vnet_hdr, def->uuid, &net->data.direct.virtPortProfile, &res_ifname, Index: libvirt/src/qemu/qemu_process.c =================================================================== --- libvirt.orig/src/qemu/qemu_process.c +++ libvirt/src/qemu/qemu_process.c @@ -2708,6 +2708,48 @@ void qemuProcessStop(struct qemud_driver if (net->type == VIR_DOMAIN_NET_TYPE_DIRECT) { delMacvtap(net->ifname, net->mac, net->data.direct.linkdev, &net->data.direct.virtPortProfile); + + if (net->data.direct.mode == VIR_DOMAIN_NETDEV_MACVTAP_MODE_PASSTHRU) { + char *newmacname = NULL; + char *oldmacname = NULL; + char *oldmacpath = NULL; + char *basedir = "/var/run/libvirt/network"; + + if (virAsprintf(&newmacname, "%s/%s@%02x:%02x:%02x:%02x:%02x:%02x", + basedir, + net->data.direct.linkdev, + net->mac[0],net->mac[1],net->mac[2], + net->mac[3],net->mac[4],net->mac[5]) < 0) { + virReportOOMError(); + } + + ret = virFileResolveLink(newmacname, &oldmacpath); + if (ret < 0) { + virReportSystemError(errno, + _("Reading MAC address from '%s' failed !"), + newmacname); + } else { + unsigned char oldmac[6]; + + oldmacname = basename(oldmacpath); + + if (virParseMacAddr(oldmacname, &oldmac[0]) != 0) { + virReportSystemError(VIR_ERR_INTERNAL_ERROR, + _("Cannot parse MAC address from '%s'"), + oldmacname); + } + + VIR_WARN(("Resetting MAC address on '%s' " + "to '%02x:%02x:%02x:%02x:%02x:%02x'."), + net->data.direct.linkdev, + oldmac[0],oldmac[1],oldmac[2], + oldmac[3],oldmac[4],oldmac[5]); + /*reset mac and remove link file-ignore results*/ + ret = setMacaddr(oldmac, net->data.direct.linkdev); + ret = unlink(oldmacpath); + ret = unlink(newmacname); + } + } VIR_FREE(net->ifname); } } Index: libvirt/src/util/macvtap.c =================================================================== --- libvirt.orig/src/util/macvtap.c +++ libvirt/src/util/macvtap.c @@ -87,6 +87,7 @@ # define LLDPAD_PID_FILE "/var/run/lldpad.pid" +#define MACADDRSIZE 6 enum virVirtualPortOp { ASSOCIATE = 0x1, @@ -191,6 +192,149 @@ err_exit: # if WITH_MACVTAP +/** + * getMacaddr: + * Get the MAC address of a network device + * + * @macaddress: Pointer where the MAC address will be stored + * @srcdev: The interface name of the NIC to get the MAC from + * + * Returns zero in case of success, + * negative value otherwise with error reported. + * + */ +int +getMacaddr(const unsigned char *macaddress, const char *srcdev ) +{ + int sockfd; + int io; + struct ifreq ifr; + + strcpy(ifr.ifr_name, srcdev); + + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if(sockfd < 0){ + return -1; + } + + io = ioctl(sockfd, SIOCGIFHWADDR, (char *)&ifr); + if(io < 0){ + return -1; + } + + memcpy(macaddress, ifr.ifr_ifru.ifru_hwaddr.sa_data, MACADDRSIZE); + + return 0; +} + +/** + * setMacaddr: + * Set the MAC address of a network device + * + * @macaddress: MAC address to assign to the NIC + * @srcdev: The interface name of the NIC + * + * Returns zero in case of success, + * negative value otherwise with error reported. + * + */ +int +setMacaddr(const unsigned char *macaddress, const char *srcdev ) +{ + int rc = 0; + struct nlmsghdr *resp; + struct nlmsgerr *err; + struct ifinfomsg ifinfo = { .ifi_family = AF_UNSPEC }; + int ifindex; + unsigned char *recvbuf = NULL; + unsigned int recvbuflen; + struct nl_msg *nl_msg; + + if (ifaceGetIndex(true, srcdev, &ifindex) != 0) + return -1; + + nl_msg = nlmsg_alloc_simple(RTM_SETLINK, NLM_F_REQUEST); + + if (!nl_msg) { + virReportOOMError(); + return -1; + } + + if (nlmsg_append(nl_msg, &ifinfo, sizeof(ifinfo), NLMSG_ALIGNTO) < 0) + goto buffer_too_small; + + if (nla_put_u32(nl_msg, IFLA_LINK, ifindex) < 0) + goto buffer_too_small; + + if (nla_put(nl_msg, IFLA_ADDRESS, MACADDRSIZE, macaddress) < 0) + goto buffer_too_small; + + if (srcdev && + nla_put(nl_msg, IFLA_IFNAME, strlen(srcdev)+1, srcdev) < 0) + goto buffer_too_small; + + if (nlComm(nl_msg, &recvbuf, &recvbuflen, 0) < 0) { + rc = -1; + goto err_exit; + } + + if (recvbuflen < NLMSG_LENGTH(0) || recvbuf == NULL) + goto malformed_resp; + + resp = (struct nlmsghdr *)recvbuf; + + switch (resp->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(resp); + if (resp->nlmsg_len < NLMSG_LENGTH(sizeof(*err))) + goto malformed_resp; + + switch (err->error) { + + case 0: + break; + + case -EEXIST: + rc = -1; + break; + + default: + macvtapError(VIR_ERR_INTERNAL_ERROR, "%s", + _("error setting device mac address")); + rc = -1; + } + break; + + case NLMSG_DONE: + break; + + default: + goto malformed_resp; + } + +err_exit: + nlmsg_free(nl_msg); + + VIR_FREE(recvbuf); + + return rc; + +malformed_resp: + nlmsg_free(nl_msg); + + macvtapError(VIR_ERR_INTERNAL_ERROR, "%s", + _("malformed netlink response message")); + VIR_FREE(recvbuf); + return -1; + +buffer_too_small: + nlmsg_free(nl_msg); + + macvtapError(VIR_ERR_INTERNAL_ERROR, "%s", + _("allocated netlink buffer is too small")); + return -1; +} + static int link_add(const char *type, const unsigned char *macaddress, int macaddrsize, Index: libvirt/src/util/macvtap.h =================================================================== --- libvirt.orig/src/util/macvtap.h +++ libvirt/src/util/macvtap.h @@ -75,6 +75,10 @@ enum virVMOperationType { # include "internal.h" +int getMacaddr(const unsigned char *macaddress, const char *srcdev ); + +int setMacaddr(const unsigned char *macaddress, const char *srcdev ); + int openMacvtapTap(const char *ifname, const unsigned char *macaddress, const char *linkdev, =================================================================== Best regards, Gerhard Stenzel ------------------------------------------------------------------------------------- IBM Deutschland Research & Development GmbH Vorsitzender des Aufsichtsrats: Martin Jetter Geschaeftsfuehrung: Dirk Wittkopp Sitz der Gesellschaft: Boeblingen Registergericht: Amtsgericht Stuttgart, HRB 243294 -- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list