When creating a standard tap device, if provided with an ifname that contains "%d", rather than taking that literally as the name to use for the new device, the kernel will instead use that string as a template, and search for the lowest number that could be put in place of %d to produce an otherwise unused and unique name for the new device. For example, if there is no tap device name given in the XML, libvirt will always send "vnet%d" as the device name, and the kernel will create new devices named "vnet0", "vnet1", etc. If one of those devices is deleted, creating a "hole" in the name list, the kernel will always attempt to reuse the name in the hole first before using a name with a higher number (i.e. it finds the lowest possible unused number). The problem with this, as described in the previous patch dealing with macvtap device naming, is that it makes "immediate reuse" of a newly freed tap device name *much* more common, and in the aftermath of deleting a tap device, there is some other necessary cleanup of things which are named based on the device name (nwfilter rules, bandwidth rules, OVS switch ports, to name a few) that could end up stomping over the top of the setup of a new device of the same name for a different guest. Since the kernel "create a name based on a template" functionality for tap devices doesn't exist for macvtap, this patch for standard tap devices is a bit different from the previous patch for macvtap - in particular there was no previous "bitmap ID reservation system" or overly-complex retry loop that needed to be removed. We simply find an unused name, and pass that name on to the kernel instead of "vnet%d". The counter used to generate these names is also wrapped when either it reaches INT_MAX or the full name would overflow IFNAMSIZ-1 characters. In the case of "vnet%d" and a 32-bit int, we would reach INT_MAX first, but possibly someday someone will change the name from vnet to something else.
(NB: It is still possible for a user to provide their own parameterized template name (e.g. "mytap%d") in the XML, and libvirt will just pass that through to the kernel as it always has.) Signed-off-by: Laine Stump <laine@xxxxxxxxxx> --- src/libvirt_private.syms | 1 + src/qemu/qemu_process.c | 20 +++++++- src/util/virnetdevtap.c | 108 ++++++++++++++++++++++++++++++++++++++- src/util/virnetdevtap.h | 4 ++ 4 files changed, 130 insertions(+), 3 deletions(-) diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms index 4b155691a8..5736a2dbd3 100644 --- a/src/libvirt_private.syms +++ b/src/libvirt_private.syms @@ -2676,6 +2676,7 @@ virNetDevTapGetName; virNetDevTapGetRealDeviceName; virNetDevTapInterfaceStats; virNetDevTapReattachBridge; +virNetDevTapReserveName; # util/virnetdevveth.h diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c index 2a862e6d9e..222a1376c4 100644 --- a/src/qemu/qemu_process.c +++ b/src/qemu/qemu_process.c @@ -3320,8 +3320,26 @@ qemuProcessNotifyNets(virDomainDefPtr def) * domain to be unceremoniously killed, which would be *very* * impolite. 
*/ - if (virDomainNetGetActualType(net) == VIR_DOMAIN_NET_TYPE_DIRECT) + switch (virDomainNetGetActualType(net)) { + case VIR_DOMAIN_NET_TYPE_DIRECT: virNetDevMacVLanReserveName(net->ifname); + break; + case VIR_DOMAIN_NET_TYPE_BRIDGE: + case VIR_DOMAIN_NET_TYPE_NETWORK: + case VIR_DOMAIN_NET_TYPE_ETHERNET: + virNetDevTapReserveName(net->ifname); + break; + case VIR_DOMAIN_NET_TYPE_USER: + case VIR_DOMAIN_NET_TYPE_VHOSTUSER: + case VIR_DOMAIN_NET_TYPE_SERVER: + case VIR_DOMAIN_NET_TYPE_CLIENT: + case VIR_DOMAIN_NET_TYPE_MCAST: + case VIR_DOMAIN_NET_TYPE_INTERNAL: + case VIR_DOMAIN_NET_TYPE_HOSTDEV: + case VIR_DOMAIN_NET_TYPE_UDP: + case VIR_DOMAIN_NET_TYPE_LAST: + break; + } if (net->type == VIR_DOMAIN_NET_TYPE_NETWORK) { if (!conn && !(conn = virGetConnectNetwork())) diff --git a/src/util/virnetdevtap.c b/src/util/virnetdevtap.c index c0a7c3019e..a46f836da2 100644 --- a/src/util/virnetdevtap.c +++ b/src/util/virnetdevtap.c @@ -49,11 +49,100 @@ #if defined(HAVE_GETIFADDRS) && defined(AF_LINK) # include <ifaddrs.h> #endif +#include <math.h> #define VIR_FROM_THIS VIR_FROM_NONE VIR_LOG_INIT("util.netdevtap"); +virMutex virNetDevTapCreateMutex = VIR_MUTEX_INITIALIZER; +static int virNetDevTapLastID = -1; /* not "unsigned" because callers use %d */ + + +/** + * virNetDevTapReserveName: + * @name: name of an existing tap device + * + * Set the value of virNetDevTapLastID to assure that any new tap + * device created with an autogenerated name will use a number higher + * than the number in the given tap device name. + * + * Returns nothing. 
+ */ +void +virNetDevTapReserveName(const char *name) +{ + unsigned int id; + const char *idstr = NULL; + + + if (STRPREFIX(name, VIR_NET_GENERATED_TAP_PREFIX)) { + + VIR_INFO("marking device in use: '%s'", name); + + idstr = name + strlen(VIR_NET_GENERATED_TAP_PREFIX); + + if (virStrToLong_ui(idstr, NULL, 10, &id) >= 0) { + virMutexLock(&virNetDevTapCreateMutex); + + if (virNetDevTapLastID < (int)id) + virNetDevTapLastID = id; + + virMutexUnlock(&virNetDevTapCreateMutex); + } + } +} + + +/** + * virNetDevTapGenerateName: + * @ifname: pointer to pointer to string containing template + * + * generate a new (currently unused) name for a new tap device based + * on the templace string in @ifname - replace %d with + * ++virNetDevTapLastID, and keep trying new values until one is found + * that doesn't already exist, or we've tried 10000 different + * names. Once a usable name is found, replace the template with the + * actual name. + * + * Returns 0 on success, -1 on failure. + */ +static int +virNetDevTapGenerateName(char **ifname) +{ + int id; + double maxIDd = pow(10, IFNAMSIZ - 1 - strlen(VIR_NET_GENERATED_TAP_PREFIX)); + int maxID = INT_MAX; + int attempts = 0; + + if (maxIDd <= (double)INT_MAX) + maxID = (int)maxIDd; + + do { + g_autofree char *try = NULL; + + id = ++virNetDevTapLastID; + + /* reset before overflow */ + if (virNetDevTapLastID >= maxID) + virNetDevTapLastID = -1; + + try = g_strdup_printf(*ifname, id); + + if (!virNetDevExists(try)) { + g_free(*ifname); + *ifname = g_steal_pointer(&try); + return 0; + } + } while (++attempts < 10000); + + virReportError(VIR_ERR_INTERNAL_ERROR, + _("no unused %s names available"), + VIR_NET_GENERATED_TAP_PREFIX); + return -1; +} + + /** * virNetDevTapGetName: * @tapfd: a tun/tap file descriptor @@ -230,10 +319,22 @@ int virNetDevTapCreate(char **ifname, size_t tapfdSize, unsigned int flags) { - size_t i; + size_t i = 0; struct ifreq ifr; int ret = -1; - int fd; + int fd = 0; + + 
virMutexLock(&virNetDevTapCreateMutex); + + /* if ifname is "vnet%d", then auto-generate a name for the new + * device (the kernel could do this for us, but has a bad habit of + * immediately re-using names that have just been released, which + * can lead to race conditions). + */ + if (STREQ(*ifname, VIR_NET_GENERATED_TAP_PREFIX "%d") && + virNetDevTapGenerateName(ifname) < 0) { + goto cleanup; + } if (!tunpath) tunpath = "/dev/net/tun"; @@ -299,9 +400,11 @@ int virNetDevTapCreate(char **ifname, tapfd[i] = fd; } + VIR_INFO("created device: '%s'", *ifname); ret = 0; cleanup: + virMutexUnlock(&virNetDevTapCreateMutex); if (ret < 0) { VIR_FORCE_CLOSE(fd); while (i--) @@ -351,6 +454,7 @@ int virNetDevTapDelete(const char *ifname, goto cleanup; } + VIR_INFO("delete device: '%s'", ifname); ret = 0; cleanup: diff --git a/src/util/virnetdevtap.h b/src/util/virnetdevtap.h index c6bd9285ba..dea8aec3af 100644 --- a/src/util/virnetdevtap.h +++ b/src/util/virnetdevtap.h @@ -29,6 +29,10 @@ # define VIR_NETDEV_TAP_REQUIRE_MANUAL_CLEANUP 1 #endif +void +virNetDevTapReserveName(const char *name) + ATTRIBUTE_NONNULL(1); + int virNetDevTapCreate(char **ifname, const char *tunpath, int *tapfd, -- 2.26.2