Hi, we had the need to manage guests' bandwidth. Therefore I wrote a qemu hook script that achieves this and, if there is any interest in it, I would like to contribute it to the project. This script will only work on Linux hosts. It effectively limits the bandwidth a guest can _send_. It only somewhat effectively limits the bandwidth a guest can _receive_: well-behaved TCP connections slow down when packets are delayed or dropped, but with UDP we are facing a different situation. During my tests (using iperf -u), UDP was always far slower (by a factor of 10) than the configured limit. Since only TCP matters to us in this situation, we consider it "good enough". Also: this mechanism by no means protects you from DDoS attacks or the like. So what does the script do? The script uses iptables to mark packets going from and to a virtual machine. Depending on these marks, packets are filtered into tc classes which limit the bandwidth to a configurable amount. The configuration allows for groups of VMs which share an amount of bandwidth, while each machine keeps its guaranteed bandwidth in situations of high network usage. Documentation is part of the script. If you like it, feel free to include it. If you have any comments or questions, talk! :) Regards Dominik
#!/bin/bash
# /etc/libvirt/hooks/qemu
# qemu hook script for libvirtd
# manages bandwidth limits for virtual machines
# see http://libvirt.org/hooks.html
#
# based on work by horms: http://events.linuxfoundation.org/linuxcon2010/horman
#
# Invocation (by libvirtd):  qemu <domainname> <task> ...
# The domain XML is expected on stdin.  Only the tasks "start" and "stopped"
# are acted upon; any other task is reported on stderr and ignored.
#
# NOTE: "set -e" is deliberately NOT used: several tc commands below are
# expected to fail (e.g. when the root qdisc already exists).

configfile=/etc/libvirt/bandwidth.conf
# this configfile is sourced and is supposed to hold these variables:
#
# ceil_<dev>="max bandwidth for dev"
#   example:
#     ceil_eth1="1000mbit"
#   refer to tc(8) for UNITS
#
# <vmname>_<physdev>="<id> [<rate>] [<ceil>]"
#   "id"   is an arbitrary id used as iptables fw mark and classid for tc,
#          has to be unique, don't use 333. the first digit of this id
#          determines the group into which a vm is put. read more below.
#   "rate" is guaranteed bandwidth, optional, if not specified, default is
#          used (see below)
#   "ceil" is maximum bandwidth when other classes in this hierarchy don't
#          use it all, optional, if not specified, default is used (see
#          below). each vm in the same group should have the same ceil.
#          otherwise, the highest value is used (values are compared by
#          their leading number only, so use the same unit within a group).
#   example:
#     vm1_eth1="10 100mbit 1000mbit"
#
# Because the settings are shell variables, <vmname> and <physdev> must
# consist of letters, digits and underscores only.

########################
# example configuration:
########################
#
# machine setup:
#   eth0 - br0 - vnet[012]
#
# vms:
#   cliff
#   jason
#   rob
#
# cliff and rob have to share 30mbit, 15 is guaranteed to each
# jason, since he's the coolest, can have 30 on its own
#
# configfile to achieve this:
#
#   ceil_eth0="1000mbit"
#   cliff_eth0="10 15mbit 30mbit"
#   rob_eth0="11 15mbit 30mbit"
#   jason_eth0="20 30mbit 30mbit"
#
# this means since cliff and robs "id" both start with 1, they join group 1
# jasons id starts with 2, so he is in group 2
#
# this will form the following tc hierarchy for eth0 and the corresponding
# virtual device of the vm:
#
#                 /--- 1:10 rate 15 ceil 30
#                /
#        /--- 1:1111 rate 30 ceil 30
#       /        \
#      /          \--- 1:11 rate 15 ceil 30
#     |
#     |           /--- 1:20 rate 30 ceil 30
#     |          /
# root--- 1:2222 rate 30 ceil 30
#     |
#      \
#       \
#        \--- 1:333 rate 1000 ceil 1000
#
### end example configuration

defaultrate=100mbit
defaultceil=1000mbit
defaultglobalceil=1000mbit

# no changes past this line

if [[ -r "$configfile" ]]; then
  # shellcheck disable=SC1090
  source "$configfile"
else
  echo "configfile $configfile not found. exiting." >&2
  exit 1
fi

#######################################
# Print the value of the configuration variable named $1.
# Prints an empty line when the name is not a valid shell identifier or the
# variable is unset.  Uses indirect expansion instead of eval so that a
# hostile domain name can never be executed as shell code.
# Arguments: $1 - variable name, e.g. "cliff_eth0" or "ceil_eth0"
# Outputs:   the variable's value on stdout
#######################################
config_value() {
  local name=$1
  if [[ $name =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]]; then
    printf '%s\n' "${!name-}"
  else
    printf '\n'
  fi
}

#######################################
# Evaluate an XPath expression against the saved domain XML and print the
# text between the first pair of double quotes of the result.
# Globals:   domaincfg (read)
# Arguments: $1 - XPath expression selecting an attribute
# NOTE(review): relies on the xpath CLI accepting "<file> <expression>" and
# printing attr="value"; confirm against the xpath version installed.
#######################################
xml_attr() {
  xpath "$domaincfg" "$1" 2>/dev/null | cut -d '"' -f 2
}

#######################################
# Print the physical device attached to bridge $1.
# assuming the first device added to the bridge is the physical device
# Arguments: $1 - bridge name
#######################################
bridge_physdev() {
  # Anchor on trailing whitespace so "br0" does not also match "br01".
  brctl show | grep "^${1}[[:space:]]" | grep -Eo '[a-z]*[0-9]*$'
}

#######################################
# Print one "<group> <limit>" line per VM group configured on a device.
# A group is the first digit of a VM's id; its limit is the highest ceil of
# its members (compared by leading number).  Members without an explicit
# ceil count with $defaultceil, which is the ceil their leaf class gets.
# Globals:   all sourced <vmname>_<physdev> variables, defaultceil (read)
# Arguments: $1 - physical device name
#######################################
group_limits() {
  local physdev=$1 var id ceil
  # Variable names never contain whitespace or glob characters, so splitting
  # compgen's output into words is safe.
  for var in $(compgen -A variable); do
    [[ $var == *"_${physdev}" && $var != "ceil_${physdev}" ]] || continue
    read -r id _ ceil _ <<<"${!var}"
    [[ $id =~ ^[0-9]+$ ]] || continue
    printf '%s %s\n' "${id:0:1}" "${ceil:-$defaultceil}"
  done \
    | LC_ALL=C sort -k1,1 -k2,2n \
    | awk '{ max[$1] = $2 } END { for (g in max) print g, max[g] }'
}

#######################################
# Install packet marks, tc classes and filters for one interface of the
# domain.  Interfaces without a target device or not of type "bridge" are
# skipped.  Exits the hook with status 1 when the domain has no valid id
# configured for the physical device.
# Globals:   domainname, domaincfg, default* (read)
# Arguments: $1 - 1-based index of the <interface> element in the domain XML
#######################################
limits_start() {
  local idx=$1
  local virtdev sourcedev physdev id rate ceil groupid globalceil
  local group grouplimit

  virtdev=$(xml_attr "//interface[$idx]/target/@dev")
  [[ -n "$virtdev" ]] || return 0

  sourcedev=$(xml_attr "//interface[$idx][@type='bridge']/source/@bridge")
  [[ -n "$sourcedev" ]] || return 0

  physdev=$(bridge_physdev "$sourcedev")

  # "<id> [<rate>] [<ceil>]"; missing fields stay empty and get defaults.
  read -r id rate ceil _ <<<"$(config_value "${domainname}_${physdev}")"
  globalceil=$(config_value "ceil_${physdev}")

  if [[ ! $id =~ ^[0-9]+$ ]] || ((10#$id <= 0)); then
    echo "no id configured for domain $domainname. exiting qemu hook" >&2
    exit 1
  fi
  groupid=${id:0:1}

  [[ -z "$rate" ]] && rate=$defaultrate
  [[ -z "$ceil" ]] && ceil=$defaultceil
  [[ -z "$globalceil" ]] && globalceil=$defaultglobalceil

  # mark packets with iptables
  iptables -t mangle -A FORWARD -m physdev \
    --physdev-in "$virtdev" --physdev-out "$physdev" -j MARK --set-mark "$id"
  iptables -t mangle -A FORWARD -m physdev \
    --physdev-in "$physdev" --physdev-out "$virtdev" -j MARK --set-mark "$id"

  # root qdiscs on virt and phys device
  # these commands are most likely going to fail since the root qdisc will
  # probably exist
  tc qdisc add dev "$physdev" root handle 1: htb default 333
  tc qdisc add dev "$virtdev" root handle 1: htb default 333

  # default classes for unmarked traffic
  # these are also likely to fail
  tc class add dev "$physdev" parent 1: classid 1:333 htb \
    rate "$globalceil" ceil "$globalceil"
  tc class add dev "$virtdev" parent 1: classid 1:333 htb \
    rate "$globalceil" ceil "$globalceil"

  # first hierarchical level: one class per group (first digit of the id),
  # named 1:<g><g><g><g>, limited to the highest ceil within the group
  while read -r group grouplimit; do
    tc class add dev "$physdev" parent 1: \
      classid "1:${group}${group}${group}${group}" htb \
      rate "$grouplimit" ceil "$grouplimit"
    tc class add dev "$virtdev" parent 1: \
      classid "1:${group}${group}${group}${group}" htb \
      rate "$grouplimit" ceil "$grouplimit"
  done < <(group_limits "$physdev")

  # leaf classes with rate and ceil as configured
  tc class add dev "$physdev" \
    parent "1:${groupid}${groupid}${groupid}${groupid}" classid "1:$id" htb \
    rate "$rate" ceil "$ceil"
  tc class add dev "$virtdev" \
    parent "1:${groupid}${groupid}${groupid}${groupid}" classid "1:$id" htb \
    rate "$rate" ceil "$ceil"

  # filter on fw-mark to classify into classid as defined above
  tc filter add dev "$physdev" prio "$id" protocol ip parent 1: \
    handle "$id" fw flowid "1:$id"
  tc filter add dev "$virtdev" prio "$id" protocol ip parent 1: \
    handle "$id" fw flowid "1:$id"
}

#######################################
# Remove the packet marks, filter and leaf class installed by limits_start
# for one interface of the domain.  Does nothing when the interface is not
# a bridged one or the domain has no id configured.
# Globals:   domainname, domaincfg (read)
# Arguments: $1 - 1-based index of the <interface> element in the domain XML
#######################################
limits_stop() {
  local idx=$1
  local virtdev sourcedev physdev id groupid

  virtdev=$(xml_attr "//interface[$idx]/target/@dev")
  [[ -n "$virtdev" ]] || return 0

  sourcedev=$(xml_attr "//interface[$idx][@type='bridge']/source/@bridge")
  [[ -n "$sourcedev" ]] || return 0

  physdev=$(bridge_physdev "$sourcedev")

  read -r id _ <<<"$(config_value "${domainname}_${physdev}")"
  # Nothing was installed for a domain without an id, so nothing to remove.
  [[ -n "$id" ]] || return 0
  groupid=${id:0:1}

  iptables -t mangle -D FORWARD -m physdev \
    --physdev-in "$virtdev" --physdev-out "$physdev" -j MARK --set-mark "$id"
  iptables -t mangle -D FORWARD -m physdev \
    --physdev-in "$physdev" --physdev-out "$virtdev" -j MARK --set-mark "$id"

  # removing from physdev is sufficient since virtdev does not exist any
  # more when this is called
  tc filter del dev "$physdev" protocol ip pref "$id" fw
  tc class del dev "$physdev" \
    parent "1:${groupid}${groupid}${groupid}${groupid}" classid "1:$id"
}

domainname=${1-}
domaintask=${2-}

# The hook runs as a privileged daemon child: use an unpredictable temp file
# and always remove it, whichever way the script exits.
domaincfg=$(mktemp "${TMPDIR:-/tmp}/libvirt.qemuhook.XXXXXXXXXX") || {
  echo "could not create temporary file. exiting qemu hook" >&2
  exit 1
}
trap 'rm -f -- "$domaincfg"' EXIT

# the entire xml config is on stdin
cat >"$domaincfg"

# count occurrences (not lines), hence grep -o | wc -l rather than grep -c
numifaces=$(grep -o '<interface' "$domaincfg" | wc -l)

case "$domaintask" in
  # hook is called with <domainname> start begin -
  start)
    for ((i = 1; i <= numifaces; i++)); do
      limits_start "$i"
    done
    ;;
  # hook is called with <domainname> stopped end -
  stopped)
    for ((i = 1; i <= numifaces; i++)); do
      limits_stop "$i"
    done
    ;;
  *)
    echo "qemu hook called with unexpected options $*" >&2
    ;;
esac
-- libvir-list mailing list libvir-list@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/libvir-list