Re: How to check an inactive slave in a bond?

Linux Advanced Routing and Traffic Control

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Thank you for this very complete answer.

- Assuming I really have to implement some sort of inactive slave-link check.
- Assuming it is acceptable to remove the inactive slave from the bond for the duration of the check.

Could you help me check my script? It works well for me, but as I'm about to deploy it in production I'd rather have it double-checked by you guys. (Note: I'm not reliable when it comes to, among other things, routing and network-related topics.)

Thx.

      Olivier

---------------------------------------------------------------------------------------
#!/bin/bash

# Check all nics enslaved in a bond.
# This is a way to check that all nics (including inactive ones) are
# working properly.
#

# Authors:
#   OA: Olivier Arsac
# History:
#   19/04/2007: OA scratch
#   31/06/2007: OA better handling of "free" IPs used during test
# TODO:
#   remove all TODOs from the script

# Uncomment to trace every command while debugging.
#set -x

# Be robust: stop as soon as an unset variable is referenced, since that
# most likely means an earlier step went wrong.
set -u

# If the operator interrupts the run, let clean() restore the bond.
trap clean INT TERM

PATH=/exploit/local/sbin:/exploit/local/bin:/usr/kerberos/sbin:/usr/kerberos/bin:/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin:/exploit/unix/prod/bin

# Raw argument string and script name (the latter is shown by usage()).
fullcom="$*"
com=$(basename "$0")

# Print the command-line help on stdout.
# Globals: com (read) - script name shown in the synopsis and the examples.
usage() {
  # One here-doc instead of a run of echo calls keeps the help text
  # readable as a whole; $com is the only expansion in it.
  cat <<EOF
Usage: $com [-q] [-i ip] [-t target] [bond]
  Check all nics enslaved to a bond.
  This is a way to check that all nics (including inactive ones) are working properly. You should check that periodically to avoid nasty surprises when your active nic stops working and you have to fall back to your (unchecked) slave one.
  exit 0 if all is OK (or if no bond is present).
  -q: quiet (no verbose message for human operator).
  -i: ip to use during check of inactive slaves.
  -t: target ip to ping during checks.
eg: $com
  check all nics from all bonds.
eg: $com -q bond0
  check silently all nics from bond0.
EOF
}

# Defaults, overridden by the command-line options parsed below.
quiet=0
ip=""
target=""
bonds=""

# Parse the command line: options first, then an optional list of bonds.
# Globals:   quiet, ip, target, bonds (written; bonds is a space-separated
#            list of bond names, all bonds of the host when none is named)
# Arguments: the script's own arguments ("$@")
# Exits:     1 on an unknown option (after printing usage),
#            6 when a named bond has no entry under /proc/net/bonding.
parse_args() {
  local option="" bond_name="" bond_path=""
  # getopts keeps its cursor in OPTIND; a local copy starting at 1 makes
  # the parsing independent of anything parsed before.
  local OPTIND=1

  while getopts "qi:t:" option; do
    case $option in
      q) quiet=1 ;;
      i) ip=$OPTARG ;;
      t) target=$OPTARG ;;
      *) usage; exit 1 ;;
    esac
  done

  # drop what has been parsed by getopts
  shift $((OPTIND - 1))

  bonds=""
  if [ "$#" -ne 0 ]; then
    for bond_name in "$@"; do
      if [ ! -f "/proc/net/bonding/$bond_name" ]; then
        # Written straight to stderr: the echoe helper is only defined
        # further down the script and does not exist yet at this point.
        echo "Error: $bond_name is not a valid bond." >&2
        exit 6
      fi
    done
    bonds="$*"
  else
    # No bond named: take every entry of /proc/net/bonding. When the
    # directory is missing the glob stays literal and is skipped.
    for bond_path in /proc/net/bonding/*; do
      [ -e "$bond_path" ] || continue
      bonds="$bonds ${bond_path##*/}"
    done
    bonds=${bonds# }
  fi
}

parse_args "$@"

# Match a MAC address: six pairs of hex digits separated by colons.
# Extended-regex syntax, so it must be used with egrep / grep -E.
re_mac="([0-9a-fA-F]{2}:){5}[0-9a-fA-F]{2}"
# Match a dotted-quad IPv4 address with one or more digits per octet.
# Written without groups or {n} intervals so that it means the same thing
# to plain grep (which the ip auto-detection uses) and to grep -E.
re_ip="[0-9][0-9]*[.][0-9][0-9]*[.][0-9][0-9]*[.][0-9][0-9]*"

# Print a message on standard error.
# Arguments: passed unchanged to echo.
function echoe(){
 # Duplicate file descriptor 2 rather than reopening /dev/stderr: opening
 # the path again truncates the file when stderr is redirected to a
 # regular file, wiping earlier messages from the log.
 echo "$@" >&2
}

# Print a message on stdout unless quiet mode is on.
# Globals:   quiet (read) - 0 means verbose output is wanted
# Arguments: forwarded verbatim to echo, so options such as -n work.
echoq() {
  [ "$quiet" -eq 0 ] || return 0
  echo "$@"
}

# Find a MAC address that a nic can use while it is tested outside the
# bond: a permanent hardware address of one of the bond's slaves that
# differs from the address currently carried by the bond itself. When
# several addresses qualify, the last one listed wins.
# Globals:   re_mac (read)
#            get_free_mac_ret (written; empty when no address qualifies)
# Arguments: $1 - bond name
#            $2 - nic about to be tested (accepted but not used by the lookup)
get_free_mac_ret=""
function get_free_mac(){
  local bond_name=$1
  local slave_macs="" bond_mac="" candidate="" chosen=""

  slave_macs=$(grep "Permanent HW addr:" "/proc/net/bonding/$bond_name" \
    | grep -E -o "$re_mac" | tr 'a-z' 'A-Z')
  # Both sides are upper-cased before comparing: depending on the ifconfig
  # version the address is printed after "HWaddr" or "ether", and not
  # always in the same letter case as /proc/net/bonding.
  bond_mac=$(ifconfig "$bond_name" | grep -E 'HWaddr|ether ' \
    | grep -E -o "$re_mac" | head -n 1 | tr 'a-z' 'A-Z')

  for candidate in $slave_macs; do
    if [ "$candidate" != "$bond_mac" ]; then
      chosen=$candidate
    fi
  done
  get_free_mac_ret=$chosen
}

# Ping a target, optionally through a given nic, and report the outcome.
# Globals:   host (read) - host name quoted in the error message
#            check_nic_ret (written) - 0 when the ping succeeded, 1 otherwise
# Arguments: $1 - address to ping
#            $2 - optional: interface to send from (ping -I)
#            $3 - optional: temporary ip in use, only quoted in the error message
check_nic_ret=0
function check_nic(){
  local ping_target=$1
  # ${2:-} keeps the one-argument form working under "set -o nounset".
  local iface=${2:-}
  local note=""
  local ping_rc=0

  if [ -n "$iface" ]; then
    ping -n -c 3 -I "$iface" "$ping_target" >/dev/null 2>&1 || ping_rc=$?
  else
    ping -n -c 3 "$ping_target" >/dev/null 2>&1 || ping_rc=$?
  fi

  if [ "$ping_rc" -ne 0 ]; then
    if [ $# -ge 3 ]; then
      note="(using $3 as ip)"
    fi
    echoq " [ERROR]"
    # Without an explicit nic the ping left through the default route.
    echo "${iface:-default} interface on $host is not working properly! $note" >&2
    check_nic_ret=1
  else
    echoq "      [OK]"
    check_nic_ret=0
  fi
}

# Send ARP requests to a target through a given nic, using a given source
# ip. All arping output is discarded; the function's status is arping's.
# Arguments: $1 - address to arping
#            $2 - interface to send from
#            $3 - source ip to put in the requests
function exercise_nic_arp(){
  # Locals, so that the caller's own target/nic variables are left alone.
  local arp_target=$1
  local iface=$2
  local source_ip=$3
  arping -c 3 -s "$source_ip" -I "$iface" "$arp_target" >/dev/null 2>&1
}

# Signal handler: re-enslave the nic that was taken out of its bond for
# testing, then leave with status 2.
# clean_bond / clean_nic name the bond and nic currently taken apart; the
# main loop empties both again once the nic is back in its bond.
clean_nic=""
clean_bond=""
clean() {
  echoq
  echoq "Script interrupted, restoring bond."
  if [ -n "$clean_bond" ] && [ -n "$clean_nic" ]; then
    ifenslave "$clean_bond" "$clean_nic" 2>/dev/null
  fi
  exit 2
}

# Main flow: work out the ping target and the temporary ip, then for every
# bond check the default path, each inactive slave on its own, and the bond.
# Exit status: 0 all OK (or no bonding module), 3 no target, 4 no test ip,
#              5 a bond has no inactive slave, 10+N when N checks failed.
host=$(hostname -s)
# NOTE(review): table is not referenced anywhere in the visible script.
table=200

if [ ! -d /proc/net/bonding ]; then
  echoe "Warning: Module bonding not loaded. Obviously no bond to check."
  # trying to check a bond on a server where none is present is probably
  # not really an error -> exit 0 with a warning message
  exit 0
fi

if [ -z "$target" ]; then
  # no target given as parameter -> auto-detect:
  # use the gateway of the first routing entry flagged UG as ping target
  # (only the first one, so several gateways cannot yield a multi-word value)
  target=$(route -n | awk '/UG/ {print $2; exit}')
  if [ -z "$target" ]; then
    echoe "Error: Unable to auto-detect the target to use during test (use -t?)."
    exit 3
  fi
fi

# Initialised up front so that "set -o nounset" never trips over them.
ip_b1=""
ip_b2=""
if [ -z "$ip" ]; then
  # no ip given as parameter -> auto-detect from the DNS names
  # <host>-bond-t1 and <host>-bond-t2
  ip_b1=$(host "${host}-bond-t1" | grep -o "$re_ip" | head -n 1)
  ip_b2=$(host "${host}-bond-t2" | grep -o "$re_ip" | head -n 1)
  if [ -z "$ip_b1" ] && [ -z "$ip_b2" ]; then
    echoe "Error: Unable to auto-detect an ip to use during test (use -i?)."
    exit 4
  fi
  # Fall back to the second name when the first one did not resolve.
  ip=${ip_b1:-$ip_b2} # TODO: use a clever way to match slave and free ip
fi

error_nb=0
for bond in $bonds; do
  bond=$(basename "$bond")
  echoq "checking bond $bond"
  # "Currently Active Slave: eth0" -> "eth0" (surrounding blanks removed)
  active=$(awk -F: '/Active Slave/ { gsub(/[[:space:]]/, "", $2); print $2; exit }' \
    "/proc/net/bonding/$bond")
  echoq -n "  active slave   : $active"
  check_nic "$target"
  error_nb=$((error_nb + check_nic_ret))

  slaves=$(awk -F: '/Slave Interface:/ { gsub(/[[:space:]]/, "", $2); print $2 }' \
    "/proc/net/bonding/$bond")
  slave_nb=0
  for slave in $slaves; do
    # A bond that reports no active slave has no inactive one to test
    # either; it ends in the "No inactive slave" error below.
    if [ -n "$active" ] && [ "$slave" != "$active" ]; then
      # this nic is enslaved but not active. we want to check if it is
      # ready to work (no cable or VPN trouble that will bite us only when
      # the active slave will change)
      slave_nb=$((slave_nb + 1))
      echoq -n "  inactive slave : $slave"
      # search for a free mac in this bond (ie a real phy MAC that is not
      # the one used by the bond)
      get_free_mac "$bond" "$slave"
      free_mac=$get_free_mac_ret
      # store the bond/nic we are going to un-enslave (to be able to
      # re-enslave it in case of interrupt)
      clean_bond=$bond
      clean_nic=$slave
      # free this nic from the bond; never reconfigure a nic that is
      # still enslaved
      if ! ifenslave -d "$bond" "$slave"; then
        echoq " [ERROR]"
        echoe "Error: Unable to release $slave from $bond."
        error_nb=$((error_nb + 1))
        clean_bond=""; clean_nic=""
        continue
      fi
      # set it up with a "free" mac (skipped when none was found)
      if [ -n "$free_mac" ]; then
        ifconfig "$slave" hw ether "$free_mac"
      fi
      # set it up with a temp IP
      ifconfig "$slave" "$ip" netmask 255.255.255.255
      # it seems we need a small delay here or the rest may fail
      sleep 2
      exercise_nic_arp "$target" "$slave" "$ip"
      check_nic "$target" "$slave" "$ip"
      error_nb=$((error_nb + check_nic_ret))
      # clean this temporary ip/route
      ifconfig "$slave" down
      # re-enslave this nic to the bond
      if ! ifenslave "$bond" "$slave"; then
        echoe "Error: Unable to re-enslave $slave to $bond."
        error_nb=$((error_nb + 1))
      fi
      clean_bond=""; clean_nic=""
    fi
  done

  echoq -n "  bond           : $bond"
  check_nic "$target" "$bond"
  error_nb=$((error_nb + check_nic_ret))
  if [ "$slave_nb" -eq 0 ]; then
    echoe "Error: No inactive slave in $bond."
    exit 5
  fi
done

if [ "$error_nb" -ne 0 ]; then
  exit $((10 + error_nb))
fi
exit 0



---------------------------------------------------------------------------------------






_______________________________________________
LARTC mailing list
LARTC@xxxxxxxxxxxxxxx
http://mailman.ds9a.nl/cgi-bin/mailman/listinfo/lartc

[Index of Archives]     [LARTC Home Page]     [Netfilter]     [Netfilter Development]     [Network Development]     [Bugtraq]     [GCC Help]     [Yosemite News]     [Linux Kernel]     [Fedora Users]
  Powered by Linux