On 12/3/2013 6:18 PM, John R Pierce wrote:
> i found and modified a python script that parses the output of megacli
> to create a useful status page in a format that could easily be then
> grepped for errors for an alert. I'll try and dig it up after dinner.

k, here it is. I call this lsi-raidinfo and keep it in /root/bin as it's
only meant for root's use. it assumes you've installed megacli.

(note, there are 2 more scripts following)

#!/usr/bin/python

# megaclisas-status 0.6
# renamed lsi-raidinfo
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Pulse 2; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301, USA.
# # Copyright (C) 2007-2009 Adam Cecile (Le_Vert) ## modified by johnpuskar@xxxxxxxxx 08/14/11 # fixed for LSI 9285-8e on Openfiler ## modified by pierce@xxxxxxxxxxxx 2012-01-05 # fixed for newer version of megacli output on RHEL6/CentOS6 # output format extended to show raid span-unit and rebuild % complete import os import re import sys if len(sys.argv) > 2: print 'Usage: lsi-raidinfo [-d]' sys.exit(1) # if argument -d, only print disk info printarray = True printcontroller = True if len(sys.argv) > 1: if sys.argv[1] == '-d': printarray = False printcontroller = False else: print 'Usage: lsi-raidinfo [-d]' sys.exit(1) # Get command output def getOutput(cmd): output = os.popen(cmd) lines = [] for line in output: if not re.match(r'^$',line.strip()): lines.append(line.strip()) return lines def returnControllerNumber(output): for line in output: if re.match(r'^Controller Count.*$',line.strip()): return int(line.split(':')[1].strip().strip('.')) def returnControllerModel(output): for line in output: if re.match(r'^Product Name.*$',line.strip()): return line.split(':')[1].strip() def returnArrayNumber(output): i = 0 for line in output: if re.match(r'^Virtual (Drive|Disk).*$',line.strip()): i += 1 return i def returnArrayInfo(output,controllerid,arrayid): id = 'c'+str(controllerid)+'u'+str(arrayid) # print 'DEBUG: id = '+str(id) operationlinennumber = False linenumber = 0 units = 1 type = 'JBOD' span = 0 size = 0 for line in output: if re.match(r'^RAID Level.*$',line.strip()): type = line.strip().split(':')[1].strip() type = 'RAID' + type.split(',')[0].split('-')[1].strip() # print 'debug: type = '+str(type) if re.match(r'^Number.*$',line.strip()): units = line.strip().split(':')[1].strip() if re.match(r'^Span Depth.*$',line.strip()): span = line.strip().split(':')[1].strip() if re.match(r'^Size.*$',line.strip()): # Size reported in MB if re.match(r'^.*MB$',line.strip().split(':')[1]): size = line.strip().split(':')[1].strip('MB').strip() size = 
str(int(round((float(size) / 1000))))+'G' # Size reported in TB elif re.match(r'^.*TB$',line.strip().split(':')[1]): size = line.strip().split(':')[1].strip('TB').strip() size = str(int(round((float(size) * 1000))))+'G' # Size reported in GB (default) else: size = line.strip().split(':')[1].strip('GB').strip() size = str(int(round((float(size)))))+'G' if re.match(r'^State.*$',line.strip()): state = line.strip().split(':')[1].strip() if re.match(r'^Ongoing Progresses.*$',line.strip()): operationlinennumber = linenumber linenumber += 1 if operationlinennumber: inprogress = output[operationlinennumber+1] else: inprogress = 'None' if span > 1: type = type+'0' type = type + ' ' + str(span) + 'x' + str(units) return [id,type,size,state,inprogress] def returnDiskInfo(output,controllerid,currentarrayid): arrayid = False oldarrayid = False olddiskid = False table = [] state = 'Offline' model = 'Unknown' enclnum = 'Unknown' slotnum = 'Unknown' enclsl = 'Unknown' firstDisk = True for line in output: if re.match(r'Firmware state: .*$',line.strip()): state = line.split(':')[1].strip() if re.match(r'Rebuild',state): cmd2 = '/opt/MegaRAID/MegaCli/MegaCli64 pdrbld showprog physdrv['+str(enclnum)+':'+str(slotnum)+'] a'+str(controllerid)+' nolog' ll = getOutput(cmd2) state += ' completed ' + re.sub(r'Rebuild Progress.*Completed', '', ll[0]).strip(); if re.match(r'Slot Number: .*$',line.strip()): slotnum = line.split(':')[1].strip() if re.match(r'Inquiry Data: .*$',line.strip()): model = line.split(':')[1].strip() model = re.sub(' +', ' ', model) model = re.sub('Hotspare Information', '', model).strip() #remove bogus output from firmware 12.12 if re.match(r"(Drive|Disk)'s postion: .*$",line.strip()): spans = line.split(',') span = re.sub(r"(Drive|Disk).*DiskGroup:", '', spans[0]).strip()+'-' span += spans[1].split(':')[1].strip()+'-' span += spans[2].split(':')[1].strip() if re.match(r'Enclosure Device ID: [0-9]+$',line.strip()): if firstDisk == True: firstDisk = False else: enclsl = 
str(enclnum)+':'+str(slotnum) table.append([str(enclsl), span, model, state]) span = 'x-x-x' enclnum = line.split(':')[1].strip() # Last disk of last array enclsl = str(enclnum)+':'+str(slotnum) table.append([str(enclsl), span, model, state]) arraytable = [] for disk in table: arraytable.append(disk) return arraytable cmd = '/opt/MegaRAID/MegaCli/MegaCli64 adpcount nolog' output = getOutput(cmd) controllernumber = returnControllerNumber(output) bad = False # List available controller if printcontroller: print '-- Controllers --' print '-- ID | Model' controllerid = 0 while controllerid < controllernumber: cmd = '/opt/MegaRAID/MegaCli/MegaCli64 adpallinfo a'+str(controllerid)+' nolog' output = getOutput(cmd) controllermodel = returnControllerModel(output) print 'c'+str(controllerid)+' | '+controllermodel controllerid += 1 print '' if printarray: controllerid = 0 print '-- Volumes --' print '-- ID | Type | Size | Status | InProgress' # print 'controller number'+str(controllernumber) while controllerid < controllernumber: arrayid = 0 cmd = '/opt/MegaRAID/MegaCli/MegaCli64 ldinfo lall a'+str(controllerid)+' nolog' output = getOutput(cmd) arraynumber = returnArrayNumber(output) # print 'array number'+str(arraynumber) while arrayid < arraynumber: cmd = '/opt/MegaRAID/MegaCli/MegaCli64 ldinfo l'+str(arrayid)+' a'+str(controllerid)+' nolog' # print 'DEBUG: running '+str(cmd) output = getOutput(cmd) # print 'DEBUG: output '+str(output) arrayinfo = returnArrayInfo(output,controllerid,arrayid) print 'volume '+arrayinfo[0]+' | '+arrayinfo[1]+' | '+arrayinfo[2]+' | '+arrayinfo[3]+' | '+arrayinfo[4] if not arrayinfo[3] == 'Optimal': bad = True arrayid += 1 controllerid += 1 print '' print '-- Disks --' print '-- Encl:Slot | vol-span-unit | Model | Status' controllerid = 0 while controllerid < controllernumber: arrayid = 0 cmd = '/opt/MegaRAID/MegaCli/MegaCli64 ldinfo lall a'+str(controllerid)+' nolog' output = getOutput(cmd) arraynumber = returnArrayNumber(output) while 
arrayid<arraynumber: #grab disk arrayId info cmd = '/opt/MegaRAID/MegaCli/MegaCli64 pdlist a'+str(controllerid)+' nolog' #print 'debug: running '+str(cmd) output = getOutput(cmd) arraydisk = returnDiskInfo(output,controllerid,arrayid) for array in arraydisk: print 'disk '+array[0]+' | '+array[1]+' | '+array[2]+' | '+array[3] arrayid += 1 controllerid += 1 if bad: print '\nThere is at least one disk/array in a NOT OPTIMAL state.' sys.exit(1) ======EOF====== and, this script is /root/bin/lsi-checkraid, which is meant to run like a pipe, lsi-raidinfo | lsi-checkraid, I run this from a monitor script #!/usr/bin/python # created by johnpuskar@xxxxxxxxx on 08/14/11 # rev 01 import os import re import sys if len(sys.argv) > 1: print 'Usage: accepts stdin from lsi-raidinfo' sys.exit(1) blnBadDisk = False infile = sys.stdin for line in infile: # print 'DEBUG!! checking line:'+str(line) if re.match(r'disk .*$',line.strip()): if re.match(r'^((?!Online, Spun Up|Online, Spun down|Hotspare, Spun Up|Hotspare, Spun down|Unconfigured\(good\), Spun Up).)*$',line.strip()): blnBadDisk = True badLine = line # print 'DEBUG!! bad disk found!' if re.match(r'volume ',line.strip()): if re.match(r'^((?!Optimal).)*$',line.strip()): # print 'DEBUG!! bad vol found!' blnBadDisk = True badLine = line if blnBadDisk == True: print 'RAID ERROR' # print badLine else: print 'RAID CLEAN' ======EOF======= and finally, this is bin/lsi-emailalerts, which uses the above to send an email alert... 
#!/bin/sh
# lsi-emailalerts: mail a warning when lsi-checkraid reports "RAID ERROR" or
# when mpt-status reports a state other than OPTIMAL/ONLINE.

MAILTOADDR=root

# Directory holding the helper scripts.  mpt-status was previously called as
# the relative path bin/mpt-status, which only resolves when the current
# directory is /root; elsewhere the check silently never fired.
# NOTE(review): assumes mpt-status lives next to the lsi-* scripts -- confirm.
BINDIR=/root/bin

# NOTE: fixed file names in /tmp; the cleanup lines at the bottom are
# commented out, so both files remain after each run.
RAIDINFO_FILE=/tmp/lsi-raidinfo.txt
CHECKRAID_FILE=/tmp/lsi-checkraid.txt

# The tr sets are quoted so the shell cannot glob-expand them against
# single-letter file names in the current directory.
HOST=$(hostname -s | tr '[:lower:]' '[:upper:]')

#get megaraid status info
"$BINDIR/lsi-raidinfo" | tee "$RAIDINFO_FILE" | "$BINDIR/lsi-checkraid" > "$CHECKRAID_FILE"

#check megaraid status info
if grep -q "RAID ERROR" "$CHECKRAID_FILE" ; then
    mailx -s "$HOST Warning: failed disk or degraded array" "$MAILTOADDR" < "$RAIDINFO_FILE"
fi

#check mpt status
# Alert when any third field of 'mpt-status -s' is neither OPTIMAL nor ONLINE.
if "$BINDIR/mpt-status" -s | awk '{print $3}' | grep -Eqv "(OPTIMAL|ONLINE)" ; then
    "$BINDIR/mpt-status" | mailx -s "$HOST Warning: internal disk failure" "$MAILTOADDR"
fi

#rm -f /tmp/lsi-raidinfo.txt
#rm -f /tmp/lsi-checkraid.txt

exit 0

========EOF==========

-- 
john r pierce                                      37N 122W
somewhere on the middle of the left coast

_______________________________________________
CentOS mailing list
CentOS@xxxxxxxxxx
http://lists.centos.org/mailman/listinfo/centos