--- files/nagios/client/check_cron.cfg | 1 - files/nagios/client/check_disk.cfg | 2 - files/nagios/client/check_lock.cfg | 1 - files/nagios/client/check_mirrorlist_cache.cfg | 1 - files/nagios/client/check_postfix_queue.cfg | 1 - files/nagios/client/check_raid.cfg | 1 - files/nagios/client/check_swap.cfg | 1 - files/nagios/client/nrpe.cfg | 223 --------------------- files/nagios/client/scripts/check_lock | 17 -- files/nagios/client/scripts/check_postfix_queue | 49 ----- files/nagios/client/scripts/check_raid.py | 45 ----- handlers/restart_services.yml | 3 - playbooks/groups/backup-server.yml | 2 +- playbooks/groups/badges-backend.yml | 2 +- playbooks/groups/badges-web.yml | 2 +- playbooks/groups/beaker.yml | 2 +- playbooks/groups/gallery.yml | 2 +- playbooks/groups/kernel-qa.yml | 2 +- playbooks/groups/keyserver.yml | 2 +- playbooks/groups/koji-hub.yml | 2 +- playbooks/groups/mailman.yml | 2 +- playbooks/groups/mirrorlist.yml | 2 +- playbooks/groups/postgresl-server.yml | 2 +- playbooks/groups/releng.yml | 4 +- playbooks/groups/taskbot.yml | 2 +- playbooks/groups/virthost.yml | 2 +- roles/nagios_client/files/check_cron.cfg | 1 + roles/nagios_client/files/check_disk.cfg | 2 + roles/nagios_client/files/check_lock.cfg | 1 + .../nagios_client/files/check_mirrorlist_cache.cfg | 1 + roles/nagios_client/files/check_postfix_queue.cfg | 1 + roles/nagios_client/files/check_raid.cfg | 1 + roles/nagios_client/files/check_swap.cfg | 1 + roles/nagios_client/files/nrpe.cfg | 223 +++++++++++++++++++++ roles/nagios_client/files/scripts/check_lock | 17 ++ .../files/scripts/check_postfix_queue | 49 +++++ roles/nagios_client/files/scripts/check_raid.py | 45 +++++ roles/nagios_client/handlers/main.yml | 3 + roles/nagios_client/tasks/main.yml | 63 ++++++ tasks/nagios_client.yml | 63 ------ 40 files changed, 424 insertions(+), 422 deletions(-) delete mode 100644 files/nagios/client/check_cron.cfg delete mode 100644 files/nagios/client/check_disk.cfg delete mode 100644 files/nagios/client/check_lock.cfg delete mode 100644 files/nagios/client/check_mirrorlist_cache.cfg delete mode 100644 files/nagios/client/check_postfix_queue.cfg delete mode 100644 files/nagios/client/check_raid.cfg delete mode 100644 files/nagios/client/check_swap.cfg delete mode 100644 files/nagios/client/nrpe.cfg delete mode 100755 files/nagios/client/scripts/check_lock delete mode 100755 files/nagios/client/scripts/check_postfix_queue delete mode 100755 files/nagios/client/scripts/check_raid.py create mode 100644 roles/nagios_client/files/check_cron.cfg create mode 100644 roles/nagios_client/files/check_disk.cfg create mode 100644 roles/nagios_client/files/check_lock.cfg create mode 100644 roles/nagios_client/files/check_mirrorlist_cache.cfg create mode 100644 roles/nagios_client/files/check_postfix_queue.cfg create mode 100644 roles/nagios_client/files/check_raid.cfg create mode 100644 roles/nagios_client/files/check_swap.cfg create mode 100644 roles/nagios_client/files/nrpe.cfg create mode 100755 roles/nagios_client/files/scripts/check_lock create mode 100755 roles/nagios_client/files/scripts/check_postfix_queue create mode 100755 roles/nagios_client/files/scripts/check_raid.py create mode 100644 roles/nagios_client/handlers/main.yml create mode 100644 roles/nagios_client/tasks/main.yml delete mode 100644 tasks/nagios_client.yml diff --git a/files/nagios/client/check_cron.cfg b/files/nagios/client/check_cron.cfg deleted file mode 100644 index e01f407..0000000 --- a/files/nagios/client/check_cron.cfg +++ /dev/null @@ -1 +0,0 @@ -command[check_cron]=/usr/lib64/nagios/plugins/check_procs -c 1:10 -C 'crond' -u root diff --git a/files/nagios/client/check_disk.cfg b/files/nagios/client/check_disk.cfg deleted file mode 100644 index d5d3290..0000000 --- a/files/nagios/client/check_disk.cfg +++ /dev/null @@ -1,2 +0,0 @@ -command[check_disk_/]=/usr/lib64/nagios/plugins/check_disk -w 15% -c 10% -p / -command[check_disk_/boot]=/usr/lib64/nagios/plugins/check_disk -w 15% -c 10% -p /boot diff --git a/files/nagios/client/check_lock.cfg b/files/nagios/client/check_lock.cfg deleted file mode 100644 index 9525f9f..0000000 --- a/files/nagios/client/check_lock.cfg +++ /dev/null @@ -1 +0,0 @@ -command[check_lock]=/usr/lib64/nagios/plugins/check_lock diff --git a/files/nagios/client/check_mirrorlist_cache.cfg b/files/nagios/client/check_mirrorlist_cache.cfg deleted file mode 100644 index bd4b171..0000000 --- a/files/nagios/client/check_mirrorlist_cache.cfg +++ /dev/null @@ -1 +0,0 @@ -command[check_mirrorlist_cache]=/usr/lib64/nagios/plugins/check_file_age -w 14400 -c 129600 -f /var/lib/mirrormanager/mirrorlist_cache.pkl diff --git a/files/nagios/client/check_postfix_queue.cfg b/files/nagios/client/check_postfix_queue.cfg deleted file mode 100644 index 5574698..0000000 --- a/files/nagios/client/check_postfix_queue.cfg +++ /dev/null @@ -1 +0,0 @@ -command[check_postfix_queue]=/usr/lib64/nagios/plugins/check_postfix_queue -w 2 -c 5 diff --git a/files/nagios/client/check_raid.cfg b/files/nagios/client/check_raid.cfg deleted file mode 100644 index 5c15d45..0000000 --- a/files/nagios/client/check_raid.cfg +++ /dev/null @@ -1 +0,0 @@ -command[check_raid]=/usr/lib64/nagios/plugins/check_raid.py diff --git a/files/nagios/client/check_swap.cfg b/files/nagios/client/check_swap.cfg deleted file mode 100644 index bc4f19d..0000000 --- a/files/nagios/client/check_swap.cfg +++ /dev/null @@ -1 +0,0 @@ -command[check_swap]=/usr/lib64/nagios/plugins/check_swap -w 15% -c 10% diff --git a/files/nagios/client/nrpe.cfg b/files/nagios/client/nrpe.cfg deleted file mode 100644 index 6933f78..0000000 --- a/files/nagios/client/nrpe.cfg +++ /dev/null @@ -1,223 +0,0 @@ -############################################################################# -# Sample NRPE Config File -# Written by: Ethan Galstad (nagios@xxxxxxxxxx) -# -# Last Modified: 11-23-2007 -# -# NOTES: -# This is a sample configuration file for the NRPE daemon. It needs to be -# located on the remote host that is running the NRPE daemon, not the host -# from which the check_nrpe client is being executed. -############################################################################# - - -# LOG FACILITY -# The syslog facility that should be used for logging purposes. - -log_facility=daemon - - - -# PID FILE -# The name of the file in which the NRPE daemon should write it's process ID -# number. The file is only written if the NRPE daemon is started by the root -# user and is running in standalone mode. - -pid_file=/var/run/nrpe/nrpe.pid - - - -# PORT NUMBER -# Port number we should wait for connections on. -# NOTE: This must be a non-priviledged port (i.e. > 1024). -# NOTE: This option is ignored if NRPE is running under either inetd or xinetd - -server_port=5666 - - - -# SERVER ADDRESS -# Address that nrpe should bind to in case there are more than one interface -# and you do not want nrpe to bind on all interfaces. -# NOTE: This option is ignored if NRPE is running under either inetd or xinetd - -#server_address=127.0.0.1 - - - -# NRPE USER -# This determines the effective user that the NRPE daemon should run as. -# You can either supply a username or a UID. -# -# NOTE: This option is ignored if NRPE is running under either inetd or xinetd - -nrpe_user=nrpe - - - -# NRPE GROUP -# This determines the effective group that the NRPE daemon should run as. -# You can either supply a group name or a GID. -# -# NOTE: This option is ignored if NRPE is running under either inetd or xinetd - -nrpe_group=nrpe - - - -# ALLOWED HOST ADDRESSES -# This is an optional comma-delimited list of IP address or hostnames -# that are allowed to talk to the NRPE daemon. Network addresses with a bit mask -# (i.e. 192.168.1.0/24) are also supported. Hostname wildcards are not currently -# supported. -# -# Note: The daemon only does rudimentary checking of the client's IP -# address. I would highly recommend adding entries in your /etc/hosts.allow -# file to allow only the specified host to connect to the port -# you are running this daemon on. -# -# NOTE: This option is ignored if NRPE is running under either inetd or xinetd - - -allowed_hosts=10.5.126.41,192.168.1.10,192.168.1.20,209.132.181.35 - - - -# COMMAND ARGUMENT PROCESSING -# This option determines whether or not the NRPE daemon will allow clients -# to specify arguments to commands that are executed. This option only works -# if the daemon was configured with the --enable-command-args configure script -# option. -# -# *** ENABLING THIS OPTION IS A SECURITY RISK! *** -# Read the SECURITY file for information on some of the security implications -# of enabling this variable. -# -# Values: 0=do not allow arguments, 1=allow command arguments - -dont_blame_nrpe=0 - - - -# COMMAND PREFIX -# This option allows you to prefix all commands with a user-defined string. -# A space is automatically added between the specified prefix string and the -# command line from the command definition. -# -# *** THIS EXAMPLE MAY POSE A POTENTIAL SECURITY RISK, SO USE WITH CAUTION! *** -# Usage scenario: -# Execute restricted commmands using sudo. For this to work, you need to add -# the nagios user to your /etc/sudoers. An example entry for alllowing -# execution of the plugins from might be: -# -# nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/ -# -# This lets the nagios user run all commands in that directory (and only them) -# without asking for a password. If you do this, make sure you don't give -# random users write access to that directory or its contents! - -# command_prefix=/usr/bin/sudo - - - -# DEBUGGING OPTION -# This option determines whether or not debugging messages are logged to the -# syslog facility. -# Values: 0=debugging off, 1=debugging on - -debug=0 - - - -# COMMAND TIMEOUT -# This specifies the maximum number of seconds that the NRPE daemon will -# allow plugins to finish executing before killing them off. - -command_timeout=60 - - - -# CONNECTION TIMEOUT -# This specifies the maximum number of seconds that the NRPE daemon will -# wait for a connection to be established before exiting. This is sometimes -# seen where a network problem stops the SSL being established even though -# all network sessions are connected. This causes the nrpe daemons to -# accumulate, eating system resources. Do not set this too low. - -connection_timeout=300 - - - -# WEEK RANDOM SEED OPTION -# This directive allows you to use SSL even if your system does not have -# a /dev/random or /dev/urandom (on purpose or because the necessary patches -# were not applied). The random number generator will be seeded from a file -# which is either a file pointed to by the environment valiable $RANDFILE -# or $HOME/.rnd. If neither exists, the pseudo random number generator will -# be initialized and a warning will be issued. -# Values: 0=only seed from /dev/[u]random, 1=also seed from weak randomness - -#allow_weak_random_seed=1 - - - -# INCLUDE CONFIG FILE -# This directive allows you to include definitions from an external config file. - -#include=<somefile.cfg> - - - -# INCLUDE CONFIG DIRECTORY -# This directive allows you to include definitions from config files (with a -# .cfg extension) in one or more directories (with recursion). - -include_dir=/etc/nrpe.d/ - - - -# COMMAND DEFINITIONS -# Command definitions that this daemon will run. Definitions -# are in the following format: -# -# command[<command_name>]=<command_line> -# -# When the daemon receives a request to return the results of <command_name> -# it will execute the command specified by the <command_line> argument. -# -# Unlike Nagios, the command line cannot contain macros - it must be -# typed exactly as it should be executed. -# -# Note: Any plugins that are used in the command lines must reside -# on the machine that this daemon is running on! The examples below -# assume that you have plugins installed in a /usr/local/nagios/libexec -# directory. Also note that you will have to modify the definitions below -# to match the argument format the plugins expect. Remember, these are -# examples only! - - -# The following examples use hardcoded command arguments... - -command[check_users]=/usr/lib64/nagios/plugins/check_users -w 5 -c 10 -command[check_load]=/usr/lib64/nagios/plugins/check_load -w 15,10,5 -c 30,25,20 -command[check_hda1]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1 -command[check_zombie_procs]=/usr/lib64/nagios/plugins/check_procs -w 5 -c 10 -s Z -command[check_total_procs]=/usr/lib64/nagios/plugins/check_procs -w {{ nrpe_procs_warn }} -c {{ nrpe_procs_crit }} - - -# The following examples allow user-supplied arguments and can -# only be used if the NRPE daemon was compiled with support for -# command arguments *AND* the dont_blame_nrpe directive in this -# config file is set to '1'. This poses a potential security risk, so -# make sure you read the SECURITY file before doing this. - -#command[check_users]=/usr/lib64/nagios/plugins/check_users -w $ARG1$ -c $ARG2$ -#command[check_load]=/usr/lib64/nagios/plugins/check_load -w $ARG1$ -c $ARG2$ -#command[check_disk]=/usr/lib64/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$ -#command[check_procs]=/usr/lib64/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$ - - -# NEVER ADD ANYTHING HERE - ANY ENTRIES TO NRPE SHOULD BE in .cfg files in /etc/nrpe.d/ - -# NEVER NEVER NEVER -# diff --git a/files/nagios/client/scripts/check_lock b/files/nagios/client/scripts/check_lock deleted file mode 100755 index 1a58e95..0000000 --- a/files/nagios/client/scripts/check_lock +++ /dev/null @@ -1,17 +0,0 @@ -#!/usr/bin/python - -import fcntl -import sys - -try: - f = open('/mnt/koji/.nagios_test', 'r') - f.close() - f = open('/mnt/koji/.nagios_test', 'w') -except IOError: - print "Could not create file" - sys.exit(2) - -fcntl.flock(f, fcntl.LOCK_EX) -f.close() -print "File Locked Successfully" -sys.exit(0) diff --git a/files/nagios/client/scripts/check_postfix_queue b/files/nagios/client/scripts/check_postfix_queue deleted file mode 100755 index 44ab444..0000000 --- a/files/nagios/client/scripts/check_postfix_queue +++ /dev/null @@ -1,49 +0,0 @@ -#!/bin/bash -# -# 19-07-2010 -# Author: Cherwin Nooitmeer <cherwin@xxxxxxxxx> -# - -# exit codes -e_ok=0 -e_warning=1 -e_critical=2 -e_unknown=3 - -# regular expression that matches queue IDs (e.g. D71EF7AC80F8) -queue_id='^[A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9]' - -usage="Invalid command line usage" - -if [ -z $1 ]; then - echo $usage - exit $e_unknown -fi - -while getopts ":w:c:" options -do - case $options in - w ) warning=$OPTARG ;; - c ) critical=$OPTARG ;; - * ) echo $usage - exit $e_unknown ;; - esac -done - -# determine queue size -qsize=$(mailq | egrep -c $queue_id) -if [ -z $qsize ] -then - exit $e_unknown -fi - -if [ $qsize -ge $critical ]; then - retval=$e_critical -elif [ $qsize -ge $warning ]; then - retval=$e_warning -elif [ $qsize -lt $warning ]; then - retval=$e_ok -fi - -echo "$qsize mail(s) in queue | mail_queue=$qsize" -exit $retval diff --git a/files/nagios/client/scripts/check_raid.py b/files/nagios/client/scripts/check_raid.py deleted file mode 100755 index 48cddd9..0000000 --- a/files/nagios/client/scripts/check_raid.py +++ /dev/null @@ -1,45 +0,0 @@ -#!/usr/bin/env python -# -# very simple python script to parse out /proc/mdstat -# and give results for nagios to monitor -# - -import sys -import string - -devices = [] - -try: - mdstat = string.split(open('/proc/mdstat').read(), '\n') -except IOError: - # seems we have no software raid on this machines - sys.exit(0) - -error = "" -i = 0 -for line in mdstat: - if line[0:2] == 'md': - device = string.split(line)[0] - devices.append(device) - status = string.split(mdstat[i+1])[3] - if string.count(status, "_"): - # see if we can figure out what's going on - err = string.split(mdstat[i+2]) - msg = "device=%s status=%s" % (device, status) - if len(err) > 0: - msg = msg + " rebuild=%s" % err[0] - - if not error: - error = msg - else: - error = error + ", " + msg - i = i + 1 - -if not error: - print "DEVICES %s OK" % " ".join(devices) - sys.exit(0) - -else: - print error - sys.exit(2) - diff --git a/handlers/restart_services.yml b/handlers/restart_services.yml index 32c11e3..8b90f0f 100644 --- a/handlers/restart_services.yml +++ b/handlers/restart_services.yml @@ -56,9 +56,6 @@ - name: restart nfslock action: service name=nfslock state=restarted -- name: restart nrpe - action: service name=nrpe state=restarted - - name: restart ntpd action: service name=ntpd state=restarted diff --git a/playbooks/groups/backup-server.yml b/playbooks/groups/backup-server.yml index 0820d9f..2b30af4 100644 --- a/playbooks/groups/backup-server.yml +++ b/playbooks/groups/backup-server.yml @@ -16,6 +16,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: - include: $tasks/hosts.yml @@ -25,7 +26,6 @@ - include: $tasks/2fa_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml - include: $tasks/mysql_server.yml - include: $tasks/bacula_server.yml - include: $tasks/rdiff_backup_server.yml diff --git a/playbooks/groups/badges-backend.yml b/playbooks/groups/badges-backend.yml index 9d599c1..59b145e 100644 --- a/playbooks/groups/badges-backend.yml +++ b/playbooks/groups/badges-backend.yml @@ -32,6 +32,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: - include: $tasks/hosts.yml @@ -41,7 +42,6 @@ - include: $tasks/2fa_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml - include: $tasks/openvpn_client.yml only_if: "'$env' != 'staging'" - include: $tasks/fedmsg_base.yml diff --git a/playbooks/groups/badges-web.yml b/playbooks/groups/badges-web.yml index 4fddc4e..6c33548 100644 --- a/playbooks/groups/badges-web.yml +++ b/playbooks/groups/badges-web.yml @@ -35,6 +35,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: - include: $tasks/hosts.yml @@ -44,7 +45,6 @@ - include: $tasks/2fa_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml - include: $tasks/openvpn_client.yml only_if: "'$env' != 'staging'" - include: $tasks/fedmsg_base.yml diff --git a/playbooks/groups/beaker.yml b/playbooks/groups/beaker.yml index 43606c1..5ec502e 100644 --- a/playbooks/groups/beaker.yml +++ b/playbooks/groups/beaker.yml @@ -31,6 +31,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: # this is how you include other task lists @@ -42,7 +43,6 @@ - include: $tasks/collectd/client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml handlers: - include: $handlers/restart_services.yml diff --git a/playbooks/groups/gallery.yml b/playbooks/groups/gallery.yml index 141e613..152455a 100644 --- a/playbooks/groups/gallery.yml +++ b/playbooks/groups/gallery.yml @@ -32,6 +32,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: - include: $tasks/hosts.yml @@ -41,7 +42,6 @@ - include: $tasks/2fa_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml - include: $tasks/fedmsg_base.yml - include: $tasks/apache.yml diff --git a/playbooks/groups/kernel-qa.yml b/playbooks/groups/kernel-qa.yml index a99e3b5..b78c67e 100644 --- a/playbooks/groups/kernel-qa.yml +++ b/playbooks/groups/kernel-qa.yml @@ -15,6 +15,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: # this is how you include other task lists @@ -25,7 +26,6 @@ - include: $tasks/2fa_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml handlers: diff --git a/playbooks/groups/keyserver.yml b/playbooks/groups/keyserver.yml index 9fc5066..367a189 100644 --- a/playbooks/groups/keyserver.yml +++ b/playbooks/groups/keyserver.yml @@ -32,6 +32,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: - include: $tasks/hosts.yml @@ -41,7 +42,6 @@ - include: $tasks/2fa_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml - include: $tasks/fedmsg_base.yml - include: $tasks/apache.yml - include: $tasks/keyserver.yml diff --git a/playbooks/groups/koji-hub.yml b/playbooks/groups/koji-hub.yml index 2ede558..fd077ce 100644 --- a/playbooks/groups/koji-hub.yml +++ b/playbooks/groups/koji-hub.yml @@ -33,6 +33,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: - include: $tasks/hosts.yml @@ -42,7 +43,6 @@ - include: $tasks/2fa_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml - include: $tasks/collectd/client.yml - include: $tasks/koji/koji_hub.yml diff --git a/playbooks/groups/mailman.yml b/playbooks/groups/mailman.yml index c90f1c5..345aa37 100644 --- a/playbooks/groups/mailman.yml +++ b/playbooks/groups/mailman.yml @@ -31,6 +31,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: # this is how you include other task lists @@ -42,7 +43,6 @@ - include: $tasks/collectd/client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml handlers: - include: $handlers/restart_services.yml diff --git a/playbooks/groups/mirrorlist.yml b/playbooks/groups/mirrorlist.yml index e28e034..8d5b2f8 100644 --- a/playbooks/groups/mirrorlist.yml +++ b/playbooks/groups/mirrorlist.yml @@ -41,6 +41,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: # this is how you include other task lists @@ -53,7 +54,6 @@ - include: $tasks/openvpn_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml - include: $tasks/apache.yml - include: $tasks/mod_wsgi.yml - include: $tasks/geoip.yml diff --git a/playbooks/groups/postgresl-server.yml b/playbooks/groups/postgresl-server.yml index 92fdbde..8e8822b 100644 --- a/playbooks/groups/postgresl-server.yml +++ b/playbooks/groups/postgresl-server.yml @@ -33,6 +33,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: - include: $tasks/hosts.yml @@ -42,7 +43,6 @@ - include: $tasks/2fa_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml - include: $tasks/collectd/client.yml - include: $tasks/postgresql_server.yml diff --git a/playbooks/groups/releng.yml b/playbooks/groups/releng.yml index 649cfbb..f642840 100644 --- a/playbooks/groups/releng.yml +++ b/playbooks/groups/releng.yml @@ -31,10 +31,12 @@ - ${private}/vars.yml - ${vars}/${ansible_distribution}.yml + roles: + - nagios_client + tasks: - include: $tasks/koji/releng_config.yml - include: $tasks/motd.yml - - include: $tasks/nagios_client.yml handlers: - include: $handlers/restart_services.yml diff --git a/playbooks/groups/taskbot.yml b/playbooks/groups/taskbot.yml index 3d57356..7641266 100644 --- a/playbooks/groups/taskbot.yml +++ b/playbooks/groups/taskbot.yml @@ -31,6 +31,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: # this is how you include other task lists @@ -42,7 +43,6 @@ - include: $tasks/collectd/client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml handlers: - include: $handlers/restart_services.yml diff --git a/playbooks/groups/virthost.yml b/playbooks/groups/virthost.yml index 24761a4..763002b 100644 --- a/playbooks/groups/virthost.yml +++ b/playbooks/groups/virthost.yml @@ -15,6 +15,7 @@ roles: - rkhunter - denyhosts + - nagios_client tasks: - include: $tasks/hosts.yml @@ -24,7 +25,6 @@ - include: $tasks/2fa_client.yml - include: $tasks/motd.yml - include: $tasks/sudo.yml - - include: $tasks/nagios_client.yml - include: $tasks/collectd/client.yml - include: $tasks/virthost.yml diff --git a/roles/nagios_client/files/check_cron.cfg b/roles/nagios_client/files/check_cron.cfg new file mode 100644 index 0000000..e01f407 --- /dev/null +++ b/roles/nagios_client/files/check_cron.cfg @@ -0,0 +1 @@ +command[check_cron]=/usr/lib64/nagios/plugins/check_procs -c 1:10 -C 'crond' -u root diff --git a/roles/nagios_client/files/check_disk.cfg b/roles/nagios_client/files/check_disk.cfg new file mode 100644 index 0000000..d5d3290 --- /dev/null +++ b/roles/nagios_client/files/check_disk.cfg @@ -0,0 +1,2 @@ +command[check_disk_/]=/usr/lib64/nagios/plugins/check_disk -w 15% -c 10% -p / +command[check_disk_/boot]=/usr/lib64/nagios/plugins/check_disk -w 15% -c 10% -p /boot diff --git a/roles/nagios_client/files/check_lock.cfg b/roles/nagios_client/files/check_lock.cfg new file mode 100644 index 0000000..9525f9f --- /dev/null +++ b/roles/nagios_client/files/check_lock.cfg @@ -0,0 +1 @@ +command[check_lock]=/usr/lib64/nagios/plugins/check_lock diff --git a/roles/nagios_client/files/check_mirrorlist_cache.cfg b/roles/nagios_client/files/check_mirrorlist_cache.cfg new file mode 100644 index 0000000..bd4b171 --- /dev/null +++ b/roles/nagios_client/files/check_mirrorlist_cache.cfg @@ -0,0 +1 @@ +command[check_mirrorlist_cache]=/usr/lib64/nagios/plugins/check_file_age -w 14400 -c 129600 -f /var/lib/mirrormanager/mirrorlist_cache.pkl diff --git a/roles/nagios_client/files/check_postfix_queue.cfg b/roles/nagios_client/files/check_postfix_queue.cfg new file mode 100644 index 0000000..5574698 --- /dev/null +++ b/roles/nagios_client/files/check_postfix_queue.cfg @@ -0,0 +1 @@ +command[check_postfix_queue]=/usr/lib64/nagios/plugins/check_postfix_queue -w 2 -c 5 diff --git a/roles/nagios_client/files/check_raid.cfg b/roles/nagios_client/files/check_raid.cfg new file mode 100644 index 0000000..5c15d45 --- /dev/null +++ b/roles/nagios_client/files/check_raid.cfg @@ -0,0 +1 @@ +command[check_raid]=/usr/lib64/nagios/plugins/check_raid.py diff --git a/roles/nagios_client/files/check_swap.cfg b/roles/nagios_client/files/check_swap.cfg new file mode 100644 index 0000000..bc4f19d --- /dev/null +++ b/roles/nagios_client/files/check_swap.cfg @@ -0,0 +1 @@ +command[check_swap]=/usr/lib64/nagios/plugins/check_swap -w 15% -c 10% diff --git a/roles/nagios_client/files/nrpe.cfg b/roles/nagios_client/files/nrpe.cfg new file mode 100644 index 0000000..6933f78 --- /dev/null +++ b/roles/nagios_client/files/nrpe.cfg @@ -0,0 +1,223 @@ +############################################################################# +# Sample NRPE Config File +# Written by: Ethan Galstad (nagios@xxxxxxxxxx) +# +# Last Modified: 11-23-2007 +# +# NOTES: +# This is a sample configuration file for the NRPE daemon. It needs to be +# located on the remote host that is running the NRPE daemon, not the host +# from which the check_nrpe client is being executed. +############################################################################# + + +# LOG FACILITY +# The syslog facility that should be used for logging purposes. + +log_facility=daemon + + + +# PID FILE +# The name of the file in which the NRPE daemon should write it's process ID +# number. The file is only written if the NRPE daemon is started by the root +# user and is running in standalone mode. + +pid_file=/var/run/nrpe/nrpe.pid + + + +# PORT NUMBER +# Port number we should wait for connections on. +# NOTE: This must be a non-priviledged port (i.e. > 1024). +# NOTE: This option is ignored if NRPE is running under either inetd or xinetd + +server_port=5666 + + + +# SERVER ADDRESS +# Address that nrpe should bind to in case there are more than one interface +# and you do not want nrpe to bind on all interfaces. +# NOTE: This option is ignored if NRPE is running under either inetd or xinetd + +#server_address=127.0.0.1 + + + +# NRPE USER +# This determines the effective user that the NRPE daemon should run as. +# You can either supply a username or a UID. +# +# NOTE: This option is ignored if NRPE is running under either inetd or xinetd + +nrpe_user=nrpe + + + +# NRPE GROUP +# This determines the effective group that the NRPE daemon should run as. +# You can either supply a group name or a GID. +# +# NOTE: This option is ignored if NRPE is running under either inetd or xinetd + +nrpe_group=nrpe + + + +# ALLOWED HOST ADDRESSES +# This is an optional comma-delimited list of IP address or hostnames +# that are allowed to talk to the NRPE daemon. Network addresses with a bit mask +# (i.e. 192.168.1.0/24) are also supported. Hostname wildcards are not currently +# supported. +# +# Note: The daemon only does rudimentary checking of the client's IP +# address. I would highly recommend adding entries in your /etc/hosts.allow +# file to allow only the specified host to connect to the port +# you are running this daemon on. +# +# NOTE: This option is ignored if NRPE is running under either inetd or xinetd + + +allowed_hosts=10.5.126.41,192.168.1.10,192.168.1.20,209.132.181.35 + + + +# COMMAND ARGUMENT PROCESSING +# This option determines whether or not the NRPE daemon will allow clients +# to specify arguments to commands that are executed. This option only works +# if the daemon was configured with the --enable-command-args configure script +# option. +# +# *** ENABLING THIS OPTION IS A SECURITY RISK! *** +# Read the SECURITY file for information on some of the security implications +# of enabling this variable. +# +# Values: 0=do not allow arguments, 1=allow command arguments + +dont_blame_nrpe=0 + + + +# COMMAND PREFIX +# This option allows you to prefix all commands with a user-defined string. +# A space is automatically added between the specified prefix string and the +# command line from the command definition. +# +# *** THIS EXAMPLE MAY POSE A POTENTIAL SECURITY RISK, SO USE WITH CAUTION! *** +# Usage scenario: +# Execute restricted commmands using sudo. For this to work, you need to add +# the nagios user to your /etc/sudoers. An example entry for alllowing +# execution of the plugins from might be: +# +# nagios ALL=(ALL) NOPASSWD: /usr/lib/nagios/plugins/ +# +# This lets the nagios user run all commands in that directory (and only them) +# without asking for a password. If you do this, make sure you don't give +# random users write access to that directory or its contents! + +# command_prefix=/usr/bin/sudo + + + +# DEBUGGING OPTION +# This option determines whether or not debugging messages are logged to the +# syslog facility. +# Values: 0=debugging off, 1=debugging on + +debug=0 + + + +# COMMAND TIMEOUT +# This specifies the maximum number of seconds that the NRPE daemon will +# allow plugins to finish executing before killing them off. + +command_timeout=60 + + + +# CONNECTION TIMEOUT +# This specifies the maximum number of seconds that the NRPE daemon will +# wait for a connection to be established before exiting. This is sometimes +# seen where a network problem stops the SSL being established even though +# all network sessions are connected. This causes the nrpe daemons to +# accumulate, eating system resources. Do not set this too low. + +connection_timeout=300 + + + +# WEEK RANDOM SEED OPTION +# This directive allows you to use SSL even if your system does not have +# a /dev/random or /dev/urandom (on purpose or because the necessary patches +# were not applied). The random number generator will be seeded from a file +# which is either a file pointed to by the environment valiable $RANDFILE +# or $HOME/.rnd. If neither exists, the pseudo random number generator will +# be initialized and a warning will be issued. +# Values: 0=only seed from /dev/[u]random, 1=also seed from weak randomness + +#allow_weak_random_seed=1 + + + +# INCLUDE CONFIG FILE +# This directive allows you to include definitions from an external config file. + +#include=<somefile.cfg> + + + +# INCLUDE CONFIG DIRECTORY +# This directive allows you to include definitions from config files (with a +# .cfg extension) in one or more directories (with recursion). + +include_dir=/etc/nrpe.d/ + + + +# COMMAND DEFINITIONS +# Command definitions that this daemon will run. Definitions +# are in the following format: +# +# command[<command_name>]=<command_line> +# +# When the daemon receives a request to return the results of <command_name> +# it will execute the command specified by the <command_line> argument. +# +# Unlike Nagios, the command line cannot contain macros - it must be +# typed exactly as it should be executed. +# +# Note: Any plugins that are used in the command lines must reside +# on the machine that this daemon is running on! The examples below +# assume that you have plugins installed in a /usr/local/nagios/libexec +# directory. Also note that you will have to modify the definitions below +# to match the argument format the plugins expect. Remember, these are +# examples only! + + +# The following examples use hardcoded command arguments... + +command[check_users]=/usr/lib64/nagios/plugins/check_users -w 5 -c 10 +command[check_load]=/usr/lib64/nagios/plugins/check_load -w 15,10,5 -c 30,25,20 +command[check_hda1]=/usr/lib64/nagios/plugins/check_disk -w 20% -c 10% -p /dev/hda1 +command[check_zombie_procs]=/usr/lib64/nagios/plugins/check_procs -w 5 -c 10 -s Z +command[check_total_procs]=/usr/lib64/nagios/plugins/check_procs -w {{ nrpe_procs_warn }} -c {{ nrpe_procs_crit }} + + +# The following examples allow user-supplied arguments and can +# only be used if the NRPE daemon was compiled with support for +# command arguments *AND* the dont_blame_nrpe directive in this +# config file is set to '1'. This poses a potential security risk, so +# make sure you read the SECURITY file before doing this. + +#command[check_users]=/usr/lib64/nagios/plugins/check_users -w $ARG1$ -c $ARG2$ +#command[check_load]=/usr/lib64/nagios/plugins/check_load -w $ARG1$ -c $ARG2$ +#command[check_disk]=/usr/lib64/nagios/plugins/check_disk -w $ARG1$ -c $ARG2$ -p $ARG3$ +#command[check_procs]=/usr/lib64/nagios/plugins/check_procs -w $ARG1$ -c $ARG2$ -s $ARG3$ + + +# NEVER ADD ANYTHING HERE - ANY ENTRIES TO NRPE SHOULD BE in .cfg files in /etc/nrpe.d/ + +# NEVER NEVER NEVER +# diff --git a/roles/nagios_client/files/scripts/check_lock b/roles/nagios_client/files/scripts/check_lock new file mode 100755 index 0000000..1a58e95 --- /dev/null +++ b/roles/nagios_client/files/scripts/check_lock @@ -0,0 +1,17 @@ +#!/usr/bin/python + +import fcntl +import sys + +try: + f = open('/mnt/koji/.nagios_test', 'r') + f.close() + f = open('/mnt/koji/.nagios_test', 'w') +except IOError: + print "Could not create file" + sys.exit(2) + +fcntl.flock(f, fcntl.LOCK_EX) +f.close() +print "File Locked Successfully" +sys.exit(0) diff --git a/roles/nagios_client/files/scripts/check_postfix_queue b/roles/nagios_client/files/scripts/check_postfix_queue new file mode 100755 index 0000000..44ab444 --- /dev/null +++ b/roles/nagios_client/files/scripts/check_postfix_queue @@ -0,0 +1,49 @@ +#!/bin/bash +# +# 19-07-2010 +# Author: Cherwin Nooitmeer <cherwin@xxxxxxxxx> +# + +# exit codes +e_ok=0 +e_warning=1 +e_critical=2 +e_unknown=3 + +# regular expression that matches queue IDs (e.g. D71EF7AC80F8) +queue_id='^[A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9][A-F0-9]' + +usage="Invalid command line usage" + +if [ -z $1 ]; then + echo $usage + exit $e_unknown +fi + +while getopts ":w:c:" options +do + case $options in + w ) warning=$OPTARG ;; + c ) critical=$OPTARG ;; + * ) echo $usage + exit $e_unknown ;; + esac +done + +# determine queue size +qsize=$(mailq | egrep -c $queue_id) +if [ -z $qsize ] +then + exit $e_unknown +fi + +if [ $qsize -ge $critical ]; then + retval=$e_critical +elif [ $qsize -ge $warning ]; then + retval=$e_warning +elif [ $qsize -lt $warning ]; then + retval=$e_ok +fi + +echo "$qsize mail(s) in queue | mail_queue=$qsize" +exit $retval diff --git a/roles/nagios_client/files/scripts/check_raid.py b/roles/nagios_client/files/scripts/check_raid.py new file mode 100755 index 0000000..48cddd9 --- /dev/null +++ b/roles/nagios_client/files/scripts/check_raid.py @@ -0,0 +1,45 @@ +#!/usr/bin/env python +# +# very simple python script to parse out /proc/mdstat +# and give results for nagios to monitor +# + +import sys +import string + +devices = [] + +try: + mdstat = string.split(open('/proc/mdstat').read(), '\n') +except IOError: + # seems we have no software raid on this machines + sys.exit(0) + +error = "" +i = 0 +for line in mdstat: + if line[0:2] == 'md': + device = string.split(line)[0] + devices.append(device) + status = string.split(mdstat[i+1])[3] + if string.count(status, "_"): + # see if we can figure out what's going on + err = string.split(mdstat[i+2]) + msg = "device=%s status=%s" % (device, status) + if len(err) > 0: + msg = msg + " rebuild=%s" % err[0] + + if not error: + error = msg + else: + error = error + ", " + msg + i = i + 1 + +if not error: + print "DEVICES %s OK" % " ".join(devices) + sys.exit(0) + +else: + print error + sys.exit(2) + diff --git a/roles/nagios_client/handlers/main.yml b/roles/nagios_client/handlers/main.yml new file mode 100644 index 0000000..1086c3d --- /dev/null +++ b/roles/nagios_client/handlers/main.yml @@ -0,0 +1,3 @@ +--- +- name: restart nrpe + action: service name=nrpe state=restarted diff --git a/roles/nagios_client/tasks/main.yml b/roles/nagios_client/tasks/main.yml new file mode 100644 index 0000000..5888ede --- /dev/null +++ b/roles/nagios_client/tasks/main.yml @@ -0,0 +1,63 @@ +# nagios-client/nrpe + +--- +# install pkgs: +- name: install nagios client pkgs + yum: name=$item state=installed + with_items: + - nrpe + - nagios-plugins + - nagios-plugins-disk + - nagios-plugins-file_age + - nagios-plugins-users + - nagios-plugins-procs + - nagios-plugins-swap + - nagios-plugins-load + - nagios-plugins-ping + tags: + - packages + +- name: install local nrpe check scripts that are not packaged + copy: src=scripts/$item dest=/usr/lib64/nagios/plugins/$item mode=0755 owner=nagios group=nagios + with_items: + - check_postfix_queue + - check_raid.py + - check_lock + +# create dirs +# puppet used to make /var/spool/nagios (owned by nagios.nagios) mode 750 +# and /usr/lib/nagios/plugins (owned by root) mode 755 - but we don't know WHY +# then stuff it with plugins from the plugins dir in the nagios module +# then we symlinked that to /usr/lib64/nagios/plugins +# it was a nightmare - don't do that - my ghost will haunt you if you do +# skvidal 2013-05-21 + + +# FIXME? figure out nrpe selinux policy of DOOM is needed + +- name: /etc/nagios/nrpe.cfg + template: src=nrpe.cfg dest=/etc/nagios/nrpe.cfg + notify: + - restart nrpe + tags: + - config + +- name: install nrpe client configs + template: src=$item dest=/etc/nrpe.d/$item + with_items: + - check_mirrorlist_cache.cfg + - check_raid.cfg + - check_cron.cfg + - check_disk.cfg + - check_swap.cfg + - check_postfix_queue.cfg + - check_lock.cfg + notify: + - restart nrpe + tags: + - config + +- name: nrpe service start + service: name=nrpe state=running enabled=true + tags: + - service diff --git a/tasks/nagios_client.yml b/tasks/nagios_client.yml deleted file mode 100644 index 56093f0..0000000 --- a/tasks/nagios_client.yml +++ /dev/null @@ -1,63 +0,0 @@ -# nagios-client/nrpe - ---- -# install pkgs: -- name: install nagios client pkgs - yum: name=$item state=installed - with_items: - - nrpe - - nagios-plugins - - nagios-plugins-disk - - nagios-plugins-file_age - - nagios-plugins-users - - nagios-plugins-procs - - nagios-plugins-swap - - nagios-plugins-load - - nagios-plugins-ping - tags: - - packages - -- name: install local nrpe check scripts that are not packaged - copy: src=$files/nagios/client/scripts/$item dest=/usr/lib64/nagios/plugins/$item mode=0755 owner=nagios group=nagios - with_items: - - check_postfix_queue - - check_raid.py - - check_lock - -# create dirs -# puppet used to make /var/spool/nagios (owned by nagios.nagios) mode 750 -# and /usr/lib/nagios/plugins (owned by root) mode 755 - but we don't know WHY -# then stuff it with plugins from the plugins dir in the nagios module -# then we symlinked that to /usr/lib64/nagios/plugins -# it was a nightmare - don't do that - my ghost will haunt you if you do -# skvidal 2013-05-21 - - -# FIXME? figure out nrpe selinux policy of DOOM is needed - -- name: /etc/nagios/nrpe.cfg - template: src=$files/nagios/client/nrpe.cfg dest=/etc/nagios/nrpe.cfg - notify: - - restart nrpe - tags: - - config - -- name: install nrpe client configs - template: src=$files/nagios/client/$item dest=/etc/nrpe.d/$item - with_items: - - check_mirrorlist_cache.cfg - - check_raid.cfg - - check_cron.cfg - - check_disk.cfg - - check_swap.cfg - - check_postfix_queue.cfg - - check_lock.cfg - notify: - - restart nrpe - tags: - - config - -- name: nrpe service start - service: name=nrpe state=running enabled=true - tags: - - service -- 1.8.3.1 _______________________________________________ infrastructure mailing list infrastructure@xxxxxxxxxxxxxxxxxxxxxxx https://admin.fedoraproject.org/mailman/listinfo/infrastructure