This is an attempt to add some simple monitoring so if the Pagure task queue starts backing up, we know about it. --- commit f207778a0e5aceba6c18885be7478317aaeab4b0 Author: Ricky Elrod <codeblock@xxxxxxxxxxxxxxxxx> Date: Fri May 26 23:09:02 2017 +0000 add simple monitoring for pagure's celery redis queue Signed-off-by: Ricky Elrod <codeblock@xxxxxxxxxxxxxxxxx> diff --git a/roles/nagios_client/files/scripts/check_redis_queue.sh b/roles/nagios_client/files/scripts/check_redis_queue.sh new file mode 100644 index 0000000..ca1f186 --- /dev/null +++ b/roles/nagios_client/files/scripts/check_redis_queue.sh @@ -0,0 +1,23 @@ +#!/usr/bin/env bash + +. /usr/lib64/nagios/plugins/utils.sh + +if [[ "$#" -ne 3 ]]; then + echo "Arguments: key warn crit" + exit $STATE_UNKNOWN +fi + +tasks="$(redis-cli llen "$1" | awk '{print $1}')" + +check_range $tasks $2:$3 +status=$? + +if [[ "$status" == "$STATE_OK" ]]; then + echo "OK: $1 queue has $tasks tasks" +elif [[ "$status" == "$STATE_WARNING" ]]; then + echo "WARNING: $1 queue has $tasks tasks" +elif [[ "$status" == "$STATE_CRITICAL" ]]; then + echo "CRITICAL: $1 queue has $tasks tasks" +fi + +exit $status diff --git a/roles/nagios_client/tasks/main.yml b/roles/nagios_client/tasks/main.yml index 714be36..22ddd4f 100644 --- a/roles/nagios_client/tasks/main.yml +++ b/roles/nagios_client/tasks/main.yml @@ -73,6 +73,7 @@ - check_osbs_builds.py - check_osbs_api.py - check_ipa_replication + - check_redis_queue.sh when: not inventory_hostname.startswith('noc') tags: - nagios_client @@ -146,6 +147,7 @@ - check_koschei_watcher_proc.cfg - check_testcloud.cfg - check_mirrorlist_docker_proxy.cfg + - check_celery_redis_queue.cfg notify: - restart nrpe tags: diff --git a/roles/nagios_client/templates/check_celery_redis_queue.cfg.j2 b/roles/nagios_client/templates/check_celery_redis_queue.cfg.j2 new file mode 100644 index 0000000..56279f3 --- /dev/null +++ b/roles/nagios_client/templates/check_celery_redis_queue.cfg.j2 @@ -0,0 +1 @@ 
+command[check_celery_redis_queue]=/usr/lib64/nagios/plugins/check_redis_queue.sh celery 5 10
diff --git a/roles/nagios_server/files/nagios/services/pagure_redis.cfg b/roles/nagios_server/files/nagios/services/pagure_redis.cfg
new file mode 100644
index 0000000..d5387d0
--- /dev/null
+++ b/roles/nagios_server/files/nagios/services/pagure_redis.cfg
@@ -0,0 +1,6 @@
+define service {
+  host_name pagure01.fedoraproject.org
+  service_description Redis/celery queue
+  check_command check_by_nrpe!check_celery_redis_queue
+  use defaulttemplate
+}
_______________________________________________
infrastructure mailing list -- infrastructure@xxxxxxxxxxxxxxxxxxxxxxx
To unsubscribe send an email to infrastructure-leave@xxxxxxxxxxxxxxxxxxxxxxx