Hi all, We currently don't monitor whether resultsdb is sending messages on the bus. This patch makes Nagios trigger an alert if we haven't seen a resultsdb message in 2 days (172800 seconds). +1s? Thanks, Clément
From bb3a6083d2c50a010ab837a7c5c80017224957e4 Mon Sep 17 00:00:00 2001 From: Clement Verna <cverna@xxxxxxxxxxxx> Date: Wed, 24 Apr 2019 09:48:26 +0200 Subject: [PATCH] Nagios: monitor that resultsdb sends messages on the bus Signed-off-by: Clement Verna <cverna@xxxxxxxxxxxx> --- .../nagios_client/templates/check_datanommer_history.cfg.j2 | 1 + roles/nagios_server/files/nagios/services/fedmsg.cfg | 6 ++++++ 2 files changed, 7 insertions(+) diff --git a/roles/nagios_client/templates/check_datanommer_history.cfg.j2 b/roles/nagios_client/templates/check_datanommer_history.cfg.j2 index b967285fb..f89ebfb3a 100644 --- a/roles/nagios_client/templates/check_datanommer_history.cfg.j2 +++ b/roles/nagios_client/templates/check_datanommer_history.cfg.j2 @@ -40,6 +40,7 @@ command[check_datanommer_autocloud]={{libdir}}/nagios/plugins/check_datanommer_t command[check_datanommer_twoweekatomic]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py org.fedoraproject.prod.releng.atomic.twoweek.complete 1296000 1382400 command[check_datanommer_mdapi]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py mdapi 28800 86400 command[check_datanommer_greenwave]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py greenwave 172800 172800 +command[check_datanommer_resultsdb]=/usr/lib64/nagios/plugins/check_datanommer_timesince.py resultsdb 172800 172800 # This one is retired since it times out all the time. Too few messages. 
#command[check_datanommer_nuancier]={{libdir}}/nagios/plugins/check_datanommer_timesince.py nuancier 23652000 31536000 diff --git a/roles/nagios_server/files/nagios/services/fedmsg.cfg b/roles/nagios_server/files/nagios/services/fedmsg.cfg index 255e619e6..b10aaee58 100644 --- a/roles/nagios_server/files/nagios/services/fedmsg.cfg +++ b/roles/nagios_server/files/nagios/services/fedmsg.cfg @@ -320,6 +320,12 @@ define service { check_command check_by_nrpe!check_datanommer_greenwave use defaulttemplate } +define service { + host_name busgateway01.phx2.fedoraproject.org + service_description Check datanommer for recent resultsdb messages + check_command check_by_nrpe!check_datanommer_resultsdb + use defaulttemplate +} # BEGIN, check consumers and producers -- 2.21.0
_______________________________________________ infrastructure mailing list -- infrastructure@xxxxxxxxxxxxxxxxxxxxxxx To unsubscribe send an email to infrastructure-leave@xxxxxxxxxxxxxxxxxxxxxxx Fedora Code of Conduct: https://getfedora.org/code-of-conduct.html List Guidelines: https://fedoraproject.org/wiki/Mailing_list_guidelines List Archives: https://lists.fedoraproject.org/archives/list/infrastructure@xxxxxxxxxxxxxxxxxxxxxxx