From fc4d38b8cd88064831bcb3667828b429ff4f1f56 Mon Sep 17 00:00:00 2001
From: Chl
Date: Mon, 23 Aug 2021 01:18:31 +0200
Subject: [PATCH] nagios: adding check_swaping.sh

---
 nagios/check_swaping.sh      | 196 +++++++++++++++++++++++++++++++++++
 nagios/etc/30_nrpe-basic.cfg |   1 +
 2 files changed, 197 insertions(+)
 create mode 100755 nagios/check_swaping.sh

diff --git a/nagios/check_swaping.sh b/nagios/check_swaping.sh
new file mode 100755
index 0000000..ec400f6
--- /dev/null
+++ b/nagios/check_swaping.sh
@@ -0,0 +1,196 @@
+#!/bin/sh
+
+# Nagios/NRPE plugin which:
+# - checks whether the system has a heavy swap activity
+# - records the swap-in and swap-out counters as perfdata
+#
+# For pnp4nagios' sake, create a file usually named 'check_nrpe_swaping.cfg'
+# (depends on your configuration) with the following content:
+# DATATYPE = COUNTER,COUNTER
+
+# Default values
+THRESHOLD_WARNING="1000"
+THRESHOLD_CRITICAL="2000"
+# NOTE: predictable name in a world-writable directory; use -f to keep the
+# state in a private directory when local users are not trusted.
+VMSTAT_PREVIOUS_DATA_FILE="/tmp/.monitoring_vmstat.txt"
+
+# Output
+OUTPUT_PERFDATA=""
+
+# dirname copes with a $0 that has no directory part (it then prints ".").
+PROGPATH=$( dirname "$0" )
+REVISION="0.1"
+
+# Stop at the first uncaught error
+set -e
+
+# utils.sh normally supplies the STATE_* exit codes; the assignments below
+# only fill in whatever is still unset when that file is not available.
+if [ -r "$PROGPATH/utils.sh" ]; then
+    . "$PROGPATH/utils.sh"
+fi
+: "${STATE_OK:=0}"
+: "${STATE_WARNING:=1}"
+: "${STATE_CRITICAL:=2}"
+: "${STATE_UNKNOWN:=3}"
+
+
+#
+# Help function
+#
+usage() {
+    cat <<EOF
+check_swaping.sh $REVISION - check the swap activity of the system
+
+Usage: $0 [-h] [-w WARNING] [-c CRITICAL] [-f FILE]
+
+  -h           print this help and exit
+  -w WARNING   warning threshold, in pages swapped per second
+  -c CRITICAL  critical threshold, in pages swapped per second
+  -f FILE      file remembering the counters between two calls
+EOF
+}
+
+#
+# Write a short header followed by a fresh copy of /proc/vmstat into "$1".
+# Returns non-zero when the snapshot could not be written.
+#
+update_vmstat_previous_data_file() {
+    {
+        echo "# This file is used by check_swaping.sh to remember the previous vmstat counters."
+        echo "# It can be deleted if the need arise, it will be easily recreated without too much lost."
+        echo ""
+        cat /proc/vmstat
+    } >"$1"
+}
+
+#
+# Print the value of the counter named "$1" stored in the snapshot "$2".
+# Prints nothing when it is missing; callers validate the result with is_int.
+#
+read_vmstat_counter() {
+    awk -v name="$1" '$1 == name { print $2; exit }' "$2" || true
+}
+
+#
+# Check if arg is an integer
+# (copied from jilles @ http://stackoverflow.com/questions/806906/how-do-i-test-if-a-variable-is-a-number-in-bash )
+#
+is_int() {
+    case "$1" in
+        ''|*[!0-9]*) return 1;;
+        *) return 0;;
+    esac
+}
+
+#
+# Print "$1" as the plugin output and exit with the UNKNOWN status.
+#
+unknown() {
+    echo "$1"
+    exit "$STATE_UNKNOWN"
+}
+
+
+#
+# Parameters management
+#
+while getopts hw:c:f: f; do
+    case "$f" in
+        'h')
+            usage
+            exit
+            ;;
+
+        'w')
+            THRESHOLD_WARNING="$OPTARG"
+            ;;
+
+        'c')
+            THRESHOLD_CRITICAL="$OPTARG"
+            ;;
+
+        'f')
+            VMSTAT_PREVIOUS_DATA_FILE="$OPTARG"
+            ;;
+
+        \?)
+            usage
+            exit "$STATE_UNKNOWN"
+            ;;
+    esac
+done
+shift $(( $OPTIND - 1 ))
+
+# Little checks
+if ! is_int "$THRESHOLD_WARNING" || ! is_int "$THRESHOLD_CRITICAL"; then
+    unknown "UNKNOWN invalid parameter : one of the threshold is not an integer."
+fi
+
+# Refuse to use a symbolic link found at the (predictable) state file path
+if [ -L "$VMSTAT_PREVIOUS_DATA_FILE" ]; then
+    unknown "UNKNOWN: $VMSTAT_PREVIOUS_DATA_FILE is a symbolic link."
+fi
+
+# Check if the previous data file exists
+if [ ! -w "$VMSTAT_PREVIOUS_DATA_FILE" ]; then
+    update_vmstat_previous_data_file "$VMSTAT_PREVIOUS_DATA_FILE" \
+        || unknown "UNKNOWN: cannot write $VMSTAT_PREVIOUS_DATA_FILE."
+
+    # We wait a little bit to gather some data even on the first run
+    # (or we could return an UNKNOWN ?)
+    sleep 2
+fi
+
+# Read previous data
+PREVIOUS_PSWPIN=$( read_vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" )
+PREVIOUS_PSWPOUT=$( read_vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" )
+PREVIOUS_MTIME=$( stat --printf="%Y" "$VMSTAT_PREVIOUS_DATA_FILE" ) || PREVIOUS_MTIME=""
+
+if ! is_int "$PREVIOUS_PSWPIN" || ! is_int "$PREVIOUS_PSWPOUT" || ! is_int "$PREVIOUS_MTIME"; then
+    # Replace the unusable snapshot so that the next call can recover
+    update_vmstat_previous_data_file "$VMSTAT_PREVIOUS_DATA_FILE" || true
+    unknown "UNKNOWN: no usable swap counters in $VMSTAT_PREVIOUS_DATA_FILE."
+fi
+PREVIOUS_PSWPTOTAL=$(( $PREVIOUS_PSWPIN + $PREVIOUS_PSWPOUT ))
+
+# Get time elapsed since last call
+PREVIOUS_DATA_AGE=$(( $( date +%s ) - $PREVIOUS_MTIME ))
+if [ "$PREVIOUS_DATA_AGE" -le 0 ]; then
+    unknown "UNKNOWN: $PREVIOUS_DATA_AGE second(s) elapsed since last call."
+fi
+
+# Update and read current data
+update_vmstat_previous_data_file "$VMSTAT_PREVIOUS_DATA_FILE" \
+    || unknown "UNKNOWN: cannot write $VMSTAT_PREVIOUS_DATA_FILE."
+CURRENT_PSWPIN=$( read_vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" )
+CURRENT_PSWPOUT=$( read_vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" )
+if ! is_int "$CURRENT_PSWPIN" || ! is_int "$CURRENT_PSWPOUT"; then
+    unknown "UNKNOWN: no swap counters (pswpin/pswpout) in /proc/vmstat."
+fi
+CURRENT_PSWPTOTAL=$(( $CURRENT_PSWPIN + $CURRENT_PSWPOUT ))
+
+# Calculate the swapping rate (pages per second).
+# A negative difference means the counters restarted from zero (typically a
+# reboot): what has been counted since then all happened during the interval.
+PSWP_DELTA=$(( $CURRENT_PSWPTOTAL - $PREVIOUS_PSWPTOTAL ))
+if [ "$PSWP_DELTA" -lt 0 ]; then
+    PSWP_DELTA=$CURRENT_PSWPTOTAL
+fi
+PSWP_RATE=$(( $PSWP_DELTA / $PREVIOUS_DATA_AGE ))
+
+# Generate perfdata
+OUTPUT_PERFDATA=" pswpin=$CURRENT_PSWPIN pswpout=$CURRENT_PSWPOUT"
+
+# Comparison
+if [ "$PSWP_RATE" -gt "$THRESHOLD_CRITICAL" ]; then
+    echo "CRITICAL swaping rate at $PSWP_RATE (limit at $THRESHOLD_CRITICAL) |$OUTPUT_PERFDATA"
+    exit "$STATE_CRITICAL"
+elif [ "$PSWP_RATE" -gt "$THRESHOLD_WARNING" ]; then
+    echo "WARNING swaping rate at $PSWP_RATE (limit at $THRESHOLD_WARNING) |$OUTPUT_PERFDATA"
+    exit "$STATE_WARNING"
+fi
+
+echo "OK swaping rate at $PSWP_RATE |$OUTPUT_PERFDATA"
+exit "$STATE_OK"
diff --git a/nagios/etc/30_nrpe-basic.cfg b/nagios/etc/30_nrpe-basic.cfg
index 82ae802..d9aa4a9 100644
--- a/nagios/etc/30_nrpe-basic.cfg
+++ b/nagios/etc/30_nrpe-basic.cfg
@@ -2,6 +2,7 @@
 command[check_disks]=/usr/lib/nagios/plugins/check_disk -f -w 10% -c 5% -W 50% -K 5% -l -X tmpfs -X devpts -X usbfs -X nsfs -X overlay
 command[check_load]=/usr/lib/nagios/plugins/check_load -w 1,1,1 -c 3,2,2
 command[check_network_volume]=/usr/local/share/scripts-admin/nagios/check_network_volume.sh
+command[check_swaping]=/usr/local/share/scripts-admin/nagios/check_swaping.sh
 
 # Petite commande temporaire pour étudier souci neighbour table overflow
 command[check_network-neighbour-table]=/usr/local/share/scripts-admin/nagios/check_network-neighbour-table.sh