nagios: adding check_swaping.sh
This commit is contained in:
parent
2e7e98e812
commit
fc4d38b8cd
2 changed files with 170 additions and 0 deletions
169
nagios/check_swaping.sh
Executable file
169
nagios/check_swaping.sh
Executable file
|
@ -0,0 +1,169 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
|
||||||
|
# Little custom script to :
|
||||||
|
# - check if the system has a heavy swap activity
|
||||||
|
# - record swap-in and swap-out activity (pswpin / pswpout) as perfdata
|
||||||
|
#
|
||||||
|
# For pnp4nagios' sake, create a file usually named 'check_nrpe_swaping.cfg'
|
||||||
|
# (depends on your configuration) with the following content :
|
||||||
|
# DATATYPE = COUNTER,COUNTER
|
||||||
|
|
||||||
|
|
||||||
|
# Default values (each one can be overridden on the command line: -w, -c, -f)
THRESHOLD_WARNING="1000"
THRESHOLD_CRITICAL="2000"
VMSTAT_PREVIOUS_DATA_FILE="/tmp/.monitoring_vmstat.txt"

# Output
OUTPUT_EXIT_STATUS=0
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""

# Directory holding this script; used to locate utils.sh.
# dirname is used instead of a hand-written sed expression so that a $0
# without any slash (e.g. "sh check_swaping.sh") yields "." rather than the
# script name itself, and so that paths containing spaces survive.
PROGPATH=$( dirname -- "$0" )
REVISION="0.1"
|
||||||
|
|
||||||
|
# Stop at the first uncaught error
set -e

# Include the Nagios helpers (STATE_* exit codes, check_range()) when utils.sh
# is available next to this script. The path is quoted so that an install
# directory containing spaces still works.
if [ -r "$PROGPATH/utils.sh" ]; then
    . "$PROGPATH/utils.sh"
fi

# This script only needs the exit-status constants from utils.sh (the range
# checking function is not used here), so fall back to the standard Nagios
# values for any constant that is still undefined instead of aborting.
: "${STATE_OK:=0}"
: "${STATE_WARNING:=1}"
: "${STATE_CRITICAL:=2}"
: "${STATE_UNKNOWN:=3}"
: "${STATE_DEPENDENT:=4}"
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Help function
|
||||||
|
#
|
||||||
|
usage() {
    # Print the command-line help on stdout.
    # The "Default values" section shows whatever THRESHOLD_WARNING,
    # THRESHOLD_CRITICAL and VMSTAT_PREVIOUS_DATA_FILE hold at call time.
    # One printf argument per output line; empty arguments are blank lines.
    printf '%s\n' \
        "Usage :" \
        "$0 [-w warning_threshold] [-c critical_threshold] [-f vmstat_previous_data_file ]" \
        "" \
        "Note 1 : the script will measure the number of seconds passed since its last call and will" \
        "divide the measures accordingly, so write the thresholds using pages/s in mind." \
        "The script will measure against pswpin and pswpout added together." \
        "" \
        "Note 2 : the thresholds use the kernel page size as unit. Use 'getconf PAGESIZE' to get it if needed." \
        "" \
        "Default values:" \
        "warning_threshold : $THRESHOLD_WARNING" \
        "critical_threshold : $THRESHOLD_CRITICAL" \
        "vmstat_previous_data_file : $VMSTAT_PREVIOUS_DATA_FILE"
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copy fresh vmstat data into the file passed as an argument
|
||||||
|
#
|
||||||
|
update_vmstat_previous_data_file() {
    # Write a short header followed by the current content of /proc/vmstat
    # into the file named by $1.
    #
    # Arguments: $1 - path of the data file to (re)create
    # Returns:   0 on success, non-zero when the snapshot could not be
    #            gathered or written
    #
    # Everything runs in a subshell so the restrictive umask does not leak
    # into the rest of the script.
    (
        # Harden default files permissions to avoid some data leaks
        umask "0077" || true

        # Gather the whole snapshot first so a failing read never leaves a
        # half-written data file behind.
        snapshot=$(
            echo "# This file was written by $0 for monitoring swap activity."
            echo "# It can be deleted if the need arise, it will be easily recreated without too much lost."
            echo ""
            cat /proc/vmstat
        ) || exit 1

        # Preferred path: write to a private temporary file next to the
        # target, then rename it into place. The rename is atomic (readers
        # never see a truncated file) and replaces a pre-planted symlink
        # instead of writing through it.
        if tmp_file=$( mktemp -- "$1.XXXXXX" 2>/dev/null ); then
            if printf '%s\n' "$snapshot" >"$tmp_file" && mv -f -- "$tmp_file" "$1"; then
                exit 0
            fi
            rm -f -- "$tmp_file"
            exit 1
        fi

        # Fallback: the directory does not allow creating a temporary file
        # (e.g. only the data file itself is writable); write in place.
        printf '%s\n' "$snapshot" >"$1"
    )
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# Check if arg is an integer
|
||||||
|
# (copied from jilles @ http://stackoverflow.com/questions/806906/how-do-i-test-if-a-variable-is-a-number-in-bash )
|
||||||
|
#
|
||||||
|
is_int() {
    # Succeed (return 0) when $1 is a non-empty string made only of the
    # digits 0-9; fail (return 1) otherwise. Signs, spaces and decimal
    # points are all rejected.
    [ -n "$1" ] || return 1

    case "$1" in
        *[!0-9]*)
            # At least one character is not a digit
            return 1
            ;;
    esac

    return 0
}
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Parameters management
|
||||||
|
#
|
||||||
|
while getopts hw:c:f: f; do
    case "$f" in
        'h')
            usage
            exit
            ;;

        'w')
            THRESHOLD_WARNING="$OPTARG"
            ;;

        'c')
            THRESHOLD_CRITICAL="$OPTARG"
            ;;

        'f')
            VMSTAT_PREVIOUS_DATA_FILE="$OPTARG"
            ;;

        \?)
            # Unknown option or missing option argument. Exit with the
            # Nagios UNKNOWN status: a plain "exit 1" would be reported as
            # a WARNING about swap activity instead of a usage error.
            usage
            exit "${STATE_UNKNOWN:-3}"
            ;;
    esac
done
# Drop the options that were just parsed (shell arithmetic instead of
# spawning expr)
shift $(( OPTIND - 1 ))
|
||||||
|
|
||||||
|
# Little checks
if ! is_int "$THRESHOLD_WARNING" || ! is_int "$THRESHOLD_CRITICAL"; then
    echo "UNKNOWN invalid parameter : one of the threshold is not an integer."
    exit "$STATE_UNKNOWN"
fi

# Print the value of the counter named $1 as found in the vmstat-formatted
# file $2. Prints nothing when the counter or the file cannot be read; the
# callers validate the result with is_int, so failures are deliberately not
# fatal here.
read_vmstat_counter() {
    awk -v name="$1" '$1 == name { print $2; exit }' "$2" 2>/dev/null || true
}

# Check if the previous data file exists
if [ ! -w "$VMSTAT_PREVIOUS_DATA_FILE" ]; then
    update_vmstat_previous_data_file "$VMSTAT_PREVIOUS_DATA_FILE"

    # We wait a little bit to gather some data even on the first run
    # (or we could return an UNKNOWN ?)
    sleep 2
fi

# Read previous data, and the time at which it was recorded
PREVIOUS_PSWPIN="$( read_vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" )"
PREVIOUS_PSWPOUT="$( read_vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" )"
PREVIOUS_DATA_MTIME="$( stat --printf="%Y" "$VMSTAT_PREVIOUS_DATA_FILE" 2>/dev/null )" || PREVIOUS_DATA_MTIME=""

# An empty or garbled data file would otherwise make the arithmetic below
# abort the script without any plugin output. Report UNKNOWN instead, and
# regenerate the file (best effort) so the next run has a usable baseline.
if ! is_int "$PREVIOUS_PSWPIN" || ! is_int "$PREVIOUS_PSWPOUT" || ! is_int "$PREVIOUS_DATA_MTIME"; then
    update_vmstat_previous_data_file "$VMSTAT_PREVIOUS_DATA_FILE" || true
    echo "UNKNOWN: unable to read previous swap counters from $VMSTAT_PREVIOUS_DATA_FILE."
    exit "$STATE_UNKNOWN"
fi
PREVIOUS_PSWPTOTAL=$(( PREVIOUS_PSWPIN + PREVIOUS_PSWPOUT ))

# Get time elapsed since last call
PREVIOUS_DATA_AGE=$(( $( date +%s ) - PREVIOUS_DATA_MTIME ))
if [ "$PREVIOUS_DATA_AGE" -le "0" ]; then
    # Also protects the division below against a zero divisor
    echo "UNKNOWN: $PREVIOUS_DATA_AGE second(s) elapsed since last call."
    exit "$STATE_UNKNOWN"
fi

# Update and read current data
update_vmstat_previous_data_file "$VMSTAT_PREVIOUS_DATA_FILE"
CURRENT_PSWPIN="$( read_vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" )"
CURRENT_PSWPOUT="$( read_vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" )"
if ! is_int "$CURRENT_PSWPIN" || ! is_int "$CURRENT_PSWPOUT"; then
    echo "UNKNOWN: unable to read current swap counters from $VMSTAT_PREVIOUS_DATA_FILE."
    exit "$STATE_UNKNOWN"
fi
CURRENT_PSWPTOTAL=$(( CURRENT_PSWPIN + CURRENT_PSWPOUT ))

# Calculate the swapping rate (pages per second, swap-in + swap-out)
PSWP_RATE=$(( ( CURRENT_PSWPTOTAL - PREVIOUS_PSWPTOTAL ) / PREVIOUS_DATA_AGE ))

# Generate perfdata (raw counters; the rate is derived on the graphing side)
OUTPUT_PERFDATA="$( printf " pswpin=%d pswpout=%d" "$CURRENT_PSWPIN" "$CURRENT_PSWPOUT" )"

# Comparison
# note: remember that the counters can be reset to zero from time to time
# (64bits counter ?); the rate is then negative and falls through to OK.
if [ "$PSWP_RATE" -gt "$THRESHOLD_CRITICAL" ]; then
    echo "CRITICAL swaping rate at $PSWP_RATE (limit at $THRESHOLD_CRITICAL) |$OUTPUT_PERFDATA"
    exit "$STATE_CRITICAL"
elif [ "$PSWP_RATE" -gt "$THRESHOLD_WARNING" ]; then
    echo "WARNING swaping rate at $PSWP_RATE (limit at $THRESHOLD_WARNING) |$OUTPUT_PERFDATA"
    exit "$STATE_WARNING"
fi

echo "OK swaping rate at $PSWP_RATE |$OUTPUT_PERFDATA"
exit "$STATE_OK"
|
|
@ -2,6 +2,7 @@
|
||||||
command[check_disks]=/usr/lib/nagios/plugins/check_disk -f -w 10% -c 5% -W 50% -K 5% -l -X tmpfs -X devpts -X usbfs -X nsfs -X overlay
|
command[check_disks]=/usr/lib/nagios/plugins/check_disk -f -w 10% -c 5% -W 50% -K 5% -l -X tmpfs -X devpts -X usbfs -X nsfs -X overlay
|
||||||
command[check_load]=/usr/lib/nagios/plugins/check_load -w 1,1,1 -c 3,2,2
|
command[check_load]=/usr/lib/nagios/plugins/check_load -w 1,1,1 -c 3,2,2
|
||||||
command[check_network_volume]=/usr/local/share/scripts-admin/nagios/check_network_volume.sh
|
command[check_network_volume]=/usr/local/share/scripts-admin/nagios/check_network_volume.sh
|
||||||
|
command[check_swaping]=/usr/local/share/scripts-admin/nagios/check_swaping.sh
|
||||||
|
|
||||||
# Petite commande temporaire pour étudier souci neighbour table overflow
|
# Petite commande temporaire pour étudier souci neighbour table overflow
|
||||||
command[check_network-neighbour-table]=/usr/local/share/scripts-admin/nagios/check_network-neighbour-table.sh
|
command[check_network-neighbour-table]=/usr/local/share/scripts-admin/nagios/check_network-neighbour-table.sh
|
||||||
|
|
Loading…
Reference in a new issue