1
0
Fork 0
scripts-admin-quickndirty-p.../nagios/check_swaping.sh

169 lines
4.5 KiB
Bash
Executable file
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/sh
# Little custom script to :
# - check if the system has a heavy swap activity
# - record swap-in and swap-out activity (pswpin/pswpout counters) as perfdata
#
# For pnp4nagios' sake, create a file usually named 'check_nrpe_swaping.cfg'
# (depends on your configuration) with the following content :
# DATATYPE = COUNTER,COUNTER
# Default values (overridable with -w, -c and -f)
THRESHOLD_WARNING="1000"
THRESHOLD_CRITICAL="2000"
VMSTAT_PREVIOUS_DATA_FILE="/tmp/.monitoring_vmstat.txt"
# Output
OUTPUT_EXIT_STATUS=0
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""
# Directory holding this script. When $0 has no directory component
# (e.g. "sh check_swaping.sh") the script lives in the current directory.
case "$0" in
  */*) PROGPATH=${0%/*} ;;
  *) PROGPATH=. ;;
esac
REVISION="0.1"
# Stop at the first uncaught error
set -e
# Include the plugin helpers (STATE_* constants, check_range()) when available
if [ -r "$PROGPATH/utils.sh" ]; then
  . "$PROGPATH/utils.sh"
fi
# This script only needs the STATE_* constants from utils.sh, so fall back to
# the standard plugin return codes for any that are still undefined.
: "${STATE_OK:=0}"
: "${STATE_WARNING:=1}"
: "${STATE_CRITICAL:=2}"
: "${STATE_UNKNOWN:=3}"
: "${STATE_DEPENDENT:=4}"
#
# Help function
#
usage() {
  # Print the help text on stdout, one output line per printf argument.
  # The script name and the current threshold/file settings are interpolated.
  printf '%s\n' \
    "Usage :" \
    "$0 [-w warning_threshold] [-c critical_threshold] [-f vmstat_previous_data_file ]" \
    "Note 1 : the script will measure the number of seconds passed since its last call and will" \
    "divide the measures accordingly, so write the thresholds using pages/s in mind." \
    "The script will measure against pswpin and pswpout added together." \
    "Note 2 : the thresholds use the kernel page size as unit. Use 'getconf PAGESIZE' to get it if needed." \
    "Default values:" \
    "warning_threshold : $THRESHOLD_WARNING" \
    "critical_threshold : $THRESHOLD_CRITICAL" \
    "vmstat_previous_data_file : $VMSTAT_PREVIOUS_DATA_FILE"
}
#
# Copy fresh vmstat data into the file passed as an argument
#
update_vmstat_previous_data_file() {
  # $1: path of the data file to (re)create.
  # Restrict permissions of newly created files to the owner; a umask failure
  # is deliberately ignored.
  umask "0077" || true
  # Truncate the file, then write a three-line header followed by a snapshot
  # of /proc/vmstat. The return status is that of the final cat.
  {
    echo "# This file was written by $0 for monitoring swap activity."
    echo "# It can be deleted if the need arise, it will be easily recreated without too much lost."
    echo ""
    cat /proc/vmstat
  } >"$1"
}
#
# Check if arg is an integer
# (copied from jilles @ http://stackoverflow.com/questions/806906/how-do-i-test-if-a-variable-is-a-number-in-bash )
#
is_int() {
  # Succeeds (0) when $1 is a non-empty string made only of the digits 0-9,
  # fails (1) otherwise. Signs, spaces and decimal points are rejected.
  if [ -z "$1" ]; then
    return 1
  fi
  case "$1" in
    *[!0-9]*) return 1 ;;
  esac
  return 0
}
#
# Parameters management
#
# Parse the command line: -h prints the help, -w/-c override the thresholds and
# -f overrides the data file. Parsed options are shifted away afterwards.
while getopts hw:c:f: opt; do
  case "$opt" in
    h)
      usage
      exit
      ;;
    w)
      THRESHOLD_WARNING="$OPTARG"
      ;;
    c)
      THRESHOLD_CRITICAL="$OPTARG"
      ;;
    f)
      VMSTAT_PREVIOUS_DATA_FILE="$OPTARG"
      ;;
    *)
      # getopts reports an unknown option or a missing argument as '?'.
      # Exit with UNKNOWN (3): a plain 1 would be read as WARNING by Nagios.
      usage
      exit "${STATE_UNKNOWN:-3}"
      ;;
  esac
done
shift $(( $OPTIND - 1 ))
# Little checks
if ! is_int "$THRESHOLD_WARNING" || ! is_int "$THRESHOLD_CRITICAL"; then
  echo "UNKNOWN invalid parameter : one of the threshold is not an integer."
  exit "${STATE_UNKNOWN:-3}"
fi

# Print the value stored for counter $1 in the vmstat-formatted file $2.
# The key is matched exactly. Nothing is printed when the counter is absent or
# the file cannot be read; callers validate the result with is_int, so read
# errors are deliberately silenced here.
read_vmstat_counter() {
  awk -v key="$1" '$1 == key { print $2; exit }' "$2" 2>/dev/null || true
}

# Refresh the data file $1, or report UNKNOWN and exit when that fails,
# instead of letting 'set -e' abort the plugin without any output.
refresh_vmstat_data_or_unknown() {
  if ! update_vmstat_previous_data_file "$1"; then
    echo "UNKNOWN: unable to refresh $1 from /proc/vmstat."
    exit "${STATE_UNKNOWN:-3}"
  fi
}

# Create the data file when it is missing (or not writable)
if [ ! -w "$VMSTAT_PREVIOUS_DATA_FILE" ]; then
  refresh_vmstat_data_or_unknown "$VMSTAT_PREVIOUS_DATA_FILE"
  # We wait a little bit to gather some data even on the first run
  # (or we could return an UNKNOWN ?)
  sleep 2
fi

# Read previous data
PREVIOUS_PSWPIN="$( read_vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" )"
PREVIOUS_PSWPOUT="$( read_vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" )"
if ! is_int "$PREVIOUS_PSWPIN" || ! is_int "$PREVIOUS_PSWPOUT"; then
  # A truncated or corrupted data file would otherwise break the arithmetic
  # below on every run; rewrite it so that the next call has a valid baseline.
  refresh_vmstat_data_or_unknown "$VMSTAT_PREVIOUS_DATA_FILE"
  echo "UNKNOWN: no usable previous data in $VMSTAT_PREVIOUS_DATA_FILE, file refreshed."
  exit "${STATE_UNKNOWN:-3}"
fi
PREVIOUS_PSWPTOTAL=$(( $PREVIOUS_PSWPIN + $PREVIOUS_PSWPOUT ))

# Get time elapsed since last call (the file's mtime is the previous call time)
PREVIOUS_DATA_AGE=$(( $( date +%s ) - $( stat --printf="%Y" "$VMSTAT_PREVIOUS_DATA_FILE" ) ))
if [ "$PREVIOUS_DATA_AGE" -le "0" ]; then
  echo "UNKNOWN: $PREVIOUS_DATA_AGE second(s) elapsed since last call."
  exit "${STATE_UNKNOWN:-3}"
fi

# Update and read current data
refresh_vmstat_data_or_unknown "$VMSTAT_PREVIOUS_DATA_FILE"
CURRENT_PSWPIN="$( read_vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" )"
CURRENT_PSWPOUT="$( read_vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" )"
if ! is_int "$CURRENT_PSWPIN" || ! is_int "$CURRENT_PSWPOUT"; then
  echo "UNKNOWN: pswpin/pswpout counters not found in /proc/vmstat."
  exit "${STATE_UNKNOWN:-3}"
fi
CURRENT_PSWPTOTAL=$(( $CURRENT_PSWPIN + $CURRENT_PSWPOUT ))

# Calculate the swaping rate (pages per second since the previous call).
# When the counters went backwards (e.g. they restarted from zero after a
# reboot), everything counted so far happened since the previous sample.
PSWP_DELTA=$(( $CURRENT_PSWPTOTAL - $PREVIOUS_PSWPTOTAL ))
if [ "$PSWP_DELTA" -lt 0 ]; then
  PSWP_DELTA=$CURRENT_PSWPTOTAL
fi
PSWP_RATE=$(( $PSWP_DELTA / $PREVIOUS_DATA_AGE ))

# Generate perfdata (raw counters, see DATATYPE = COUNTER in the file header)
OUTPUT_PERFDATA="$( printf " pswpin=%d pswpout=%d" "$CURRENT_PSWPIN" "$CURRENT_PSWPOUT" )"

# Comparison: a threshold is exceeded only when the rate is strictly greater
if [ "$PSWP_RATE" -gt "$THRESHOLD_CRITICAL" ]; then
  echo "CRITICAL swaping rate at $PSWP_RATE (limit at $THRESHOLD_CRITICAL) |$OUTPUT_PERFDATA"
  exit "${STATE_CRITICAL:-2}"
elif [ "$PSWP_RATE" -gt "$THRESHOLD_WARNING" ]; then
  echo "WARNING swaping rate at $PSWP_RATE (limit at $THRESHOLD_WARNING) |$OUTPUT_PERFDATA"
  exit "${STATE_WARNING:-1}"
fi
echo "OK swaping rate at $PSWP_RATE |$OUTPUT_PERFDATA"
exit "${STATE_OK:-0}"