#!/bin/sh
# Little custom script to :
# - check if the system has a heavy swap activity
# - record swap-in / swap-out activity (and OOM kills when available) as perfdata
#
# For pnp4nagios' sake, create a file usually named 'check_nrpe_swaping.cfg'
# (depends on your configuration) with the following content :
# DATATYPE = COUNTER,COUNTER

# Default values
PAGESIZE=4096
THRESHOLD_WARNING="200000"
THRESHOLD_CRITICAL="1000000"
THRESHOLD_OOMKILL="0"
DISABLE_OOMKILL=""
VMSTAT_PREVIOUS_DATA_FILE="/tmp/.monitoring_vmstat.txt"

# Output
OUTPUT_EXIT_STATUS=0
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""

# Directory holding this script ("." when $0 carries no directory part).
case "$0" in
  */*) PROGPATH=${0%/*} ;;
  *)   PROGPATH=. ;;
esac
REVISION="0.1"

# Stop at the first uncaught error
set -e

# Set/guess current page size; keep the default when getconf is missing or fails.
if command -v getconf >/dev/null 2>&1; then
  DETECTED_PAGESIZE=$( getconf PAGESIZE 2>/dev/null ) || DETECTED_PAGESIZE=""
  case "$DETECTED_PAGESIZE" in
    ''|*[!0-9]*) ;;
    *) PAGESIZE="$DETECTED_PAGESIZE" ;;
  esac
fi

# Include the plugin helpers (STATE_* exit codes, check_range(), ...) when
# utils.sh sits next to this script.
if [ -r "$PROGPATH/utils.sh" ]; then
  . "$PROGPATH/utils.sh"
fi
# Fallback exit codes, only applied to the values utils.sh did not define
# (this script does not use the range checking function).
: "${STATE_OK:=0}"
: "${STATE_WARNING:=1}"
: "${STATE_CRITICAL:=2}"
: "${STATE_UNKNOWN:=3}"
: "${STATE_DEPENDENT:=4}"

#
# Help function
# NOTE(review): the help text below was rewritten from the option list handled
# by getopts further down; adjust the wording if a canonical text exists.
#
usage() {
  cat <<EOF
Usage: ${0##*/} [-h] [-w WARNING] [-c CRITICAL] [-k OOMKILL] [-K] [-f FILE]

Check the swapping rate (bytes swapped in and out per second, computed from
/proc/vmstat) and, unless disabled, the number of OOM kills.

  -h           display this help and exit
  -w WARNING   swapping rate above which WARNING is returned (current: $THRESHOLD_WARNING)
  -c CRITICAL  swapping rate above which CRITICAL is returned (current: $THRESHOLD_CRITICAL)
  -k OOMKILL   OOM kill count above which CRITICAL is returned (current: $THRESHOLD_OOMKILL)
  -K           disable the OOM kill check
  -f FILE      file storing the previous /proc/vmstat content
               (current: $VMSTAT_PREVIOUS_DATA_FILE)
EOF
}

#
# Overwrite the file given as $1 with a short comment header followed by the
# current content of /proc/vmstat.
# Returns non-zero when the file cannot be written or /proc/vmstat cannot be read.
# NOTE(review): the first header line is a reconstruction; only the second one
# is known to match the historical file format.
#
update_vmstat_previous_data_file() {
  {
    echo "# File generated by $0 to keep the previous content of /proc/vmstat."
    echo "# It can be deleted if the need arise, it will be easily recreated without too much lost."
    echo ""
    cat /proc/vmstat
  } >"$1"
}

#
# Same as update_vmstat_previous_data_file, but leave with an UNKNOWN status
# when the snapshot cannot be written, instead of dying with an arbitrary
# exit code under 'set -e'.
#
save_vmstat_snapshot_or_exit() {
  if ! update_vmstat_previous_data_file "$1"; then
    echo "UNKNOWN cannot write vmstat data to $1."
    exit "$STATE_UNKNOWN"
  fi
}

#
# Print the value of the counter named exactly $1 found in the vmstat
# snapshot file $2 (prints nothing when the counter is absent).
#
vmstat_counter() {
  awk -v key="$1" '$1 == key { print $2; exit }' "$2"
}

#
# Check if arg is an integer
# (copied from jilles @ http://stackoverflow.com/questions/806906/how-do-i-test-if-a-variable-is-a-number-in-bash )
#
is_int() {
  case "$1" in
    ''|*[!0-9]*) return 1 ;;
    *) return 0 ;;
  esac
}

#
# Parameters management
#
while getopts hw:c:f:k:K opt; do
  case "$opt" in
    h)
      usage
      exit 0
      ;;
    w) THRESHOLD_WARNING="$OPTARG" ;;
    c) THRESHOLD_CRITICAL="$OPTARG" ;;
    f) VMSTAT_PREVIOUS_DATA_FILE="$OPTARG" ;;
    k) THRESHOLD_OOMKILL="$OPTARG" ;;
    K) DISABLE_OOMKILL="1" ;;
    \?)
      # Invalid command line: report UNKNOWN, like the other parameter errors.
      usage
      exit "$STATE_UNKNOWN"
      ;;
  esac
done
shift $(( OPTIND - 1 ))

# Little checks
if ! is_int "$THRESHOLD_WARNING" || ! is_int "$THRESHOLD_CRITICAL" || ! is_int "$THRESHOLD_OOMKILL"; then
  echo "UNKNOWN invalid parameter : one of the threshold is not an integer."
  exit "$STATE_UNKNOWN"
fi
if [ -z "$DISABLE_OOMKILL" ] && ! grep -q '^oom_kill ' /proc/vmstat 2>/dev/null; then
  echo "UNKNOWN threshold set for oom_kill but the counter not available in /proc/vmstat."
  exit "$STATE_UNKNOWN"
fi

# Check if the previous data file exists
if [ ! -w "$VMSTAT_PREVIOUS_DATA_FILE" ]; then
  save_vmstat_snapshot_or_exit "$VMSTAT_PREVIOUS_DATA_FILE"
  # We wait a little bit to gather some data even on the first run
  # (or we could return an UNKNOWN ?)
  sleep 2
fi

# Read previous data (page counts)
PREVIOUS_PSWPIN_PAGES=$( vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" ) || PREVIOUS_PSWPIN_PAGES=""
PREVIOUS_PSWPOUT_PAGES=$( vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" ) || PREVIOUS_PSWPOUT_PAGES=""
if ! is_int "$PREVIOUS_PSWPIN_PAGES" || ! is_int "$PREVIOUS_PSWPOUT_PAGES"; then
  # Unusable snapshot (truncated, edited, ...): rebuild it so that the next
  # run has a valid reference, and report that nothing can be computed now.
  save_vmstat_snapshot_or_exit "$VMSTAT_PREVIOUS_DATA_FILE"
  echo "UNKNOWN swap counters not found in $VMSTAT_PREVIOUS_DATA_FILE, file recreated."
  exit "$STATE_UNKNOWN"
fi
PREVIOUS_PSWPIN=$(( PREVIOUS_PSWPIN_PAGES * PAGESIZE ))
PREVIOUS_PSWPOUT=$(( PREVIOUS_PSWPOUT_PAGES * PAGESIZE ))
PREVIOUS_PSWPTOTAL=$(( PREVIOUS_PSWPIN + PREVIOUS_PSWPOUT ))

# Get time elapsed since last call (the snapshot mtime is the time of that call)
PREVIOUS_DATA_MTIME=$( stat -c '%Y' "$VMSTAT_PREVIOUS_DATA_FILE" 2>/dev/null ) || PREVIOUS_DATA_MTIME=""
if ! is_int "$PREVIOUS_DATA_MTIME"; then
  echo "UNKNOWN cannot read the modification time of $VMSTAT_PREVIOUS_DATA_FILE."
  exit "$STATE_UNKNOWN"
fi
PREVIOUS_DATA_AGE=$(( $( date +%s ) - PREVIOUS_DATA_MTIME ))
if [ "$PREVIOUS_DATA_AGE" -le 0 ]; then
  echo "UNKNOWN: $PREVIOUS_DATA_AGE second(s) elapsed since last call."
  exit "$STATE_UNKNOWN"
fi

# Update and read current data
save_vmstat_snapshot_or_exit "$VMSTAT_PREVIOUS_DATA_FILE"
CURRENT_OOMKILL=$( vmstat_counter oom_kill "$VMSTAT_PREVIOUS_DATA_FILE" ) || CURRENT_OOMKILL=""
CURRENT_PSWPIN_PAGES=$( vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" ) || CURRENT_PSWPIN_PAGES=""
CURRENT_PSWPOUT_PAGES=$( vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" ) || CURRENT_PSWPOUT_PAGES=""
if ! is_int "$CURRENT_PSWPIN_PAGES" || ! is_int "$CURRENT_PSWPOUT_PAGES"; then
  echo "UNKNOWN swap counters not found in /proc/vmstat."
  exit "$STATE_UNKNOWN"
fi
CURRENT_PSWPIN=$(( CURRENT_PSWPIN_PAGES * PAGESIZE ))
CURRENT_PSWPOUT=$(( CURRENT_PSWPOUT_PAGES * PAGESIZE ))
CURRENT_PSWPTOTAL=$(( CURRENT_PSWPIN + CURRENT_PSWPOUT ))

# Calculate the swaping rate (bytes per second)
# note: the counters can go back to zero (e.g. reboot with a surviving
# snapshot file); in that case everything counted since the reset happened
# within the elapsed interval, so the current total is the delta.
PSWP_DELTA=$(( CURRENT_PSWPTOTAL - PREVIOUS_PSWPTOTAL ))
if [ "$PSWP_DELTA" -lt 0 ]; then
  PSWP_DELTA="$CURRENT_PSWPTOTAL"
fi
PSWP_RATE=$(( PSWP_DELTA / PREVIOUS_DATA_AGE ))

# Generate perfdata (constant printf formats: data never goes in the format)
OUTPUT_PERFDATA=""
if is_int "$CURRENT_OOMKILL"; then
  OUTPUT_PERFDATA="$( printf ' oomkill=%d' "$CURRENT_OOMKILL" )"
fi
OUTPUT_PERFDATA="$( printf '%s pswpin=%d pswpout=%d' "$OUTPUT_PERFDATA" "$CURRENT_PSWPIN" "$CURRENT_PSWPOUT" )"

# Comparison
if [ "$PSWP_RATE" -gt "$THRESHOLD_CRITICAL" ]; then
  echo "CRITICAL swaping rate at $PSWP_RATE (limit at $THRESHOLD_CRITICAL) |$OUTPUT_PERFDATA"
  exit "$STATE_CRITICAL"
elif [ -z "$DISABLE_OOMKILL" ] && is_int "$CURRENT_OOMKILL" && [ "$CURRENT_OOMKILL" -gt "$THRESHOLD_OOMKILL" ]; then
  echo "CRITICAL $CURRENT_OOMKILL OOM kill(s) (limit at $THRESHOLD_OOMKILL) |$OUTPUT_PERFDATA"
  exit "$STATE_CRITICAL"
elif [ "$PSWP_RATE" -gt "$THRESHOLD_WARNING" ]; then
  echo "WARNING swaping rate at $PSWP_RATE (limit at $THRESHOLD_WARNING) |$OUTPUT_PERFDATA"
  exit "$STATE_WARNING"
fi

echo "OK swaping rate at $PSWP_RATE |$OUTPUT_PERFDATA"
exit "$STATE_OK"