#!/bin/sh
# Little custom script to :
# - check if the system has a heavy swap activity
# - record swapin/swapout activity as perfdata
#
# The swapping rate is the number of pages swapped in + out (pswpin and
# pswpout from /proc/vmstat) per second since the previous run. The previous
# counters are kept in a small data file between two runs.
#
# For pnp4nagios' sake, create a file usually named 'check_nrpe_swaping.cfg'
# (depends on your configuration) with the following content :
# DATATYPE = COUNTER,COUNTER

# Default values
THRESHOLD_WARNING="1000"
THRESHOLD_CRITICAL="2000"
VMSTAT_PREVIOUS_DATA_FILE="/tmp/.monitoring_vmstat.txt"

# Output
OUTPUT_EXIT_STATUS=0
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""

# Directory holding this script. When $0 contains no slash (e.g. the script
# was started as "sh check_swap"), the directory is the current one.
case "$0" in
    */*) PROGPATH="${0%/*}" ;;
    *)   PROGPATH="." ;;
esac
REVISION="0.1"

# Stop at the first uncaught error
set -e

# Include the Nagios helpers (STATE_* exit codes, check_range(), ...) when
# they are available next to this script.
if [ -r "$PROGPATH/utils.sh" ]; then
    . "$PROGPATH/utils.sh"
fi

# Fall back on the standard Nagios exit codes when utils.sh is missing
# (it's only possible because we don't use the range checking function in
# this script). Values already set by utils.sh are left untouched.
: "${STATE_OK:=0}"
: "${STATE_WARNING:=1}"
: "${STATE_CRITICAL:=2}"
: "${STATE_UNKNOWN:=3}"
: "${STATE_DEPENDENT:=4}"

#
# Help function
# Prints the command line synopsis on stdout.
#
usage() {
    cat <<EOF
Usage: ${0##*/} [-h] [-w warning] [-c critical] [-f file]

Check the swapping rate (pages swapped in + out per second, computed from
/proc/vmstat) and report the pswpin/pswpout counters as perfdata.

  -h  display this help and exit
  -w  warning threshold, integer  (default: $THRESHOLD_WARNING)
  -c  critical threshold, integer (default: $THRESHOLD_CRITICAL)
  -f  file storing the counters of the previous run
      (default: $VMSTAT_PREVIOUS_DATA_FILE)
EOF
}

#
# Print an UNKNOWN status line on stdout and leave with the matching code.
#   $* : message
#
exit_unknown() {
    echo "UNKNOWN: $*"
    exit "$STATE_UNKNOWN"
}

#
# Overwrite the data file with a short header followed by a fresh copy of
# /proc/vmstat.
#   $1 : path of the data file
# Returns non-zero when the file cannot be written or /proc/vmstat cannot be
# read.
#
update_vmstat_previous_data_file() {
    echo "# Snapshot of /proc/vmstat saved by the swap activity check." >"$1" &&
        echo "# It can be deleted if the need arise, it will be easily recreated without too much lost." >>"$1" &&
        echo "" >>"$1" &&
        cat /proc/vmstat >>"$1"
}

#
# Refresh the data file, or leave with UNKNOWN when that is not possible.
#   $1 : path of the data file
#
refresh_data_file_or_exit() {
    update_vmstat_previous_data_file "$1" ||
        exit_unknown "unable to save /proc/vmstat into $1."
}

#
# Print the value of one counter stored in a vmstat-formatted file
# ("name value" lines). Prints nothing when the counter is absent.
#   $1 : counter name (e.g. pswpin)
#   $2 : path of the file to read
#
read_vmstat_counter() {
    awk -v key="$1" '$1 == key { print $2; exit }' "$2"
}

#
# Check if arg is an integer
# (copied from jilles @ http://stackoverflow.com/questions/806906/how-do-i-test-if-a-variable-is-a-number-in-bash )
#
is_int() {
    case "$1" in
        ''|*[!0-9]*) return 1 ;;
        *) return 0 ;;
    esac
}

#
# Parameters management
#
while getopts hw:c:f: f; do
    case "$f" in
        'h')
            usage
            exit 0
            ;;
        'w')
            THRESHOLD_WARNING="$OPTARG"
            ;;
        'c')
            THRESHOLD_CRITICAL="$OPTARG"
            ;;
        'f')
            VMSTAT_PREVIOUS_DATA_FILE="$OPTARG"
            ;;
        \?)
            usage
            # A bad invocation must not be mistaken for a WARNING (exit 1).
            exit "$STATE_UNKNOWN"
            ;;
    esac
done
shift $(( OPTIND - 1 ))

# Little checks
if ! is_int "$THRESHOLD_WARNING" || ! is_int "$THRESHOLD_CRITICAL"; then
    echo "UNKNOWN invalid parameter : one of the threshold is not an integer."
    exit "$STATE_UNKNOWN"
fi

# Check if the previous data file exists (and can be updated)
if [ ! -w "$VMSTAT_PREVIOUS_DATA_FILE" ]; then
    refresh_data_file_or_exit "$VMSTAT_PREVIOUS_DATA_FILE"
    # We wait a little bit to gather some data even on the first run
    # (or we could return an UNKNOWN ?)
    sleep 2
fi

# Read previous data. A failing reader must not abort the script through
# 'set -e': an empty value is caught by the validation just below.
PREVIOUS_PSWPIN="$( read_vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" )" || PREVIOUS_PSWPIN=""
PREVIOUS_PSWPOUT="$( read_vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" )" || PREVIOUS_PSWPOUT=""
if ! is_int "$PREVIOUS_PSWPIN" || ! is_int "$PREVIOUS_PSWPOUT"; then
    # The data file is unusable (truncated, edited, ...): recreate it so that
    # the next run starts from a sane state, and report that no rate can be
    # computed this time.
    refresh_data_file_or_exit "$VMSTAT_PREVIOUS_DATA_FILE"
    exit_unknown "no usable previous counters in $VMSTAT_PREVIOUS_DATA_FILE, data file recreated."
fi
PREVIOUS_PSWPTOTAL=$(( $PREVIOUS_PSWPIN + $PREVIOUS_PSWPOUT ))

# Get time elapsed since last call (the data file is rewritten at each run,
# so its modification time is the time of the previous run)
PREVIOUS_DATA_MTIME="$( stat -c '%Y' "$VMSTAT_PREVIOUS_DATA_FILE" )" || PREVIOUS_DATA_MTIME=""
if ! is_int "$PREVIOUS_DATA_MTIME"; then
    exit_unknown "unable to read the modification time of $VMSTAT_PREVIOUS_DATA_FILE."
fi
PREVIOUS_DATA_AGE=$(( $( date +%s ) - $PREVIOUS_DATA_MTIME ))
if [ "$PREVIOUS_DATA_AGE" -le "0" ]; then
    # Also protects the division below against a zero divisor.
    exit_unknown "$PREVIOUS_DATA_AGE second(s) elapsed since last call."
fi

# Update and read current data
refresh_data_file_or_exit "$VMSTAT_PREVIOUS_DATA_FILE"
CURRENT_PSWPIN="$( read_vmstat_counter pswpin "$VMSTAT_PREVIOUS_DATA_FILE" )" || CURRENT_PSWPIN=""
CURRENT_PSWPOUT="$( read_vmstat_counter pswpout "$VMSTAT_PREVIOUS_DATA_FILE" )" || CURRENT_PSWPOUT=""
if ! is_int "$CURRENT_PSWPIN" || ! is_int "$CURRENT_PSWPOUT"; then
    exit_unknown "pswpin/pswpout counters not found in /proc/vmstat."
fi
CURRENT_PSWPTOTAL=$(( $CURRENT_PSWPIN + $CURRENT_PSWPOUT ))

# Calculate the swapping rate
# note: the counters can be reset to zero from time to time (reboot, counter
# wrap). In that case the difference is negative and no meaningful rate can
# be computed for this interval: report 0 instead of a negative rate.
PSWP_DELTA=$(( $CURRENT_PSWPTOTAL - $PREVIOUS_PSWPTOTAL ))
if [ "$PSWP_DELTA" -lt 0 ]; then
    PSWP_DELTA=0
fi
PSWP_RATE=$(( $PSWP_DELTA / $PREVIOUS_DATA_AGE ))

# Generate perfdata (raw counters, see DATATYPE in the header)
OUTPUT_PERFDATA=" pswpin=$CURRENT_PSWPIN pswpout=$CURRENT_PSWPOUT"

# Comparison
if [ "$PSWP_RATE" -gt "$THRESHOLD_CRITICAL" ]; then
    echo "CRITICAL swaping rate at $PSWP_RATE (limit at $THRESHOLD_CRITICAL) |$OUTPUT_PERFDATA"
    exit "$STATE_CRITICAL"
elif [ "$PSWP_RATE" -gt "$THRESHOLD_WARNING" ]; then
    echo "WARNING swaping rate at $PSWP_RATE (limit at $THRESHOLD_WARNING) |$OUTPUT_PERFDATA"
    exit "$STATE_WARNING"
fi

echo "OK swaping rate at $PSWP_RATE |$OUTPUT_PERFDATA"
exit "$STATE_OK"