diff --git a/nagios/check_linux_memory.sh b/nagios/check_linux_memory.sh
index ff037ac..05dff7a 100755
--- a/nagios/check_linux_memory.sh
+++ b/nagios/check_linux_memory.sh
@@ -6,6 +6,7 @@
 # Nagios script to check memory usage on linux server
 # version 1.3.0
 #
+# Deprecated and soon to be deleted: see version 2.0.0 with reworked perfdata.
 ##########################################################
 
 MEMINFO="/proc/meminfo"
diff --git a/nagios/check_linux_memory2.sh b/nagios/check_linux_memory2.sh
new file mode 100755
index 0000000..f0bf4c4
--- /dev/null
+++ b/nagios/check_linux_memory2.sh
@@ -0,0 +1,193 @@
+#!/bin/sh
+#
+# Plugin to check system memory
+# by hugme (nagios@hugme.org)
+# You can find my checks here: https://github.com/hugme/Nag_checks
+# Nagios script to check memory usage on linux server
+# version 2.0.0
+#
+# Exit status follows the Nagios plugin convention:
+#   0 = OK, 1 = WARNING, 2 = CRITICAL, 3 = UNKNOWN
+#
+##########################################################
+
+MEMINFO="/proc/meminfo"
+OOMKILLINFO="/proc/vmstat"
+
+##########################################################
+# We call them functions because they're fun
+##########################################################
+
+# Print the usage text on stdout.
+print_help() {
+cat << EOF
+Linux Memory Plugin for Nagios
+Copyright (c) hugme (nagios@hugme.org)
+Version: 2.0.0
+Last Modified: 10-07-2014
+License: This software can be used for free unless I meet you, then you owe me lunch.
+
+Usage: check_linux_memory -w [warning %] -c [critical %]
+
+Options:
+ -w [0-99]    = Your warning %. 20 means 20% of your memory can remain before a warning alarm. Do not use the % sign.
+ -c [0-99]    = Your critical %. 10 means 10% of your memory can remain before a critical alarm. Do not use the % sign.
+ -d [K,M,G,T] = divider K=kilobytes, M=megabytes, G=gigabytes, T=terabytes
+ -f           = Included for backwards compatibility to older versions
+ -n           = Don't Include cached memory as free memory when calculating your percentage free
+ -K           = Don't check the OutOfMemory Kill counter
+ -k [0-9999]  = Threshold for OOMKill alert (default: 0)
+EOF
+}
+
+# Report a usage error ($1 = description), print the help, exit UNKNOWN (3).
+# printf is used because not every /bin/sh "echo" expands "\n".
+invalid_type() {
+  printf '\nInvalid %s\n\n' "$1"
+  print_help
+  exit 3
+}
+
+# Succeed when $1 is a non-empty string made only of decimal digits.
+is_uint() {
+  case $1 in
+    ''|*[!0-9]*) return 1 ;;
+  esac
+  return 0
+}
+
+##############################################
+## Suck in the user input
+##############################################
+
+while test -n "$1"; do
+  case $1 in
+    --help|-h) print_help ; exit 0 ;;
+    -w|-c|-d|-k)
+      # These options take a value; without this guard a missing value makes
+      # the second "shift" run on an empty argument list.
+      [ "$#" -ge 2 ] || invalid_type "Option: $1 requires a value"
+      case $1 in
+        -w) WARN="$2" ;;
+        -c) CRIT="$2" ;;
+        -d) DIV="$2" ;;
+        -k) THRESHOLD_OOMKILL="$2" ;;
+      esac
+      shift
+      ;;
+    -n) NC=1 ;;
+    -f) ;; # accepted for backwards compatibility, has no effect
+    -K) DISABLE_OOMKILL=1 ;;
+  esac
+  shift
+done
+
+##############################################
+## Set the defaults if needed
+##############################################
+
+WARN=${WARN:-20}
+CRIT=${CRIT:-10}
+DIV=${DIV:-M}
+NC=${NC:-0}
+DISABLE_OOMKILL=${DISABLE_OOMKILL:-}
+THRESHOLD_OOMKILL=${THRESHOLD_OOMKILL:-0}
+
+##############################################
+## Check user input
+##############################################
+
+is_uint "$WARN" || invalid_type "Warning: Warning value can only contain numbers"
+is_uint "$CRIT" || invalid_type "Critical: Critical value can only contain numbers"
+is_uint "$THRESHOLD_OOMKILL" || invalid_type "Threshold: OOMKill threshold can only contain numbers"
+[ "$WARN" -ge 100 ] && invalid_type "Warning: Warning must be smaller than 100%"
+[ "$CRIT" -ge 100 ] && invalid_type "Critical: Critical must be smaller than 100%"
+[ "$CRIT" -gt "$WARN" ] && invalid_type "Critical: Your Warning must be Higher than your Critical"
+
+case $DIV in
+  k|K) DIVNUM=1024 ;;
+  m|M) DIVNUM=1048576 ;;
+  g|G) DIVNUM=1073741824 ;;
+  t|T) DIVNUM=1099511627776 ;;
+  *) invalid_type "Divider: Divider must be one of K, M, G or T" ;;
+esac
+
+# A plugin that cannot take its measurement must report UNKNOWN (3), not WARNING.
+if [ ! -f "$MEMINFO" ]; then
+  echo "MEMORY UNKNOWN - Your Memory info file seems to be missing"
+  exit 3
+fi
+
+if [ -z "$DISABLE_OOMKILL" ] && ! grep '^oom_kill ' "$OOMKILLINFO" >/dev/null 2>&1; then
+  echo "UNKNOWN threshold set for oom_kill but the counter not available in '$OOMKILLINFO'."
+  exit 3
+fi
+
+# Only read the vmstat file when the OOM kill check is enabled.
+if [ -z "$DISABLE_OOMKILL" ]; then
+  set -- "$MEMINFO" "$OOMKILLINFO"
+else
+  set -- "$MEMINFO"
+fi
+
+##############################################
+## Do the work
+## Pull the memory file into awk
+## grab the lines we need
+## Print the information
+##############################################
+
+RESULT=$(awk -v warnperct="$WARN" -v critperct="$CRIT" -v div="$DIV" \
+  -v divnum="$DIVNUM" -v nc="$NC" -v disable_oomkill="$DISABLE_OOMKILL" \
+  -v threshold_oomkill="$THRESHOLD_OOMKILL" '
+BEGIN {
+  # Multipliers used to turn the values of /proc/meminfo into bytes
+  UnitQuantity["B"]=1
+  UnitQuantity["kB"]=1024
+  UnitQuantity["MB"]=1048576
+  UnitQuantity["GB"]=1073741824
+  UnitQuantity["TB"]=1099511627776
+}
+/^MemTotal:/ { tot=$2*UnitQuantity[$3] }
+/^MemFree:/ { free=$2*UnitQuantity[$3] }
+/^Buffers:/ { buff=$2*UnitQuantity[$3] }
+/^Cached:/ { cache=$2*UnitQuantity[$3] }
+/^Active:/ { active=$2*UnitQuantity[$3] }
+/^Inactive:/ { inactive=$2*UnitQuantity[$3] }
+/^oom_kill / { oomkill=$2 }
+END {
+  # Without a total the percentage cannot be computed (division by zero)
+  if ( tot <= 0 ) {
+    print "3 MEMORY UNKNOWN - MemTotal not found in the memory info file"
+    exit
+  }
+  if ( nc != 1 ) { free=free+cache+buff }
+  freeperct=free/tot*100
+  warn=int(warnperct*tot/100) # convert to int to avoid dealing with weird display
+  crit=int(critperct*tot/100)
+  if ( freeperct > warnperct ) { result="OK" ; xit="0" }
+  else if ( freeperct > critperct ) { result="WARNING" ; xit="1" }
+  else { result="CRITICAL" ; xit="2" }
+  if ( disable_oomkill != 1 ) {
+    oomkill_display=" OOMKills:"oomkill
+    oomkill_perfdata=" oomkill="oomkill";;"threshold_oomkill";0"
+    if ( oomkill > threshold_oomkill ) { result="CRITICAL - Out of memory kills detected" ; xit="2" }
+  }
+  summary=freeperct"% Free - Total:"tot/divnum div" Active:"active/divnum div" Inactive:"inactive/divnum div
+  summary=summary" Buffers:"buff/divnum div" Cached:"cache/divnum div oomkill_display
+  perfdata="Active="active"B;0;0;0 Buffers="buff"B;0;0;0 Cached="cache"B;0;0;0"
+  perfdata=perfdata" Free="free"B;"warn";"crit";0 Inactive="inactive"B;0;0;0"oomkill_perfdata
+  print xit" MEMORY "result" - "summary" |"perfdata
+}' "$@")
+
+# awk prints "<exit code> <status line>"; anything else means it failed.
+case $RESULT in
+  [0-3]\ *) ;;
+  *)
+    echo "MEMORY UNKNOWN - unable to evaluate $MEMINFO"
+    exit 3
+    ;;
+esac
+
+printf '%s\n' "${RESULT#* }"
+exit "${RESULT%% *}"
diff --git a/nagios/etc/30_nrpe-basic.cfg b/nagios/etc/30_nrpe-basic.cfg
index 03f53a2..a490e11 100644
--- a/nagios/etc/30_nrpe-basic.cfg
+++ b/nagios/etc/30_nrpe-basic.cfg
@@ -5,7 +5,7 @@ command[check_load]=/usr/lib/nagios/plugins/check_load -w 1,1,1 -c 3,2,2
 command[check_network_volume]=/usr/local/share/scripts-admin/nagios/check_network_volume.sh
 command[check_swaping]=/usr/local/share/scripts-admin/nagios/check_swaping.sh
 command[check_swap]=/usr/lib/nagios/plugins/check_swap -w 60% -c 30%
-command[check_linux_memory]=/usr/local/share/scripts-admin/nagios/check_linux_memory.sh
+command[check_linux_memory]=/usr/local/share/scripts-admin/nagios/check_linux_memory2.sh
 
 # Petite commande temporaire pour étudier souci neighbour table overflow
 command[check_network-neighbour-table]=/usr/local/share/scripts-admin/nagios/check_network-neighbour-table.sh