nagios: reworked perfdata for check_linux_memory
This commit is contained in:
parent
94b53010c8
commit
57fbb41825
3 changed files with 150 additions and 1 deletions
|
@ -6,6 +6,7 @@
|
|||
# Nagios script to check memory usage on linux server
|
||||
# version 1.3.0
|
||||
#
|
||||
# Deprecated and soon to be deleted: see version 2.0.0 with reworked perfdata.
|
||||
##########################################################
|
||||
|
||||
MEMINFO="/proc/meminfo"
|
||||
|
|
148
nagios/check_linux_memory2.sh
Executable file
148
nagios/check_linux_memory2.sh
Executable file
|
@ -0,0 +1,148 @@
|
|||
#!/bin/sh
|
||||
#
|
||||
# Plugin to check system memory
|
||||
# by hugme (nagios@hugme.org)
|
||||
# You can find my checks here: https://github.com/hugme/Nag_checks
|
||||
# Nagios script to check memory usage on linux server
|
||||
# version 2.0.0
|
||||
#
|
||||
##########################################################
|
||||
|
||||
MEMINFO="/proc/meminfo"
|
||||
OOMKILLINFO="/proc/vmstat"
|
||||
|
||||
##########################################################
|
||||
# We call them functions because they're fun
|
||||
##########################################################
|
||||
|
||||
print_help() {
|
||||
cat << EOF
|
||||
Linux Memory Plugin for Nagios
|
||||
Copyright (c) hugme (nagios@hugme.org)
|
||||
Version: 1.2.0
|
||||
Last Modified: 10-07-2014
|
||||
License: This software can be used for free unless I meet you, then you owe me lunch.
|
||||
|
||||
Usage: check_linux_memory -w [warning %] -c [critical %]
|
||||
|
||||
Options:
|
||||
-w [0-99] = Your warning %. 20 means 20% of your memory can remain before a warning alarm. Do not use the % sign.
|
||||
-c [0-99] = Your critical %. 10 means 10% of your memory can remain before a critical alarm. Do not use the % sign.
|
||||
-d [K,M,G,T] = divider K=kilobytes, M=megabytes, G=gigabytes, T=terabytes
|
||||
-f = Included for backwards compatability to older versions
|
||||
-n = Don't Include cached memory as free memory when calculating your percentage free
|
||||
-K = Don't check the OutOfMemory Kill counter
|
||||
-k [0-9999] = Threshold for OOMKill alert (default: 0)
|
||||
EOF
|
||||
}
|
||||
|
||||
invalid_type() {
|
||||
echo "\nInvalid $1\n"
|
||||
print_help
|
||||
exit 3
|
||||
}
|
||||
|
||||
##############################################
|
||||
## Suck in the user input
|
||||
##############################################
|
||||
|
||||
|
||||
while test -n "$1"; do
|
||||
case $1 in
|
||||
--help) print_help ; exit 0 ;;
|
||||
-h) print_help ; exit 0 ;;
|
||||
-w) WARN="$2"; shift ;;
|
||||
-c) CRIT="$2"; shift ;;
|
||||
-d) DIV="$2"; shift ;;
|
||||
-n) NC=1 ;;
|
||||
-k) THRESHOLD_OOMKILL="$2"; shift ;;
|
||||
-K) DISABLE_OOMKILL=1 ;;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
##############################################
|
||||
## Set the defaults if needed
|
||||
##############################################
|
||||
|
||||
[ -z "$WARN" ] && WARN=20
|
||||
[ -z "$CRIT" ] && CRIT=10
|
||||
[ -z "$DIV" ] && DIV=M
|
||||
[ -z "$FC" ] && FC=0
|
||||
[ -z "$DISABLE_OOMKILL" ] && DISABLE_OOMKILL=""
|
||||
[ -z "$THRESHOLD_OOMKILL" ] && THRESHOLD_OOMKILL=0
|
||||
|
||||
##############################################
|
||||
## Check user input
|
||||
##############################################
|
||||
|
||||
[ ! -z `echo $WARN | tr -d [:digit:]` ] && invalid_type "Warning: Warning value can only contain numbers"
|
||||
[ ! -z `echo $CRIT | tr -d [:digit:]` ] && invalid_type "Critical: Critical value can only contain numbers"
|
||||
[ "${WARN%.*}" -ge 100 ] && invalid_type "Warning: Warning must be smaller than 100%"
|
||||
[ "${CRIT%.*}" -ge 100 ] && invalid_type "Critical: Critical must be smaller than 100%"
|
||||
[ "${CRIT%.*}" -gt "${WARN%.*}" ] && invalid_type "Critical: Your Warning must be Higher than your Critical"
|
||||
|
||||
case $DIV in
|
||||
k|K) DIVNUM=1024;;
|
||||
m|M) DIVNUM=1048576;;
|
||||
g|G) DIVNUM=1073741824;;
|
||||
t|T) DIVNUM=1099511627776;;
|
||||
*) invalid_type;;
|
||||
esac
|
||||
|
||||
[ ! -f "$MEMINFO" ] && {
|
||||
echo "Your Memory info file seems to be missing"
|
||||
exit 1
|
||||
}
|
||||
|
||||
if [ -z "$DISABLE_OOMKILL" ] && ! grep '^oom_kill ' "$OOMKILLINFO" >/dev/null 2>&1; then
|
||||
echo "UNKNOWN threshold set for oom_kill but the counter not available in '$OOMKILLINFO'."
|
||||
exit 3
|
||||
fi
|
||||
|
||||
##############################################
|
||||
## Do the work
|
||||
## Pull the memory file into awk
|
||||
## grab the lines we need
|
||||
## Print the information
|
||||
##############################################
|
||||
|
||||
RESULT=$(awk -v warnperct=$WARN -v critperct=$CRIT -v div=$DIV -v divnum=$DIVNUM -v nc=$NC -v disable_oomkill=$DISABLE_OOMKILL -v threshold_oomkill=$THRESHOLD_OOMKILL '
|
||||
{
|
||||
UnitQuantity["B"]=1
|
||||
UnitQuantity["kB"]=1024
|
||||
UnitQuantity["MB"]=1048576
|
||||
UnitQuantity["GB"]=1073741824
|
||||
UnitQuantity["TB"]=1099511627776
|
||||
}
|
||||
/^MemTotal:/ { tot=$2*UnitQuantity[$3] }
|
||||
/^MemFree:/ { free=$2*UnitQuantity[$3] }
|
||||
/^Buffers:/ { buff=$2*UnitQuantity[$3] }
|
||||
/^Cached:/ { cache=$2*UnitQuantity[$3] }
|
||||
/^Active:/ { active=$2*UnitQuantity[$3] }
|
||||
/^Inactive:/ { inactive=$2*UnitQuantity[$3] }
|
||||
/^oom_kill / { oomkill=$2 }
|
||||
END { if ( nc != 1 ) { free=free+cache+buff }
|
||||
{
|
||||
freeperct=free/tot*100
|
||||
warn=int(warnperct*tot/100) # convert to int to avoid dealing with weird display
|
||||
crit=int(critperct*tot/100)
|
||||
}
|
||||
if ( freeperct > warnperct ) { result="OK" ; xit="0"}
|
||||
if ( freeperct <= warnperct ) {
|
||||
if ( freeperct > critperct ) { result="WARNING" ; xit="1" }
|
||||
else if ( freeperct <= critperct ) { result="CRITICAL" ; xit="2" }
|
||||
}
|
||||
if ( disable_oomkill != 1 ) {
|
||||
oomkill_display=" OOMKills:"oomkill
|
||||
oomkill_perfdata=" oomkill="oomkill";;"threshold_oomkill";0"
|
||||
if ( oomkill > threshold_oomkill ) { result="CRITICAL - Out of memory kills detected" ; xit="2" }
|
||||
}
|
||||
{
|
||||
print xit" MEMORY "result" - "freeperct"% Free - Total:"tot/divnum div" Active:"active/divnum div" Inactive:"inactive/divnum div" Buffers:"buff/divnum div" Cached:"cache/divnum div" "oomkill_display" |Active="active"B;0;0;0 Buffers="buff"B;0;0;0 Cached="cache"B;0;0;0 Free="free"B;"warn";"crit";0 Inactive="inactive"B;0;0;0"oomkill_perfdata
|
||||
}
|
||||
}' "$MEMINFO" "$OOMKILLINFO" )
|
||||
|
||||
echo ${RESULT#* }
|
||||
exit ${RESULT%% *}
|
||||
|
|
@ -5,7 +5,7 @@ command[check_load]=/usr/lib/nagios/plugins/check_load -w 1,1,1 -c 3,2,2
|
|||
command[check_network_volume]=/usr/local/share/scripts-admin/nagios/check_network_volume.sh
|
||||
command[check_swaping]=/usr/local/share/scripts-admin/nagios/check_swaping.sh
|
||||
command[check_swap]=/usr/lib/nagios/plugins/check_swap -w 60% -c 30%
|
||||
command[check_linux_memory]=/usr/local/share/scripts-admin/nagios/check_linux_memory.sh
|
||||
command[check_linux_memory]=/usr/local/share/scripts-admin/nagios/check_linux_memory2.sh
|
||||
|
||||
# Petite commande temporaire pour étudier souci neighbour table overflow
|
||||
command[check_network-neighbour-table]=/usr/local/share/scripts-admin/nagios/check_network-neighbour-table.sh
|
||||
|
|
Loading…
Reference in a new issue