check_linux_memory: adding OOMKill detection
This commit is contained in:
parent
160c938744
commit
1d609af762
2 changed files with 21 additions and 4 deletions
|
@ -9,6 +9,7 @@
|
||||||
##########################################################
|
##########################################################
|
||||||
|
|
||||||
MEMINFO="/proc/meminfo"
|
MEMINFO="/proc/meminfo"
|
||||||
|
OOMKILLINFO="/proc/vmstat"
|
||||||
|
|
||||||
##########################################################
|
##########################################################
|
||||||
# We call them functions because they're fun
|
# We call them functions because they're fun
|
||||||
|
@ -28,7 +29,7 @@ Options:
|
||||||
-w [0-99] = Your warning %. 20 means 20% of your memory can remain before a warning alarm. Do not use the % sign.
|
-w [0-99] = Your warning %. 20 means 20% of your memory can remain before a warning alarm. Do not use the % sign.
|
||||||
-c [0-99] = Your critical %. 10 means 10% of your memory can remain before a critical alarm. Do not use the % sign.
|
-c [0-99] = Your critical %. 10 means 10% of your memory can remain before a critical alarm. Do not use the % sign.
|
||||||
-d [K,M,G,T] = divider K=kilobytes, M=megabytes, G=gigabytes, T=terabytes
|
-d [K,M,G,T] = divider K=kilobytes, M=megabytes, G=gigabytes, T=terabytes
|
||||||
-f = Included for backwards compatability to older verserions
|
-f = Included for backwards compatibility to older versions
|
||||||
-n = Don't Include cached memory as free memory when calculating your percentage free
|
-n = Don't Include cached memory as free memory when calculating your percentage free
|
||||||
EOF
|
EOF
|
||||||
}
|
}
|
||||||
|
@ -52,6 +53,8 @@ while test -n "$1"; do
|
||||||
-c) CRIT="$2"; shift ;;
|
-c) CRIT="$2"; shift ;;
|
||||||
-d) DIV="$2"; shift ;;
|
-d) DIV="$2"; shift ;;
|
||||||
-n) NC=1 ;;
|
-n) NC=1 ;;
|
||||||
|
-k) THRESHOLD_OOMKILL="$2"; shift ;;
|
||||||
|
-K) DISABLE_OOMKILL=1 ;;
|
||||||
esac
|
esac
|
||||||
shift
|
shift
|
||||||
done
|
done
|
||||||
|
@ -64,6 +67,8 @@ done
|
||||||
[ -z "$CRIT" ] && CRIT=10
|
[ -z "$CRIT" ] && CRIT=10
|
||||||
[ -z "$DIV" ] && DIV=M
|
[ -z "$DIV" ] && DIV=M
|
||||||
[ -z "$FC" ] && FC=0
|
[ -z "$FC" ] && FC=0
|
||||||
|
[ -z "$DISABLE_OOMKILL" ] && DISABLE_OOMKILL=""
|
||||||
|
[ -z "$THRESHOLD_OOMKILL" ] && THRESHOLD_OOMKILL=0
|
||||||
|
|
||||||
##############################################
|
##############################################
|
||||||
## Check user input
|
## Check user input
|
||||||
|
@ -88,6 +93,11 @@ esac
|
||||||
exit 1
|
exit 1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if [ -z "$DISABLE_OOMKILL" ] && ! grep '^oom_kill ' "$OOMKILLINFO" >/dev/null 2>&1; then
|
||||||
|
echo "UNKNOWN threshold set for oom_kill but the counter is not available in '$OOMKILLINFO'."
|
||||||
|
exit 3
|
||||||
|
fi
|
||||||
|
|
||||||
##############################################
|
##############################################
|
||||||
## Do the work
|
## Do the work
|
||||||
## Pull the memory file into awk
|
## Pull the memory file into awk
|
||||||
|
@ -95,13 +105,14 @@ esac
|
||||||
## Print the information
|
## Print the information
|
||||||
##############################################
|
##############################################
|
||||||
|
|
||||||
RESULT=$(awk -v warn=$WARN -v crit=$CRIT -v div=$DIV -v divnum=$DIVNUM -v nc=$NC '/^MemTotal:/ { total=$2 }
|
RESULT=$(awk -v warn=$WARN -v crit=$CRIT -v div=$DIV -v divnum=$DIVNUM -v nc=$NC -v disable_oomkill=$DISABLE_OOMKILL -v threshold_oomkill=$THRESHOLD_OOMKILL '/^MemTotal:/ { total=$2 }
|
||||||
/^MemTotal:/ { tot=$2 }
|
/^MemTotal:/ { tot=$2 }
|
||||||
/^MemFree:/ { free=$2 }
|
/^MemFree:/ { free=$2 }
|
||||||
/^Buffers:/ { buff=$2 }
|
/^Buffers:/ { buff=$2 }
|
||||||
/^Cached:/ { cache=$2 }
|
/^Cached:/ { cache=$2 }
|
||||||
/^Active:/ { active=$2 }
|
/^Active:/ { active=$2 }
|
||||||
/^Inactive:/ { inactive=$2 }
|
/^Inactive:/ { inactive=$2 }
|
||||||
|
/^oom_kill / { oomkill=$2 }
|
||||||
END { if ( nc != 1 ) { free=free+cache+buff }
|
END { if ( nc != 1 ) { free=free+cache+buff }
|
||||||
{ freeperct=free/tot*100 }
|
{ freeperct=free/tot*100 }
|
||||||
if ( freeperct > warn ) { result="OK" ; xit="0"}
|
if ( freeperct > warn ) { result="OK" ; xit="0"}
|
||||||
|
@ -109,8 +120,13 @@ END { if ( nc != 1 ) { free=free+cache+buff }
|
||||||
if ( freeperct > crit ) { result="WARNING" ; xit="1" }
|
if ( freeperct > crit ) { result="WARNING" ; xit="1" }
|
||||||
else if ( freeperct <= crit ) { result="CRITICAL" ; xit="2" }
|
else if ( freeperct <= crit ) { result="CRITICAL" ; xit="2" }
|
||||||
}
|
}
|
||||||
{print xit" MEMORY "result" - "freeperct"% Free - Total:"tot/divnum div" Active:"active/divnum div" Inactive:"inactive/divnum div" Buffers:"buff/divnum div" Cached:"cache/divnum div" |Free="freeperct";"warn";"crit";0 Active="active";0;0;0 Inactive="inactive";0;0;0 Buffers="buff";0;0;0 Cached="cache";0;0;0" }
|
if ( disable_oomkill != 1 ) {
|
||||||
}' /proc/meminfo)
|
oomkill_display=" OOMKills:"oomkill
|
||||||
|
oomkill_perfdata=" oomkill="oomkill";;"threshold_oomkill";0"
|
||||||
|
if ( oomkill > threshold_oomkill ) { result="CRITICAL - Out of memory kills detected" ; xit="2" }
|
||||||
|
}
|
||||||
|
{print xit" MEMORY "result" - "freeperct"% Free - Total:"tot/divnum div" Active:"active/divnum div" Inactive:"inactive/divnum div" Buffers:"buff/divnum div" Cached:"cache/divnum div" "oomkill_display" |Free="freeperct";"warn";"crit";0 Active="active";0;0;0 Inactive="inactive";0;0;0 Buffers="buff";0;0;0 Cached="cache";0;0;0"oomkill_perfdata }
|
||||||
|
}' "$MEMINFO" "$OOMKILLINFO" )
|
||||||
|
|
||||||
echo ${RESULT#* }
|
echo ${RESULT#* }
|
||||||
exit ${RESULT%% *}
|
exit ${RESULT%% *}
|
||||||
|
|
|
@ -4,6 +4,7 @@ command[check_load]=/usr/lib/nagios/plugins/check_load -w 1,1,1 -c 3,2,2
|
||||||
command[check_network_volume]=/usr/local/share/scripts-admin/nagios/check_network_volume.sh
|
command[check_network_volume]=/usr/local/share/scripts-admin/nagios/check_network_volume.sh
|
||||||
command[check_swaping]=/usr/local/share/scripts-admin/nagios/check_swaping.sh
|
command[check_swaping]=/usr/local/share/scripts-admin/nagios/check_swaping.sh
|
||||||
command[check_swap]=/usr/lib/nagios/plugins/check_swap -w 60% -c 30%
|
command[check_swap]=/usr/lib/nagios/plugins/check_swap -w 60% -c 30%
|
||||||
|
command[check_linux_memory]=/usr/local/share/scripts-admin/nagios/check_linux_memory.sh
|
||||||
|
|
||||||
# Petite commande temporaire pour étudier souci neighbour table overflow
|
# Petite commande temporaire pour étudier souci neighbour table overflow
|
||||||
command[check_network-neighbour-table]=/usr/local/share/scripts-admin/nagios/check_network-neighbour-table.sh
|
command[check_network-neighbour-table]=/usr/local/share/scripts-admin/nagios/check_network-neighbour-table.sh
|
||||||
|
|
Loading…
Reference in a new issue