nagios: ajout check_lxc_mem
This commit is contained in:
parent
e4c8cf28b0
commit
5caf403901
1 changed files with 369 additions and 0 deletions
369
nagios/check_lxc_mem.sh
Executable file
369
nagios/check_lxc_mem.sh
Executable file
|
@ -0,0 +1,369 @@
|
|||
#!/bin/sh
|
||||
|
||||
# TODO:
|
||||
# - high in a dedicated metric? peak too?
|
||||
# - -S switch to add swap usage
|
||||
# - -W to use memory.high as warning range
|
||||
|
||||
# Little check for memory usage of LXC containers
|
||||
# GPL v3+ (copyright chl-dev@bugness.org)
|
||||
#
|
||||
# This was written with LXC 5 and cgroup 2 in mind.
|
||||
# For older versions, check the excellent
|
||||
# https://www.claudiokuenzler.com/monitoring-plugins/check_lxc.php
|
||||
|
||||
# Directory part of the path this script was called with (last path
# component stripped). Only needed by the optional utils.sh inclusion below.
PROGPATH=$( printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
REVISION="0.1"

# Stop at the first uncaught error
set -e

# Disable localization to make the output of the commands uniform.
# LC_ALL overrides LANG and every LC_* variable, and both are exported so
# that the child processes (free, sort, ...) actually inherit them.
LANG="C"
LC_ALL="C"
export LANG LC_ALL

# Include check_range()
# Not needed at the moment
#. $PROGPATH/utils.sh
# Nagios plugin exit codes
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Default values
# Thresholds are expressed as *free* space: 'N%' of the maximum, a plain
# number of bytes, or the keyword 'high'.
RANGE_MEMORY_WARNING="20%"
RANGE_MEMORY_CRITICAL="10%"
RANGE_SWAP_WARNING="20%"
RANGE_SWAP_CRITICAL="10%"
ADD_MEMORY_PEAK_TO_PERFDATA=0
# Host totals in bytes, used in place of a cgroup limit reported as "max"
MEMORY_TOTAL_SYSTEM="$( free -b | sed -n '/^Mem:/s/^Mem:[[:space:]]*\([[:digit:]]\+\)[[:space:]].*/\1/p' )"
SWAP_TOTAL_SYSTEM="$( free -b | sed -n '/^Swap:/s/^Swap:[[:space:]]*\([[:digit:]]\+\)[[:space:]].*/\1/p' )"
USE_MEMORY_HIGH_AS_WARNING=0

# Initial values
NB_CHECKED_CONTAINERS=0

# Output
OUTPUT_EXIT_STATUS=$STATE_OK
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""
|
||||
|
||||
#
# Help function
#
# Prints the usage text on stdout. The text embeds $0 and the current
# values of the RANGE_MEMORY_* / RANGE_SWAP_* variables as defaults.
#
usage() {
    cat <<EOF
Usage :
  $0 [-w warning-free-memory] [-c critical-free-memory] [-W warning-free-swap] [-C critical-free-swap] [-P] -n <container|ALL> [-n container ...]

Example :
  ./check_lxc_mem.sh -w 20% -c 10% -n monitoring.example.net

  -w / -c : warning / critical threshold on free memory
  -W / -C : warning / critical threshold on free swap
  -P : add a perfdata entry for memory.peak
  -h : show this help

We adopt the same behaviour than check_disks so thresholds are matched
against "free" space. The example above will trigger a warning if less
than 20% of memory.max is available.

Thresholds can be specified with a '%' or without, in which case the unit
is the byte, and with the keyword 'high', instructing to use the memory.high
(or memory.swap.high for swap) value as a threshold.

Note: Since the containers are checked against the latest thresholds specified, order
of the arguments is important. Ex:
  ./check_lxc_mem -w 20% -n container1 -w 50% -n container2

Default values:
  warning-free-memory: $RANGE_MEMORY_WARNING
  critical-free-memory: $RANGE_MEMORY_CRITICAL
  warning-free-swap: $RANGE_SWAP_WARNING
  critical-free-swap: $RANGE_SWAP_CRITICAL
EOF
}
|
||||
|
||||
#
# Validate the syntax of a threshold given on the command line.
#
# Accepted forms:
#   high   use memory.high / memory.swap.high as the threshold
#   N%     percentage of free space, 0 to 100
#   N      number of free bytes (decimal, no leading zero)
#
# Leading zeros are rejected because the value later ends up in shell
# arithmetic, where a leading 0 is read as an octal number.
#
# NOTE: the check_range() helper from Nagios' utils.sh is not used here,
# since utils.sh is not sourced by this script.
#
# args:
# 1: threshold to validate
#
# Returns 0 if the syntax is valid, 1 if not
#
check_range_syntax() {
    case "$1" in
        high)
            return 0
            ;;
        *%)
            # Percentage: what precedes the '%' must be within 0..100
            case "${1%?}" in
                100 | [0-9] | [1-9][0-9])
                    return 0
                    ;;
            esac
            return 1
            ;;
        0)
            return 0
            ;;
        '' | 0* | *[!0-9]*)
            return 1
            ;;
    esac
    return 0
}
|
||||
|
||||
#
# Remove the '%' suffix (or error if no '%')
#
# Implemented with parameter expansion only: no dependency on the
# non-portable 'echo -n' and 'head -c -1' (negative counts are GNU-only).
#
# args:
# 1: value, e.g. "20%"
#
# Prints the value without its trailing '%' (no newline) and returns 0,
# or prints nothing and returns 1 if the value does not end with '%'.
#
get_percent() {
    case "$1" in
        *%)
            printf '%s' "${1%?}"
            return 0
            ;;
    esac
    return 1
}
|
||||
|
||||
#
# Uniformize thresholds into an absolute "used" value:
# if "20%" then MAXVALUE * (100 - 20) / 100
# else MAXVALUE - THRESHOLD
#
# Thresholds express *free* space, hence the subtraction from the maximum.
# The '%' suffix is stripped with parameter expansion, so no helper
# process is spawned.
#
# args:
# 1: MAXVALUE
# 2: THRESHOLD (in percent with a '%' suffix, or plain unit)
#
# Prints the resulting value (integer arithmetic) on stdout.
#
get_absolute_threshold() {
    local MAXVALUE="$1"
    local THRESHOLD="$2"

    case "$THRESHOLD" in
        *%)
            echo "$(( $MAXVALUE * ( 100 - ${THRESHOLD%?} ) / 100 ))"
            ;;
        *)
            echo "$(( $MAXVALUE - $THRESHOLD ))"
            ;;
    esac
}
|
||||
|
||||
#
# Tell whether a value exceeds the absolute threshold computed by
# get_absolute_threshold() from a maximum and a threshold.
#
# Returns 1 if value is above threshold, 0 if not
#
# args:
# 1: value
# 2: maximum
# 3: threshold (in percent with a '%' suffix, or plain unit)
#
check_above_threshold() {
    local CURRENT="$1"
    local CEILING="$2"
    local RANGE="$3"

    # Anything but a successful "greater than" comparison counts as
    # "not above".
    [ "$CURRENT" -gt "$( get_absolute_threshold "$CEILING" "$RANGE" )" ] || return 0
    return 1
}
|
||||
|
||||
#
# Private helper: report an unreadable cgroup value and leave the plugin
# with the UNKNOWN status (instead of letting 'set -e' abort with the exit
# code of lxc-cgroup, which Nagios would misread).
#
# args:
# 1: container's name
# 2: cgroup key
#
_lxc_mem_unreadable() {
    printf "UNKNOWN: cannot read cgroup value '%s' of container '%s'\n" "$2" "$1"
    exit "${STATE_UNKNOWN:-3}"
}

#
# Private helper: print the absolute threshold matching a range.
#
# args:
# 1: range ('high', 'N%' or plain number)
# 2: the *.high value of the resource
# 3: the *.max value of the resource
#
_lxc_mem_threshold() {
    if [ "$1" = "high" ]; then
        printf '%s\n' "$2"
    else
        get_absolute_threshold "$3" "$1"
    fi
}

#
# Private helper: append one entry to the global OUTPUT_PERFDATA.
# Remember:
# 'label'=value[UOM];[warn];[crit];[min];[max]
#
# args:
# 1: label prefix
# 2: sanitized container's name
# 3: value
# 4: warning threshold
# 5: critical threshold
# 6: maximum
#
_lxc_mem_perfdata() {
    OUTPUT_PERFDATA="$( printf "%s\n'%s_%s'=%dB;%d;%d;0;%d" \
        "$OUTPUT_PERFDATA" "$1" "$2" "$3" "$4" "$5" "$6" )"
}

#
# Private helper: compare a value with its thresholds and update the
# globals OUTPUT_EXIT_STATUS, OUTPUT_DETAIL_WARNING and
# OUTPUT_DETAIL_CRITICAL accordingly.
#
# args:
# 1: container's name
# 2: text inserted before "over" in the message ("" or "of swap ")
# 3: current value
# 4: warning threshold
# 5: critical threshold
# 6: maximum
#
_lxc_mem_evaluate() {
    local SEVERITY THRESHOLD MESSAGE

    if [ "$3" -gt "$5" ]; then
        SEVERITY="$STATE_CRITICAL"
        THRESHOLD="$5"
    elif [ "$3" -gt "$4" ]; then
        SEVERITY="$STATE_WARNING"
        THRESHOLD="$4"
    else
        return 0
    fi

    MESSAGE="container $1 uses $( printf '%s\n' "$3" | pretty_formatter ) ${2}over $( printf '%s\n' "$THRESHOLD" | pretty_formatter ) (max: $( printf '%s\n' "$6" | pretty_formatter ))"

    if [ "$SEVERITY" = "$STATE_CRITICAL" ]; then
        OUTPUT_EXIT_STATUS="$STATE_CRITICAL"
        OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL $MESSAGE"
    else
        # Warning state: change the exit status unless already critical
        [ "$OUTPUT_EXIT_STATUS" = "$STATE_CRITICAL" ] || OUTPUT_EXIT_STATUS="$STATE_WARNING"
        OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING $MESSAGE"
    fi
    return 0
}

#
# The actual check, done in a function to factorize
# the code between '-n container1' and '-n ALL' calls
#
# WARNING: this function modifies global variables:
# OUTPUT_PERFDATA, OUTPUT_EXIT_STATUS, OUTPUT_DETAIL_WARNING,
# OUTPUT_DETAIL_CRITICAL and NB_CHECKED_CONTAINERS.
# It reads RANGE_MEMORY_*, RANGE_SWAP_*, MEMORY_TOTAL_SYSTEM,
# SWAP_TOTAL_SYSTEM, ADD_MEMORY_PEAK_TO_PERFDATA and STATE_*.
#
# If a cgroup value cannot be read, the plugin prints an UNKNOWN message
# and exits with the UNKNOWN status.
#
# args:
# 1: container's name
check_single_container() {
    local CONTAINER="$1"
    local LABEL
    local MEMORY_CURRENT MEMORY_HIGH MEMORY_MAX MEMORY_PEAK
    local SWAP_CURRENT SWAP_HIGH SWAP_MAX
    local THRESHOLD_MEMORY_WARNING THRESHOLD_MEMORY_CRITICAL
    local THRESHOLD_SWAP_WARNING THRESHOLD_SWAP_CRITICAL

    # Get the values for the current container
    MEMORY_CURRENT="$( lxc-cgroup -n "$CONTAINER" memory.current )" \
        || _lxc_mem_unreadable "$CONTAINER" memory.current
    MEMORY_HIGH="$( lxc-cgroup -n "$CONTAINER" memory.high )" \
        || _lxc_mem_unreadable "$CONTAINER" memory.high
    MEMORY_MAX="$( lxc-cgroup -n "$CONTAINER" memory.max )" \
        || _lxc_mem_unreadable "$CONTAINER" memory.max
    SWAP_CURRENT="$( lxc-cgroup -n "$CONTAINER" memory.swap.current )" \
        || _lxc_mem_unreadable "$CONTAINER" memory.swap.current
    SWAP_HIGH="$( lxc-cgroup -n "$CONTAINER" memory.swap.high )" \
        || _lxc_mem_unreadable "$CONTAINER" memory.swap.high
    SWAP_MAX="$( lxc-cgroup -n "$CONTAINER" memory.swap.max )" \
        || _lxc_mem_unreadable "$CONTAINER" memory.swap.max
    # memory.peak is not available everywhere, so don't consult it carelessly.
    if [ "$ADD_MEMORY_PEAK_TO_PERFDATA" = "1" ]; then
        MEMORY_PEAK="$( lxc-cgroup -n "$CONTAINER" memory.peak )" \
            || _lxc_mem_unreadable "$CONTAINER" memory.peak
    fi

    # Replace "max" values (no limit) by the totals of the host
    [ "$MEMORY_MAX" != "max" ] || MEMORY_MAX="$MEMORY_TOTAL_SYSTEM"
    [ "$MEMORY_HIGH" != "max" ] || MEMORY_HIGH="$MEMORY_TOTAL_SYSTEM"
    [ "$SWAP_MAX" != "max" ] || SWAP_MAX="$SWAP_TOTAL_SYSTEM"
    [ "$SWAP_HIGH" != "max" ] || SWAP_HIGH="$SWAP_TOTAL_SYSTEM"

    # If the ranges parameters are set to 'high', we use
    # memory.high/memory.swap.high as the threshold's value.
    THRESHOLD_MEMORY_WARNING="$( _lxc_mem_threshold "$RANGE_MEMORY_WARNING" "$MEMORY_HIGH" "$MEMORY_MAX" )"
    THRESHOLD_MEMORY_CRITICAL="$( _lxc_mem_threshold "$RANGE_MEMORY_CRITICAL" "$MEMORY_HIGH" "$MEMORY_MAX" )"
    THRESHOLD_SWAP_WARNING="$( _lxc_mem_threshold "$RANGE_SWAP_WARNING" "$SWAP_HIGH" "$SWAP_MAX" )"
    THRESHOLD_SWAP_CRITICAL="$( _lxc_mem_threshold "$RANGE_SWAP_CRITICAL" "$SWAP_HIGH" "$SWAP_MAX" )"

    # Fill the perfdata
    # label can contain any characters except the equals sign or single quote (')
    # (except pnp4nagios has trouble with '<>'...)
    LABEL="$( printf '%s\n' "$CONTAINER" | tr "'=<>" "____" )"
    _lxc_mem_perfdata "container" "$LABEL" "$MEMORY_CURRENT" \
        "$THRESHOLD_MEMORY_WARNING" "$THRESHOLD_MEMORY_CRITICAL" "$MEMORY_MAX"
    _lxc_mem_perfdata "contswap" "$LABEL" "$SWAP_CURRENT" \
        "$THRESHOLD_SWAP_WARNING" "$THRESHOLD_SWAP_CRITICAL" "$SWAP_MAX"

    # Following the eponymous setting, we add the peak memory consumption to
    # the perfdata.
    if [ "$ADD_MEMORY_PEAK_TO_PERFDATA" = "1" ]; then
        _lxc_mem_perfdata "contpeak" "$LABEL" "$MEMORY_PEAK" \
            "$THRESHOLD_MEMORY_WARNING" "$THRESHOLD_MEMORY_CRITICAL" "$MEMORY_MAX"
    fi

    # Is the value above critical or warning threshold ?
    _lxc_mem_evaluate "$CONTAINER" "" "$MEMORY_CURRENT" \
        "$THRESHOLD_MEMORY_WARNING" "$THRESHOLD_MEMORY_CRITICAL" "$MEMORY_MAX"
    # Swap alerts are worded differently so they can be told apart
    _lxc_mem_evaluate "$CONTAINER" "of swap " "$SWAP_CURRENT" \
        "$THRESHOLD_SWAP_WARNING" "$THRESHOLD_SWAP_CRITICAL" "$SWAP_MAX"

    # Little counter, 'cause we never know, it might be useful :)
    NB_CHECKED_CONTAINERS=$(( $NB_CHECKED_CONTAINERS + 1 ))
}
|
||||
|
||||
#
# Try to pretty-format a number read on stdin (SI units through numfmt)
# and fall back to printing the input unchanged when numfmt is missing
# or rejects the input. Formatting is purely cosmetic, so a numfmt
# failure must never make the caller (and, with 'set -e', the plugin) fail.
#
# Usage: echo "1024" | pretty_formatter
#
pretty_formatter() {
    local RAW PRETTY

    # stdin can only be read once: keep it for the fallback
    RAW="$( cat )"

    if command -v numfmt >/dev/null 2>&1 \
        && PRETTY="$( printf '%s\n' "$RAW" | numfmt --to=si 2>/dev/null )"; then
        printf '%s\n' "$PRETTY"
    else
        printf '%s\n' "$RAW"
    fi
}
|
||||
|
||||
# Some early checks
# - The following base commands are required
#   (grep and cat are used by this script too, so they are checked as well)
for cmd in cat head tail sed grep lxc-ls lxc-cgroup free sort tr; do
    if ! command -v "$cmd" >/dev/null 2>&1; then
        echo "UNKNOWN: $cmd not found, please check if command exists and PATH is correct"
        exit "${STATE_UNKNOWN:-3}"
    fi
done
# - Check if memory accounting is enabled: /proc/cgroups must have a
#   'memory' line whose last field is 1
if ! grep -E '^memory[[:space:]].*[[:space:]]1$' /proc/cgroups >/dev/null 2>&1; then
    echo "UNKNOWN: cgroup is not defined as kernel cmdline parameter (cgroup_enable=memory)"
    exit "${STATE_UNKNOWN:-3}"
fi
|
||||
|
||||
#
# Loop on parameters + tests
#
# Options are processed in order: each '-n' immediately checks the
# container(s) with the thresholds and '-P' state seen so far.
#
# Set to 1 as soon as a '-n' option is met, so that a call without any
# container can be reported instead of silently returning OK.
CONTAINER_REQUESTED=0
while getopts hw:c:W:C:n:P f; do
    case "$f" in
        'h')
            usage
            exit
            ;;

        'w' | 'c' | 'W' | 'C')
            if ! check_range_syntax "$OPTARG" >/dev/null; then
                echo "UNKNOWN: invalid range."
                exit "$STATE_UNKNOWN"
            fi
            # Lowercase: memory thresholds, uppercase: swap thresholds
            case "$f" in
                'w') RANGE_MEMORY_WARNING="$OPTARG" ;;
                'c') RANGE_MEMORY_CRITICAL="$OPTARG" ;;
                'W') RANGE_SWAP_WARNING="$OPTARG" ;;
                'C') RANGE_SWAP_CRITICAL="$OPTARG" ;;
            esac
            ;;

        'n')
            CONTAINER_REQUESTED=1
            # Immediately launch the check on this/those container(s)
            if [ "$OPTARG" = "ALL" ]; then
                if ! RUNNING_CONTAINERS="$( lxc-ls --running )"; then
                    echo "UNKNOWN: unable to list the running containers (lxc-ls failed)"
                    exit "$STATE_UNKNOWN"
                fi
                # Intentionally unquoted: one word per container name
                for CONTAINER in $RUNNING_CONTAINERS; do
                    check_single_container "$CONTAINER"
                done
            else
                check_single_container "$OPTARG"
            fi
            ;;

        'P')
            # Toggle the insertion of memory.peak as a perfdata entry
            ADD_MEMORY_PEAK_TO_PERFDATA=$(( ( $ADD_MEMORY_PEAK_TO_PERFDATA + 1 ) % 2 ))
            ;;

        \?)
            # Unknown option or missing argument: this is a usage error,
            # which Nagios must see as UNKNOWN (1 would mean WARNING)
            usage
            exit "$STATE_UNKNOWN"
            ;;
    esac
done

# Without any '-n', nothing was checked: do not pretend everything is OK
if [ "$CONTAINER_REQUESTED" != "1" ]; then
    echo "UNKNOWN: no container specified (use -n <container|ALL>, -h for help)"
    exit "$STATE_UNKNOWN"
fi
|
||||
|
||||
# Status line: the keyword matching the worst state met, followed by the
# details collected by the checks.
case "$OUTPUT_EXIT_STATUS" in
    '0')
        if [ -n "$OUTPUT_DETAIL_OK" ]; then
            printf "OK %s" "$OUTPUT_DETAIL_OK"
        else
            # No detail is collected for healthy containers: give at
            # least the number of containers checked
            printf "OK %s container(s) checked" "$NB_CHECKED_CONTAINERS"
        fi
        ;;
    '1')
        printf "WARNING %s" "$OUTPUT_DETAIL_WARNING"
        ;;
    '2')
        printf "CRITICAL %s" "$OUTPUT_DETAIL_CRITICAL"
        # Do not hide the containers which are "only" in warning state
        if [ -n "$OUTPUT_DETAIL_WARNING" ]; then
            printf " - WARNING %s" "$OUTPUT_DETAIL_WARNING"
        fi
        ;;
    *)
        printf "UNKNOWN"
        # Make sure the exit status agrees with the printed keyword
        OUTPUT_EXIT_STATUS=3
        ;;
esac

# Add the containers' counter to the perfdata
OUTPUT_PERFDATA="$( printf "%s\n%s" "$OUTPUT_PERFDATA" "nb_containers=$NB_CHECKED_CONTAINERS" )"

# We sort the perfdata because of a weird bug in some icinga/pnp4nagios interactions
# (one entry per line -> sorted -> empty lines dropped -> joined with spaces)
printf "|%s\n" "$( printf "%s" "$OUTPUT_PERFDATA" | sort | grep -v "^$" | tr "\n" " " )"

exit "$OUTPUT_EXIT_STATUS"
|
Loading…
Reference in a new issue