nagios: ajout check_lxc_mem
This commit is contained in:
parent
e4c8cf28b0
commit
5caf403901
1 changed files with 369 additions and 0 deletions
369
nagios/check_lxc_mem.sh
Executable file
369
nagios/check_lxc_mem.sh
Executable file
|
@ -0,0 +1,369 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# TODO:
|
||||||
|
# - high in a dedicated metric? peak too?
|
||||||
|
# - -S switch to add swap usage
|
||||||
|
# - -W to use memory.high as warning range
|
||||||
|
|
||||||
|
# Little check for memory usage of LXC containers
|
||||||
|
# GPL v3+ (copyright chl-dev@bugness.org)
|
||||||
|
#
|
||||||
|
# This was written with LXC 5 and cgroup 2 in mind.
|
||||||
|
# For older versions, check the excellent
|
||||||
|
# https://www.claudiokuenzler.com/monitoring-plugins/check_lxc.php
|
||||||
|
|
||||||
|
# Directory part of the script's path (only useful to source utils.sh, see below)
PROGPATH=$( printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
REVISION="0.1"

# Stop at the first uncaught error
set -e

# Disable localization to uniformize commands' output.
# LC_ALL takes precedence over LANG, so both are forced and exported:
# a localized `free` may not print the "Mem:"/"Swap:" labels parsed below.
LANG="C"
LC_ALL="C"
export LANG LC_ALL

# Include check_range()
# Not needed at the moment
#. $PROGPATH/utils.sh
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Default values
# Thresholds are expressed as *free* space (see usage()).
RANGE_MEMORY_WARNING="20%"
RANGE_MEMORY_CRITICAL="10%"
RANGE_SWAP_WARNING="20%"
RANGE_SWAP_CRITICAL="10%"
ADD_MEMORY_PEAK_TO_PERFDATA=0
# Host totals in bytes, used in place of "max" cgroup values.
# The sed expressions only use portable BRE (no GNU-only '\+').
MEMORY_TOTAL_SYSTEM="$( free -b | sed -n '/^Mem:/s/^Mem:[[:space:]]*\([[:digit:]][[:digit:]]*\)[[:space:]].*/\1/p' )"
SWAP_TOTAL_SYSTEM="$( free -b | sed -n '/^Swap:/s/^Swap:[[:space:]]*\([[:digit:]][[:digit:]]*\)[[:space:]].*/\1/p' )"
USE_MEMORY_HIGH_AS_WARNING=0

# Initial values
NB_CHECKED_CONTAINERS=0

# Output
OUTPUT_EXIT_STATUS=$STATE_OK
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""
|
||||||
|
|
||||||
|
#
# Help function
#
# Prints the command line help on stdout. Reads the RANGE_* globals to
# display the default thresholds.
#
# The synopsis matches the getopts specification of this script
# ("hw:c:W:C:n:P"): -W and -C take a swap threshold as argument.
#
usage() {
    cat <<EOF
Usage :
$0 [-w warning-free-memory%] [-c critical-free-memory%] [-W warning-free-swap%] [-C critical-free-swap%] [-P] -n <container|ALL> -n container ...

Example :
./check_lxc_mem.sh -w 20% -c 10% -n monitoring.example.net

-w / -c : warning / critical thresholds on free memory
-W / -C : warning / critical thresholds on free swap
-P : add a perfdata entry for memory.peak

We adopt the same behaviour than check_disks so thresholds are matched
against "free" space. The example above will trigger a warning if less
than 20% of memory.max is available.

Thresholds can be specified with a '%' or without, in which case the unit
is the byte, and with the keyword 'high', instructing to use the memory.high
value as a threshold.

Note: Since the containers are checked against the latest thresholds specified, order
of the arguments is important. Ex:
./check_lxc_mem -w 20% -n container1 -w 50% -n container2

Default values:
warning-free-memory: $RANGE_MEMORY_WARNING
critical-free-memory: $RANGE_MEMORY_CRITICAL
warning-free-swap: $RANGE_SWAP_WARNING
critical-free-swap: $RANGE_SWAP_CRITICAL
EOF
}
|
||||||
|
|
||||||
|
#
# Validate the syntax of a threshold given on the command line.
#
# Accepted forms:
#   high   use the memory.high / memory.swap.high value
#   N%     percentage (non-negative integer followed by '%')
#   N      bytes (non-negative integer)
#
# args:
# 1: threshold to validate
#
# Returns 0 if the syntax is valid, 1 otherwise.
#
check_range_syntax() {
    case "$1" in
        'high')
            return 0
            ;;
        '' | '%')
            # Nothing, or a lone '%' without any digit
            return 1
            ;;
    esac

    # Strip one optional trailing '%': only digits may remain
    case "${1%\%}" in
        *[!0-9]*)
            return 1
            ;;
    esac
    return 0
}
|
||||||
|
# check_range 0 "$1" >/dev/null 2>&1
|
||||||
|
# if [ "$?" -eq "2" ]; then
|
||||||
|
# return 1
|
||||||
|
# fi
|
||||||
|
# return 0
|
||||||
|
#}
|
||||||
|
|
||||||
|
#
# Remove the '%' suffix (or error if no '%')
#
# args:
# 1: threshold, e.g. "20%"
#
# Prints the value without its trailing '%' (no trailing newline) and
# returns 0; prints nothing and returns 1 if there is no '%' suffix.
#
# Implemented with parameter expansion only: `echo -n` and `head -c -1`
# are not portable across /bin/sh and head implementations.
#
get_percent() {
    case "$1" in
        *%)
            printf '%s' "${1%\%}"
            return 0
            ;;
    esac
    return 1
}
|
||||||
|
|
||||||
|
#
# Uniformize thresholds: convert a "free space" threshold into the
# corresponding "used" value.
# if "20%" then calculate MAXVALUE * (100 - 20) / 100
# else (plain number) calculate MAXVALUE - THRESHOLD
#
# args:
# 1: MAXVALUE
# 2: THRESHOLD
#
# Prints the result on stdout.
#
get_absolute_threshold() {
    local MAXVALUE="$1"
    local THRESHOLD="$2"
    local PERCENT

    # get_percent is called once: its exit status tells whether the
    # threshold is a percentage, its output is the percentage itself.
    if PERCENT="$( get_percent "$THRESHOLD" )"; then
        echo "$(( $MAXVALUE * ( 100 - $PERCENT ) / 100 ))"
    else
        echo "$(( $MAXVALUE - $THRESHOLD ))"
    fi
}
|
||||||
|
|
||||||
|
#
# Returns 1 if value is above threshold, 0 if not
# (beware: this is the opposite of the usual shell convention, so
# "if check_above_threshold ..." is true when the value is NOT above).
#
# args:
# 1: value
# 2: maximum
# 3: threshold (in percent with a '%' suffix, or plain unit)
#
check_above_threshold() {
    local VALUE="$1"
    local MAXVALUE="$2"
    local THRESHOLD="$3"

    # The threshold is a "free space" one: get_absolute_threshold converts
    # it into the matching "used" value before the comparison.
    if [ "$VALUE" -gt "$( get_absolute_threshold "$MAXVALUE" "$THRESHOLD" )" ]; then
        return 1
    fi
    return 0
}
|
||||||
|
|
||||||
|
#
# The actual check, done in a function to factorize
# the code between '-n container1' and '-n ALL' calls
#
# WARNING: this function modifies global variables (CONTAINER, MEMORY_*,
# SWAP_*, THRESHOLD_*, OUTPUT_EXIT_STATUS, OUTPUT_DETAIL_WARNING,
# OUTPUT_DETAIL_CRITICAL, OUTPUT_PERFDATA, NB_CHECKED_CONTAINERS).
#
# If a cgroup value cannot be read, the whole script exits as UNKNOWN with
# a message. (Otherwise 'set -e' would end the plugin silently with
# lxc-cgroup's own exit status.)
#
# args:
# 1: container's name
#

# Helper: report an unreadable cgroup value and exit as UNKNOWN.
# args: 1: container's name, 2: cgroup key
_container_value_unreadable() {
    echo "UNKNOWN: unable to read $2 of container $1"
    exit "${STATE_UNKNOWN:-3}"
}

# Helper: print the "used" threshold matching a range parameter.
# args: 1: range ('high', 'N%' or 'N'), 2: the *.high value, 3: the *.max value
_container_threshold() {
    if [ "$1" = "high" ]; then
        echo "$2"
    else
        get_absolute_threshold "$3" "$1"
    fi
}

# Helper: print a byte count in human readable form.
# args: 1: value
_container_pretty() {
    printf '%s\n' "$1" | pretty_formatter
}

check_single_container() {
    local PERFDATA_LABEL

    CONTAINER="$1"

    # Get the values for the current container
    MEMORY_CURRENT=$( lxc-cgroup -n "$CONTAINER" memory.current ) \
        || _container_value_unreadable "$CONTAINER" memory.current
    MEMORY_HIGH=$( lxc-cgroup -n "$CONTAINER" memory.high ) \
        || _container_value_unreadable "$CONTAINER" memory.high
    MEMORY_MAX=$( lxc-cgroup -n "$CONTAINER" memory.max ) \
        || _container_value_unreadable "$CONTAINER" memory.max
    SWAP_CURRENT=$( lxc-cgroup -n "$CONTAINER" memory.swap.current ) \
        || _container_value_unreadable "$CONTAINER" memory.swap.current
    SWAP_HIGH=$( lxc-cgroup -n "$CONTAINER" memory.swap.high ) \
        || _container_value_unreadable "$CONTAINER" memory.swap.high
    SWAP_MAX=$( lxc-cgroup -n "$CONTAINER" memory.swap.max ) \
        || _container_value_unreadable "$CONTAINER" memory.swap.max
    # memory.peak is not available everywhere, so don't consult it carelessly.
    if [ "$ADD_MEMORY_PEAK_TO_PERFDATA" = "1" ]; then
        MEMORY_PEAK=$( lxc-cgroup -n "$CONTAINER" memory.peak ) \
            || _container_value_unreadable "$CONTAINER" memory.peak
    fi

    # Replace "max" values by the host's totals
    if [ "$MEMORY_MAX" = "max" ]; then MEMORY_MAX="$MEMORY_TOTAL_SYSTEM"; fi
    if [ "$MEMORY_HIGH" = "max" ]; then MEMORY_HIGH="$MEMORY_TOTAL_SYSTEM"; fi
    if [ "$SWAP_MAX" = "max" ]; then SWAP_MAX="$SWAP_TOTAL_SYSTEM"; fi
    if [ "$SWAP_HIGH" = "max" ]; then SWAP_HIGH="$SWAP_TOTAL_SYSTEM"; fi

    # If the ranges parameters are set to 'high', we use
    # memory.high/memory.swap.high as the threshold's value.
    THRESHOLD_MEMORY_WARNING="$( _container_threshold "$RANGE_MEMORY_WARNING" "$MEMORY_HIGH" "$MEMORY_MAX" )"
    THRESHOLD_MEMORY_CRITICAL="$( _container_threshold "$RANGE_MEMORY_CRITICAL" "$MEMORY_HIGH" "$MEMORY_MAX" )"
    THRESHOLD_SWAP_WARNING="$( _container_threshold "$RANGE_SWAP_WARNING" "$SWAP_HIGH" "$SWAP_MAX" )"
    THRESHOLD_SWAP_CRITICAL="$( _container_threshold "$RANGE_SWAP_CRITICAL" "$SWAP_HIGH" "$SWAP_MAX" )"

    # Fill the perfdata
    # Remember:
    # 'label'=value[UOM];[warn];[crit];[min];[max]
    # label can contain any characters except the equals sign or single quote (')
    # (except pnp4nagios has trouble with '<>'...)
    PERFDATA_LABEL="$( printf '%s' "$CONTAINER" | tr "'=<>" "____" )"
    OUTPUT_PERFDATA="$( printf "%s\n'container_%s'=%dB;%d;%d;0;%d" \
        "$OUTPUT_PERFDATA" \
        "$PERFDATA_LABEL" \
        "$MEMORY_CURRENT" \
        "$THRESHOLD_MEMORY_WARNING" \
        "$THRESHOLD_MEMORY_CRITICAL" \
        "$MEMORY_MAX" \
    )"
    OUTPUT_PERFDATA="$( printf "%s\n'contswap_%s'=%dB;%d;%d;0;%d" \
        "$OUTPUT_PERFDATA" \
        "$PERFDATA_LABEL" \
        "$SWAP_CURRENT" \
        "$THRESHOLD_SWAP_WARNING" \
        "$THRESHOLD_SWAP_CRITICAL" \
        "$SWAP_MAX" \
    )"

    # Following the homonymous setting, we add the peak memory consumption to
    # the perfdata.
    if [ "$ADD_MEMORY_PEAK_TO_PERFDATA" = "1" ]; then
        OUTPUT_PERFDATA="$( printf "%s\n'contpeak_%s'=%dB;%d;%d;0;%d" \
            "$OUTPUT_PERFDATA" \
            "$PERFDATA_LABEL" \
            "$MEMORY_PEAK" \
            "$THRESHOLD_MEMORY_WARNING" \
            "$THRESHOLD_MEMORY_CRITICAL" \
            "$MEMORY_MAX" \
        )"
    fi

    # Is the memory usage above critical or warning threshold ?
    if [ "$MEMORY_CURRENT" -gt "$THRESHOLD_MEMORY_CRITICAL" ]; then
        # Critical state
        OUTPUT_EXIT_STATUS="$STATE_CRITICAL"
        OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL container $CONTAINER uses $( _container_pretty "$MEMORY_CURRENT" ) over $( _container_pretty "$THRESHOLD_MEMORY_CRITICAL" ) (max: $( _container_pretty "$MEMORY_MAX" ))"
    elif [ "$MEMORY_CURRENT" -gt "$THRESHOLD_MEMORY_WARNING" ]; then
        # Warning state : let's change the exit status (if not already at a upper level)
        if [ "$OUTPUT_EXIT_STATUS" != "$STATE_CRITICAL" ]; then
            OUTPUT_EXIT_STATUS="$STATE_WARNING"
        fi
        OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING container $CONTAINER uses $( _container_pretty "$MEMORY_CURRENT" ) over $( _container_pretty "$THRESHOLD_MEMORY_WARNING" ) (max: $( _container_pretty "$MEMORY_MAX" ))"
    fi

    # Same question for the swap usage. The message mentions "of swap" so
    # that it cannot be mistaken for a memory alert.
    if [ "$SWAP_CURRENT" -gt "$THRESHOLD_SWAP_CRITICAL" ]; then
        # Critical state
        OUTPUT_EXIT_STATUS="$STATE_CRITICAL"
        OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL container $CONTAINER uses $( _container_pretty "$SWAP_CURRENT" ) of swap over $( _container_pretty "$THRESHOLD_SWAP_CRITICAL" ) (max: $( _container_pretty "$SWAP_MAX" ))"
    elif [ "$SWAP_CURRENT" -gt "$THRESHOLD_SWAP_WARNING" ]; then
        # Warning state : let's change the exit status (if not already at a upper level)
        if [ "$OUTPUT_EXIT_STATUS" != "$STATE_CRITICAL" ]; then
            OUTPUT_EXIT_STATUS="$STATE_WARNING"
        fi
        OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING container $CONTAINER uses $( _container_pretty "$SWAP_CURRENT" ) of swap over $( _container_pretty "$THRESHOLD_SWAP_WARNING" ) (max: $( _container_pretty "$SWAP_MAX" ))"
    fi

    # Little counter, 'cause we never know, it might be useful :)
    NB_CHECKED_CONTAINERS=$(( $NB_CHECKED_CONTAINERS + 1 ))
}
|
||||||
|
|
||||||
|
#
# Try to get a pretty formatter (numfmt) and
# fall back to the raw value when numfmt is not available or
# refuses the input: formatting is cosmetic and must never make
# the check fail under 'set -e'.
#
# Usage: echo "1024" | pretty_formatter
#
pretty_formatter() {
    local RAW
    RAW="$( cat - )"

    if command -v numfmt >/dev/null 2>&1 \
        && printf '%s\n' "$RAW" | numfmt --to=si 2>/dev/null; then
        return 0
    fi
    printf '%s\n' "$RAW"
}
|
||||||
|
|
||||||
|
# Some early checks
# - The following base commands are required
#   (grep is used by the cgroup test below and when printing the perfdata)
for cmd in head tail sed grep lxc-ls lxc-cgroup free sort tr; do
    if ! command -v "$cmd" >/dev/null; then
        echo "UNKNOWN: $cmd not found, please check if command exists and PATH is correct"
        exit $STATE_UNKNOWN
    fi
done
# - Check if memory accounting is enabled
#   (last column of the "memory" line of /proc/cgroups must be 1)
if ! grep -E '^memory[[:space:]].*[[:space:]]1$' /proc/cgroups >/dev/null 2>&1; then
    echo "UNKNOWN: cgroup is not defined as kernel cmdline parameter (cgroup_enable=memory)"
    exit $STATE_UNKNOWN
fi
|
||||||
|
|
||||||
|
#
# Loop on parameters + tests
#
# Options are processed in order: each '-n' immediately runs the check
# with the thresholds (and the -P setting) seen so far.
#
while getopts hw:c:W:C:n:P f; do
    case "$f" in
        'h')
            usage
            exit
            ;;

        'w' | 'c' | 'W' | 'C')
            # Thresholds: memory (-w/-c) and swap (-W/-C)
            if ! check_range_syntax "$OPTARG" >/dev/null; then
                echo "UNKNOWN: invalid range."
                exit "$STATE_UNKNOWN"
            fi
            case "$f" in
                'w') RANGE_MEMORY_WARNING="$OPTARG" ;;
                'c') RANGE_MEMORY_CRITICAL="$OPTARG" ;;
                'W') RANGE_SWAP_WARNING="$OPTARG" ;;
                'C') RANGE_SWAP_CRITICAL="$OPTARG" ;;
            esac
            ;;

        'n')
            # Immediately launch the check on this/those container(s)
            if [ "$OPTARG" = "ALL" ]; then
                # A failing lxc-ls must not be mistaken for "no running
                # container", which would end as a silent OK.
                if ! RUNNING_CONTAINERS="$( lxc-ls --running )"; then
                    echo "UNKNOWN: unable to list the running containers (lxc-ls failed)"
                    exit "$STATE_UNKNOWN"
                fi
                # Word splitting is intended here: one word per container
                for CONTAINER in $RUNNING_CONTAINERS; do
                    check_single_container "$CONTAINER"
                done
            else
                check_single_container "$OPTARG"
            fi
            ;;

        'P')
            # Toggle the insertion of memory.peak as a perfdata entry
            ADD_MEMORY_PEAK_TO_PERFDATA=$(( ( $ADD_MEMORY_PEAK_TO_PERFDATA + 1 ) % 2 ))
            ;;

        \?)
            # Unknown option or missing argument: this is UNKNOWN for the
            # monitoring system (exit status 1 would be read as WARNING).
            usage
            exit "$STATE_UNKNOWN"
            ;;
    esac
done
|
||||||
|
|
||||||
|
# Print the status line (without newline: the perfdata follow on the same line)
case "$OUTPUT_EXIT_STATUS" in
    '0')
        printf "OK %s" "$OUTPUT_DETAIL_OK"
        ;;
    '1')
        printf "WARNING %s" "$OUTPUT_DETAIL_WARNING"
        ;;
    '2')
        printf "CRITICAL %s" "$OUTPUT_DETAIL_CRITICAL"
        # Do not hide the containers which are "only" in warning state
        if [ -n "$OUTPUT_DETAIL_WARNING" ]; then
            printf " WARNING %s" "$OUTPUT_DETAIL_WARNING"
        fi
        ;;
    *)
        printf "UNKNOWN"
        ;;
esac

# Add the containers' counter to the perfdata
OUTPUT_PERFDATA="$( printf "%s\n%s" "$OUTPUT_PERFDATA" "nb_containers=$NB_CHECKED_CONTAINERS" )"

# We sort the perfdata because of a weird bug in some icinga/pnp4nagios interactions
# (empty lines are dropped, then the entries are joined with spaces)
printf "|%s\n" "$( printf "%s" "$OUTPUT_PERFDATA" | sort | grep -v "^$" | tr "\n" " " )"
exit $OUTPUT_EXIT_STATUS
|
Loading…
Reference in a new issue