1
0
Fork 0
scripts-admin-quickndirty-p.../nagios/check_lxc_mem.sh
2023-10-31 00:44:46 +01:00

369 lines
10 KiB
Bash
Executable file

#!/bin/sh
# TODO:
# - high in a dedicated metric? peak too?
# - -S switch to add swap usage
# - -W to use memory.high as warning range
# Little check for memory usage of LXC containers
# GPL v3+ (copyright chl-dev@bugness.org)
#
# This was written with LXC 5 and cgroup 2 in mind.
# For older versions, check the excellent
# https://www.claudiokuenzler.com/monitoring-plugins/check_lxc.php
# Directory part of the path used to invoke this script (only useful to
# source utils.sh, see below).
PROGPATH=$( printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
REVISION="0.1"
# Stop at the first uncaught error
set -e
# Disable localization to uniformize commands' output.
# LC_ALL takes precedence over LANG and over every LC_* variable, and both
# must be exported to reach child processes such as free(1), whose output
# is parsed below.
LANG="C"
LC_ALL="C"
export LANG LC_ALL
# Include check_range()
# Not needed at the moment
#. $PROGPATH/utils.sh
# Exit statuses understood by Nagios-compatible schedulers
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4
# Default values (thresholds are expressed as *free* memory/swap)
RANGE_MEMORY_WARNING="20%"
RANGE_MEMORY_CRITICAL="10%"
RANGE_SWAP_WARNING="20%"
RANGE_SWAP_CRITICAL="10%"
ADD_MEMORY_PEAK_TO_PERFDATA=0
# Host totals, in bytes: second column of the "Mem:" / "Swap:" lines of
# 'free -b'. They stand in for cgroup limits reported as "max".
MEMORY_TOTAL_SYSTEM="$( free -b | sed -n '/^Mem:/s/^Mem:[[:space:]]*\([[:digit:]][[:digit:]]*\)[[:space:]].*/\1/p' )"
SWAP_TOTAL_SYSTEM="$( free -b | sed -n '/^Swap:/s/^Swap:[[:space:]]*\([[:digit:]][[:digit:]]*\)[[:space:]].*/\1/p' )"
USE_MEMORY_HIGH_AS_WARNING=0
# Initial values
NB_CHECKED_CONTAINERS=0
# Output (accumulated by the checks, printed at the very end)
OUTPUT_EXIT_STATUS=$STATE_OK
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""
#
# Help function
#
usage() {
  # Print the command-line help on stdout.
  # The heredoc delimiter is unquoted on purpose: $0 and the current
  # RANGE_* values are expanded into the text.
  # -W and -C take an argument (swap thresholds), matching the getopts
  # option string used by the argument loop.
cat <<EOF
Usage :
$0 [-w warning-free-memory%] [-c critical-free-memory%] [-W warning-free-swap%] [-C critical-free-swap%] [-P] -n <container|ALL> [-n container ...]
Example :
./check_lxc_mem.sh -w 20% -c 10% -n monitoring.example.net
-w, -c : warning / critical thresholds on free memory
-W, -C : warning / critical thresholds on free swap
-P : add a perfdata entry for memory.peak
-h : display this help
We adopt the same behaviour as check_disks so thresholds are matched
against "free" space. The example above will trigger a warning if less
than 20% of memory.max is available.
Thresholds can be specified with a '%' or without, in which case the unit
is the byte, or with the keyword 'high', instructing to use the memory.high
(or memory.swap.high) value as a threshold.
Note: Since the containers are checked against the latest thresholds specified, order
of the arguments is important (this also applies to -P). Ex:
./check_lxc_mem -w 20% -n container1 -w 50% -n container2
Default values:
warning-free-memory: $RANGE_MEMORY_WARNING
critical-free-memory: $RANGE_MEMORY_CRITICAL
warning-free-swap: $RANGE_SWAP_WARNING
critical-free-swap: $RANGE_SWAP_CRITICAL
EOF
}
check_range_syntax() {
  # Validate a threshold given on the command line.
  # Accepted forms are exactly those the rest of the script can compute with:
  #   high       use the cgroup *.high value as threshold
  #   NN%        percentage (non-negative integer followed by '%')
  #   NN         plain non-negative integer (bytes)
  # Anything else (empty string, decimals, unit suffixes, Nagios-style
  # ranges such as '10:20') would break the shell arithmetic later on,
  # so it is rejected here.
  # args:
  # 1: threshold to validate
  # Returns 0 if the threshold is usable, 1 otherwise.
  local number
  case "$1" in
    high) return 0 ;;
    *%) number="${1%?}" ;;
    *) number="$1" ;;
  esac
  case "$number" in
    '' | *[!0-9]*) return 1 ;;
  esac
  return 0
}
# check_range 0 "$1" >/dev/null 2>&1
# if [ "$?" -eq "2" ]; then
# return 1
# fi
# return 0
#}
#
# Remove the '%' suffix (or error if no '%')
#
get_percent() {
  # Print $1 without its trailing '%' (no newline added) and return 0.
  # Return 1, printing nothing, when $1 does not end with '%'.
  # Pure parameter expansion: no dependency on 'echo -n' or on
  # 'head -c -1', neither of which is portable under /bin/sh.
  case "$1" in
    *%)
      # The last character is known to be '%': drop exactly one character.
      printf '%s' "${1%?}"
      return 0
      ;;
  esac
  return 1
}
#
# Uniformize thresholds:
# if "20%" then calculate
# else leave as is
# args:
# 1: MAXVALUE
# 2: THRESHOLD
#
get_absolute_threshold() {
  # Convert a "free space" threshold into an absolute usage limit and
  # print it:
  #   'NN%' -> MAXVALUE * (100 - NN) / 100
  #   'NN'  -> MAXVALUE - NN
  # args:
  # 1: maximum value
  # 2: threshold (percentage with a '%' suffix, or plain number)
  local ceiling="$1"
  local limit="$2"
  local percent
  if percent="$( get_percent "$limit" )"; then
    echo "$(( $ceiling * ( 100 - $percent ) / 100 ))"
    return
  fi
  echo "$(( $ceiling - $limit ))"
}
#
# Returns 1 if value is above threshold, 0 if not
#
# args:
# 1: value
# 2: maximum
# 3: threshold (in percent with a '%' suffix, or plain unit)
#
check_above_threshold() {
  # Compare a value with the absolute limit derived from a maximum and a
  # "free space" threshold (see get_absolute_threshold).
  # Note the inverted convention: returns 1 when the value is strictly
  # above the limit, 0 otherwise.
  # args:
  # 1: value
  # 2: maximum
  # 3: threshold (percentage with a '%' suffix, or plain number)
  local current="$1"
  local ceiling="$2"
  local limit="$3"
  [ "$current" -gt "$( get_absolute_threshold "$ceiling" "$limit" )" ] || return 0
  return 1
}
#
# The actual check, done in a function to factorize
# the code between '-n container1' and '-n ALL' calls
#
# WARNING: this function modifies global variables.
#
# args:
# 1: container's name
# --- private helpers of check_single_container -------------------------

# Print an UNKNOWN status line on stdout and terminate the plugin.
# Must be called from the main shell (not from a command substitution),
# otherwise 'exit' would only leave the subshell.
# args:
# 1: explanation appended to the status line
_lxc_mem_unknown() {
  echo "UNKNOWN: $1"
  exit "${STATE_UNKNOWN:-3}"
}

# Print the absolute threshold matching one range parameter.
# args:
# 1: range parameter ('high', 'NN%' or plain number)
# 2: value to use when the range is 'high'
# 3: maximum, used by get_absolute_threshold otherwise
_lxc_mem_threshold() {
  if [ "$1" = "high" ]; then
    printf '%s\n' "$2"
  else
    get_absolute_threshold "$3" "$1"
  fi
}

# Print the human-readable detail of a threshold violation (no newline).
# args:
# 1: container's name
# 2: current usage
# 3: threshold that was exceeded
# 4: maximum
_lxc_mem_describe() {
  printf 'container %s uses %s over %s (max: %s)' \
    "$1" \
    "$( printf '%s\n' "$2" | pretty_formatter )" \
    "$( printf '%s\n' "$3" | pretty_formatter )" \
    "$( printf '%s\n' "$4" | pretty_formatter )"
}

# Compare a usage with its thresholds and update OUTPUT_EXIT_STATUS,
# OUTPUT_DETAIL_WARNING and OUTPUT_DETAIL_CRITICAL accordingly.
# args:
# 1: container's name
# 2: current usage
# 3: warning threshold (absolute)
# 4: critical threshold (absolute)
# 5: maximum
_lxc_mem_evaluate() {
  local detail
  if [ "$2" -gt "$4" ]; then
    # Critical state
    detail="$( _lxc_mem_describe "$1" "$2" "$4" "$5" )"
    OUTPUT_EXIT_STATUS="$STATE_CRITICAL"
    OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL $detail"
  elif [ "$2" -gt "$3" ]; then
    # Warning state: never downgrade an already critical exit status
    detail="$( _lxc_mem_describe "$1" "$2" "$3" "$5" )"
    if [ "$OUTPUT_EXIT_STATUS" != "$STATE_CRITICAL" ]; then
      OUTPUT_EXIT_STATUS="$STATE_WARNING"
    fi
    OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING $detail"
  fi
}

# Check memory and swap usage of one container against the thresholds
# currently held in RANGE_MEMORY_* / RANGE_SWAP_*.
#
# WARNING: this function modifies global variables (CONTAINER, MEMORY_*,
# SWAP_*, THRESHOLD_*, OUTPUT_EXIT_STATUS, OUTPUT_DETAIL_WARNING,
# OUTPUT_DETAIL_CRITICAL, OUTPUT_PERFDATA, NB_CHECKED_CONTAINERS).
#
# If a cgroup value cannot be read or a threshold cannot be computed, the
# plugin stops immediately with an UNKNOWN status instead of letting
# 'set -e' abort with the failing command's own exit status.
#
# args:
# 1: container's name
check_single_container() {
  local label
  CONTAINER="$1"

  # Get the values for the current container
  MEMORY_CURRENT=$( lxc-cgroup -n "$CONTAINER" memory.current ) \
    || _lxc_mem_unknown "unable to read memory.current of container $CONTAINER"
  MEMORY_HIGH=$( lxc-cgroup -n "$CONTAINER" memory.high ) \
    || _lxc_mem_unknown "unable to read memory.high of container $CONTAINER"
  MEMORY_MAX=$( lxc-cgroup -n "$CONTAINER" memory.max ) \
    || _lxc_mem_unknown "unable to read memory.max of container $CONTAINER"
  SWAP_CURRENT=$( lxc-cgroup -n "$CONTAINER" memory.swap.current ) \
    || _lxc_mem_unknown "unable to read memory.swap.current of container $CONTAINER"
  SWAP_HIGH=$( lxc-cgroup -n "$CONTAINER" memory.swap.high ) \
    || _lxc_mem_unknown "unable to read memory.swap.high of container $CONTAINER"
  SWAP_MAX=$( lxc-cgroup -n "$CONTAINER" memory.swap.max ) \
    || _lxc_mem_unknown "unable to read memory.swap.max of container $CONTAINER"
  # memory.peak is not available everywhere, so don't consult it carelessly.
  if [ "$ADD_MEMORY_PEAK_TO_PERFDATA" = "1" ]; then
    MEMORY_PEAK=$( lxc-cgroup -n "$CONTAINER" memory.peak ) \
      || _lxc_mem_unknown "unable to read memory.peak of container $CONTAINER"
  fi

  # Replace "max" values (no limit set) with the host totals
  if [ "$MEMORY_MAX" = "max" ]; then MEMORY_MAX="$MEMORY_TOTAL_SYSTEM"; fi
  if [ "$MEMORY_HIGH" = "max" ]; then MEMORY_HIGH="$MEMORY_TOTAL_SYSTEM"; fi
  if [ "$SWAP_MAX" = "max" ]; then SWAP_MAX="$SWAP_TOTAL_SYSTEM"; fi
  if [ "$SWAP_HIGH" = "max" ]; then SWAP_HIGH="$SWAP_TOTAL_SYSTEM"; fi

  # If the ranges parameters are set to 'high', we use
  # memory.high/memory.swap.high as the threshold's value.
  THRESHOLD_MEMORY_WARNING="$( _lxc_mem_threshold "$RANGE_MEMORY_WARNING" "$MEMORY_HIGH" "$MEMORY_MAX" )" \
    || _lxc_mem_unknown "unable to compute the memory warning threshold of container $CONTAINER"
  THRESHOLD_MEMORY_CRITICAL="$( _lxc_mem_threshold "$RANGE_MEMORY_CRITICAL" "$MEMORY_HIGH" "$MEMORY_MAX" )" \
    || _lxc_mem_unknown "unable to compute the memory critical threshold of container $CONTAINER"
  THRESHOLD_SWAP_WARNING="$( _lxc_mem_threshold "$RANGE_SWAP_WARNING" "$SWAP_HIGH" "$SWAP_MAX" )" \
    || _lxc_mem_unknown "unable to compute the swap warning threshold of container $CONTAINER"
  THRESHOLD_SWAP_CRITICAL="$( _lxc_mem_threshold "$RANGE_SWAP_CRITICAL" "$SWAP_HIGH" "$SWAP_MAX" )" \
    || _lxc_mem_unknown "unable to compute the swap critical threshold of container $CONTAINER"

  # Fill the perfdata
  # Remember:
  # 'label'=value[UOM];[warn];[crit];[min];[max]
  # label can contain any characters except the equals sign or single quote (')
  # (except pnp4nagios has trouble with '<>'...)
  # The replacement set is spelled out in full so the result does not
  # depend on how tr pads a shorter second set.
  label="$( printf '%s\n' "$CONTAINER" | tr "'=<>" "____" )"
  OUTPUT_PERFDATA="$( printf "%s\n'container_%s'=%dB;%d;%d;0;%d" \
    "$OUTPUT_PERFDATA" \
    "$label" \
    "$MEMORY_CURRENT" \
    "$THRESHOLD_MEMORY_WARNING" \
    "$THRESHOLD_MEMORY_CRITICAL" \
    "$MEMORY_MAX" \
  )"
  OUTPUT_PERFDATA="$( printf "%s\n'contswap_%s'=%dB;%d;%d;0;%d" \
    "$OUTPUT_PERFDATA" \
    "$label" \
    "$SWAP_CURRENT" \
    "$THRESHOLD_SWAP_WARNING" \
    "$THRESHOLD_SWAP_CRITICAL" \
    "$SWAP_MAX" \
  )"
  # Following the homonymous setting, we add the peak memory consumption to
  # the perfdata.
  if [ "$ADD_MEMORY_PEAK_TO_PERFDATA" = "1" ]; then
    OUTPUT_PERFDATA="$( printf "%s\n'contpeak_%s'=%dB;%d;%d;0;%d" \
      "$OUTPUT_PERFDATA" \
      "$label" \
      "$MEMORY_PEAK" \
      "$THRESHOLD_MEMORY_WARNING" \
      "$THRESHOLD_MEMORY_CRITICAL" \
      "$MEMORY_MAX" \
    )"
  fi

  # Is the value above critical or warning threshold ?
  _lxc_mem_evaluate "$CONTAINER" "$MEMORY_CURRENT" \
    "$THRESHOLD_MEMORY_WARNING" "$THRESHOLD_MEMORY_CRITICAL" "$MEMORY_MAX"
  _lxc_mem_evaluate "$CONTAINER" "$SWAP_CURRENT" \
    "$THRESHOLD_SWAP_WARNING" "$THRESHOLD_SWAP_CRITICAL" "$SWAP_MAX"

  # Little counter, 'cause we never know, it might be useful :)
  NB_CHECKED_CONTAINERS=$(( $NB_CHECKED_CONTAINERS + 1 ))
}
#
# Try to get a pretty formatter and
# fall back to "cat"
#
# Usage: echo "1024" | pretty_formatter
#
pretty_formatter() {
  # Filter: reads stdin and writes it through 'numfmt --to si' when numfmt
  # is installed; otherwise copies stdin to stdout untouched.
  command -v numfmt >/dev/null 2>&1 || {
    cat -
    return
  }
  numfmt --to si
}
# Preliminary sanity checks, run before any argument is parsed.
# - Every external command the script relies on must be reachable via PATH
for required in head tail sed lxc-ls lxc-cgroup free sort tr; do
  command -v "$required" >/dev/null && continue
  echo "UNKNOWN: $required not found, please check if command exists and PATH is correct"
  exit $STATE_UNKNOWN
done
# - Memory accounting must be enabled: /proc/cgroups needs a "memory" line
#   whose last column is 1
grep -E '^memory[[:space:]].*[[:space:]]1$' /proc/cgroups >/dev/null 2>&1 || {
  echo "cgroup is not defined as kernel cmdline parameter (cgroup_enable=memory)"
  exit $STATE_UNKNOWN
}
#
# Loop on parameters + tests
#
# Walk through the command-line options in order.
# Threshold options (-w/-c/-W/-C) and -P only update global settings, while
# each -n immediately runs the check with the settings seen so far; this is
# why the order of the arguments matters.
# Usage errors (invalid range, unknown option) exit with UNKNOWN (3): any
# other non-zero status would be read as WARNING or CRITICAL by the
# scheduler.
# args: the script's own arguments ("$@")
process_arguments() {
  local opt
  while getopts hw:c:W:C:n:P opt; do
    case "$opt" in
      h)
        usage
        exit 0
        ;;
      w | c | W | C)
        if ! check_range_syntax "$OPTARG" >/dev/null; then
          echo "UNKNOWN: invalid range."
          exit "${STATE_UNKNOWN:-3}"
        fi
        case "$opt" in
          w) RANGE_MEMORY_WARNING="$OPTARG" ;;
          c) RANGE_MEMORY_CRITICAL="$OPTARG" ;;
          W) RANGE_SWAP_WARNING="$OPTARG" ;;
          C) RANGE_SWAP_CRITICAL="$OPTARG" ;;
        esac
        ;;
      n)
        # Immediately launch the check on this/those container(s)
        if [ "$OPTARG" = "ALL" ]; then
          # Word splitting of the lxc-ls output is intended here: one word
          # per running container.
          for CONTAINER in $( lxc-ls --running ); do
            check_single_container "$CONTAINER"
          done
        else
          check_single_container "$OPTARG"
        fi
        ;;
      P)
        # Toggle the insertion of memory.peak as a perfdata entry
        ADD_MEMORY_PEAK_TO_PERFDATA=$(( ( $ADD_MEMORY_PEAK_TO_PERFDATA + 1 ) % 2 ))
        ;;
      \?)
        usage
        exit "${STATE_UNKNOWN:-3}"
        ;;
    esac
  done
}
process_arguments "$@"
# Status text: only the details matching the final exit status are shown.
if [ "$OUTPUT_EXIT_STATUS" = "0" ]; then
  printf "OK %s" "$OUTPUT_DETAIL_OK"
elif [ "$OUTPUT_EXIT_STATUS" = "1" ]; then
  printf "WARNING %s" "$OUTPUT_DETAIL_WARNING"
elif [ "$OUTPUT_EXIT_STATUS" = "2" ]; then
  printf "CRITICAL %s" "$OUTPUT_DETAIL_CRITICAL"
else
  printf "UNKNOWN"
fi
# Append the number of checked containers as one more perfdata entry
# (entries are kept one per line until the final formatting below).
OUTPUT_PERFDATA="$OUTPUT_PERFDATA
nb_containers=$NB_CHECKED_CONTAINERS"
# The perfdata are sorted because of a weird bug in some icinga/pnp4nagios
# interactions; empty lines are dropped and the entries are then joined
# with spaces after the '|' separator.
PERFDATA_LINE="$( printf "%s" "$OUTPUT_PERFDATA" | sort | grep -v "^$" | tr "\n" " " )"
printf "|%s\n" "$PERFDATA_LINE"
exit $OUTPUT_EXIT_STATUS