#!/bin/sh
#
# check_lxc_mem.sh (nagios/check_lxc_mem.sh)
#
# Little check for memory usage of LXC containers
# GPL v3+ (copyright chl-dev@bugness.org)
#
# This was written with LXC 5 and cgroup 2 in mind.
# For older versions, check the excellent
# https://www.claudiokuenzler.com/monitoring-plugins/check_lxc.php
#
# TODO:
# - report memory.high as a dedicated metric? memory.peak too?
# - -S switch to add swap usage
# - -W to use memory.high as warning range

PROGPATH=$( echo "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
REVISION="0.1"

# Stop at the first uncaught error
set -e

# Disable localization to uniformize commands' output.
# LC_ALL takes precedence over LANG, and a plain assignment is not exported
# unless the variable was already part of the environment, so set and export
# both.
LANG="C"
LC_ALL="C"
export LANG LC_ALL

# Include check_range()
# Not needed at the moment
#. $PROGPATH/utils.sh
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Default values
RANGE_MEMORY_WARNING="20%"
RANGE_MEMORY_CRITICAL="10%"
RANGE_SWAP_WARNING="20%"
RANGE_SWAP_CRITICAL="10%"
ADD_MEMORY_PEAK_TO_PERFDATA=0
# Host-wide totals in bytes (second column of `free -b`); they stand in for
# cgroup limits that are reported as "max".
MEMORY_TOTAL_SYSTEM="$( free -b | sed -n 's/^Mem:[[:space:]]*\([[:digit:]][[:digit:]]*\)[[:space:]].*/\1/p' )"
SWAP_TOTAL_SYSTEM="$( free -b | sed -n 's/^Swap:[[:space:]]*\([[:digit:]][[:digit:]]*\)[[:space:]].*/\1/p' )"
USE_MEMORY_HIGH_AS_WARNING=0

# Initial values
NB_CHECKED_CONTAINERS=0

# Output
OUTPUT_EXIT_STATUS=$STATE_OK
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""

#
# Help function
#
# Prints the help text on stdout. The here-document is deliberately
# unquoted so that $0 and the default thresholds are expanded.
# NOTE(review): the usage line was rebuilt from the getopts option string
# (hw:c:W:C:n:P); confirm the wording.
#
usage() {
  cat <<EOF
Usage: $0 [-h] [-P] [-w <range>] [-c <range>] [-W <range>] [-C <range>] -n container ...

Example :
    ./check_lxc_mem.sh -w 20% -c 10% -n monitoring.example.net

    -P : add a perfdata entry for memory.peak

We adopt the same behaviour than check_disks so thresholds are matched
against "free" space. The example above will trigger a warning if less
than 20% of memory.max is available.

Thresholds can be specified with a '%' or without, in which case the unit
is the byte, and with the keyword 'high', instructing to use the memory.high
value as a threshold.

Note: Since the containers are checked against the latest thresholds specified, order
      of the arguments is important. Ex:
      ./check_lxc_mem -w 20% -n container1 -w 50% -n container2

Default values:
    warning-free-memory: $RANGE_MEMORY_WARNING
    critical-free-memory: $RANGE_MEMORY_CRITICAL
EOF
}

#
# Validate a threshold given on the command line.
#
# Accepted forms: the keyword 'high', a non-negative integer (bytes),
# or a non-negative integer followed by a single '%'.
#
# args:
#   1: threshold
# returns: 0 if the syntax is valid, 1 otherwise
#
check_range_syntax() {
  local VALUE

  if [ "$1" = "high" ]; then
    return 0
  fi

  # Drop at most one trailing '%'; what is left must be digits only.
  VALUE="${1%\%}"
  case "$VALUE" in
    ''|*[!0-9]*)
      return 1
      ;;
  esac
  return 0
}

#
# Remove the '%' suffix (or error if no '%')
#
# args:
#   1: threshold
# output: the threshold without its trailing '%' (no newline)
# returns: 0 if a '%' suffix was present, 1 otherwise (nothing is printed)
#
get_percent() {
  case "$1" in
    *%)
      printf '%s' "${1%\%}"
      return 0
      ;;
  esac
  return 1
}

#
# Uniformize thresholds:
# thresholds are expressed as *free* space, this converts them into the
# *used* amount above which the threshold is crossed.
#   if "20%" then MAXVALUE * (100 - 20) / 100
#   else          MAXVALUE - THRESHOLD
# args:
#   1: MAXVALUE
#   2: THRESHOLD
# output: the absolute threshold
#
get_absolute_threshold() {
  local MAXVALUE THRESHOLD PERCENT
  MAXVALUE="$1"
  THRESHOLD="$2"

  if PERCENT="$( get_percent "$THRESHOLD" )"; then
    echo "$(( $MAXVALUE * ( 100 - $PERCENT ) / 100 ))"
  else
    echo "$(( $MAXVALUE - $THRESHOLD ))"
  fi
}

#
# Returns 1 if value is above threshold, 0 if not
# (beware: a bare call that returns 1 aborts the script under 'set -e').
#
# args:
#   1: value
#   2: maximum
#   3: threshold (in percent with a '%' suffix, or plain unit)
#
check_above_threshold() {
  local VALUE MAXVALUE THRESHOLD
  VALUE="$1"
  MAXVALUE="$2"
  THRESHOLD="$3"

  if [ "$VALUE" -gt "$( get_absolute_threshold "$MAXVALUE" "$THRESHOLD" )" ]; then
    return 1
  fi
  return 0
}

#
# The actual check, done in a function to factorize
# the code between '-n container1' and '-n ALL' calls
#
# WARNING: this function modifies global variables.
#
# args:
#   1: container's name
#
# Reads the RANGE_* thresholds, MEMORY_TOTAL_SYSTEM, SWAP_TOTAL_SYSTEM and
# ADD_MEMORY_PEAK_TO_PERFDATA; updates OUTPUT_EXIT_STATUS,
# OUTPUT_DETAIL_WARNING, OUTPUT_DETAIL_CRITICAL, OUTPUT_PERFDATA and
# NB_CHECKED_CONTAINERS. The working values (CONTAINER, MEMORY_*, SWAP_*,
# THRESHOLD_*) are left in global variables as well.
#
# If a cgroup value cannot be read, an "UNKNOWN: ..." line is printed and the
# plugin exits with STATE_UNKNOWN, instead of being killed by 'set -e' with
# lxc-cgroup's own exit code and no plugin output.
check_single_container() {
  local LABEL

  CONTAINER="$1"

  # Get the values for the current container
  MEMORY_CURRENT=$( lxc-cgroup -n "$CONTAINER" memory.current ) \
    || _lxc_mem_read_failure "$CONTAINER" memory.current
  MEMORY_HIGH=$( lxc-cgroup -n "$CONTAINER" memory.high ) \
    || _lxc_mem_read_failure "$CONTAINER" memory.high
  MEMORY_MAX=$( lxc-cgroup -n "$CONTAINER" memory.max ) \
    || _lxc_mem_read_failure "$CONTAINER" memory.max
  SWAP_CURRENT=$( lxc-cgroup -n "$CONTAINER" memory.swap.current ) \
    || _lxc_mem_read_failure "$CONTAINER" memory.swap.current
  SWAP_HIGH=$( lxc-cgroup -n "$CONTAINER" memory.swap.high ) \
    || _lxc_mem_read_failure "$CONTAINER" memory.swap.high
  SWAP_MAX=$( lxc-cgroup -n "$CONTAINER" memory.swap.max ) \
    || _lxc_mem_read_failure "$CONTAINER" memory.swap.max
  # memory.peak is not available everywhere, so don't consult it carelessly.
  if [ "$ADD_MEMORY_PEAK_TO_PERFDATA" = "1" ]; then
    MEMORY_PEAK=$( lxc-cgroup -n "$CONTAINER" memory.peak ) \
      || _lxc_mem_read_failure "$CONTAINER" memory.peak
  fi

  # Replace "max" values (no limit set) by the host-wide totals
  if [ "$MEMORY_MAX" = "max" ]; then MEMORY_MAX="$MEMORY_TOTAL_SYSTEM"; fi
  if [ "$MEMORY_HIGH" = "max" ]; then MEMORY_HIGH="$MEMORY_TOTAL_SYSTEM"; fi
  if [ "$SWAP_MAX" = "max" ]; then SWAP_MAX="$SWAP_TOTAL_SYSTEM"; fi
  if [ "$SWAP_HIGH" = "max" ]; then SWAP_HIGH="$SWAP_TOTAL_SYSTEM"; fi

  # If the ranges parameters are set to 'high', we use
  # memory.high/memory.swap.high as the threshold's value.
  THRESHOLD_MEMORY_WARNING="$( _lxc_mem_threshold "$RANGE_MEMORY_WARNING" "$MEMORY_HIGH" "$MEMORY_MAX" )"
  THRESHOLD_MEMORY_CRITICAL="$( _lxc_mem_threshold "$RANGE_MEMORY_CRITICAL" "$MEMORY_HIGH" "$MEMORY_MAX" )"
  THRESHOLD_SWAP_WARNING="$( _lxc_mem_threshold "$RANGE_SWAP_WARNING" "$SWAP_HIGH" "$SWAP_MAX" )"
  THRESHOLD_SWAP_CRITICAL="$( _lxc_mem_threshold "$RANGE_SWAP_CRITICAL" "$SWAP_HIGH" "$SWAP_MAX" )"

  # Fill the perfdata
  # Remember:
  #   'label'=value[UOM];[warn];[crit];[min];[max]
  # label can contain any characters except the equals sign or single quote (')
  # (except pnp4nagios has trouble with '<>'...)
  LABEL="$( printf '%s' "$CONTAINER" | tr "'=<>" "____" )"
  _lxc_mem_add_perfdata container "$LABEL" "$MEMORY_CURRENT" \
    "$THRESHOLD_MEMORY_WARNING" "$THRESHOLD_MEMORY_CRITICAL" "$MEMORY_MAX"
  _lxc_mem_add_perfdata contswap "$LABEL" "$SWAP_CURRENT" \
    "$THRESHOLD_SWAP_WARNING" "$THRESHOLD_SWAP_CRITICAL" "$SWAP_MAX"

  # Following the homonymous setting, we add the peak memory consumption to
  # the perfdata (reported against the memory thresholds).
  if [ "$ADD_MEMORY_PEAK_TO_PERFDATA" = "1" ]; then
    _lxc_mem_add_perfdata contpeak "$LABEL" "$MEMORY_PEAK" \
      "$THRESHOLD_MEMORY_WARNING" "$THRESHOLD_MEMORY_CRITICAL" "$MEMORY_MAX"
  fi

  # Is the value above critical or warning threshold ?
  _lxc_mem_evaluate "" "$MEMORY_CURRENT" \
    "$THRESHOLD_MEMORY_WARNING" "$THRESHOLD_MEMORY_CRITICAL" "$MEMORY_MAX"
  # The swap detail is tagged so it can be told apart from the memory one.
  _lxc_mem_evaluate " of swap" "$SWAP_CURRENT" \
    "$THRESHOLD_SWAP_WARNING" "$THRESHOLD_SWAP_CRITICAL" "$SWAP_MAX"

  # Little counter, 'cause we never know, it might be useful :)
  NB_CHECKED_CONTAINERS=$(( $NB_CHECKED_CONTAINERS + 1 ))
}

# Private helper: report an unreadable cgroup value and leave as UNKNOWN.
# args: 1: container's name, 2: cgroup key
_lxc_mem_read_failure() {
  echo "UNKNOWN: unable to read $2 of container $1"
  exit "${STATE_UNKNOWN:-3}"
}

# Private helper: print the absolute threshold for one range setting.
# args: 1: range ('high', 'N%' or N), 2: the *.high value, 3: the *.max value
_lxc_mem_threshold() {
  if [ "$1" = "high" ]; then
    echo "$2"
  else
    get_absolute_threshold "$3" "$1"
  fi
}

# Private helper: append one perfdata entry to OUTPUT_PERFDATA.
# args: 1: label prefix, 2: sanitized container label, 3: value,
#       4: warning threshold, 5: critical threshold, 6: maximum
_lxc_mem_add_perfdata() {
  OUTPUT_PERFDATA="$( printf "%s\n'%s_%s'=%dB;%d;%d;0;%d" \
    "$OUTPUT_PERFDATA" "$1" "$2" "$3" "$4" "$5" "$6" )"
}

# Private helper: compare a usage against its thresholds and update
# OUTPUT_EXIT_STATUS / OUTPUT_DETAIL_* accordingly (reads CONTAINER).
# args: 1: text inserted after the used amount ("" or " of swap"),
#       2: current usage, 3: warning threshold, 4: critical threshold,
#       5: maximum
_lxc_mem_evaluate() {
  local LIMIT DETAIL

  if [ "$2" -gt "$4" ]; then
    LIMIT="$4"
  elif [ "$2" -gt "$3" ]; then
    LIMIT="$3"
  else
    return 0
  fi

  DETAIL="container $CONTAINER uses $( printf '%s\n' "$2" | pretty_formatter )$1 over $( printf '%s\n' "$LIMIT" | pretty_formatter ) (max: $( printf '%s\n' "$5" | pretty_formatter ))"

  if [ "$2" -gt "$4" ]; then
    # Critical state
    OUTPUT_EXIT_STATUS="$STATE_CRITICAL"
    OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL $DETAIL"
  else
    # Warning state : let's change the exit status (if not already at a upper level)
    if [ "$OUTPUT_EXIT_STATUS" != "$STATE_CRITICAL" ]; then
      OUTPUT_EXIT_STATUS="$STATE_WARNING"
    fi
    OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING $DETAIL"
  fi
}

#
# Try to get a pretty formatter and
# fall back to "cat"
#
# Usage: echo "1024" | pretty_formatter
#
pretty_formatter() {
  if command -v numfmt >/dev/null 2>&1; then
    numfmt --to=si
  else
    cat
  fi
}

# Some early checks
# - The following base commands are required
for cmd in head tail sed grep cat lxc-ls lxc-cgroup free sort tr; do
  if ! command -v "$cmd" >/dev/null 2>&1; then
    echo "UNKNOWN: $cmd not found, please check if command exists and PATH is correct"
    exit "$STATE_UNKNOWN"
  fi
done
# - Check if memory accounting is enabled
# NOTE(review): this relies on /proc/cgroups listing the memory controller;
# confirm that file is still populated on the cgroup-v2-only hosts targeted.
if ! grep -E '^memory[[:space:]].*[[:space:]]1$' /proc/cgroups >/dev/null 2>&1; then
  echo "UNKNOWN: cgroup is not defined as kernel cmdline parameter (cgroup_enable=memory)"
  exit "$STATE_UNKNOWN"
fi

#
# Validate a threshold given on the command line, or leave as UNKNOWN.
#
# args:
#   1: threshold as typed by the user
#
require_valid_range() {
  if ! check_range_syntax "$1" >/dev/null; then
    echo "UNKNOWN: invalid range."
    exit "$STATE_UNKNOWN"
  fi
}

#
# Loop on parameters + tests
#
# Options are processed in order: each '-n' immediately checks the
# container(s) against the thresholds seen so far.
#
CONTAINER_REQUESTED=0
while getopts hw:c:W:C:n:P f; do
  case "$f" in
    'h')
      usage
      exit
      ;;

    'w')
      require_valid_range "$OPTARG"
      RANGE_MEMORY_WARNING="$OPTARG"
      ;;

    'c')
      require_valid_range "$OPTARG"
      RANGE_MEMORY_CRITICAL="$OPTARG"
      ;;

    'W')
      require_valid_range "$OPTARG"
      RANGE_SWAP_WARNING="$OPTARG"
      ;;

    'C')
      require_valid_range "$OPTARG"
      RANGE_SWAP_CRITICAL="$OPTARG"
      ;;

    'n')
      # Immediately launch the check on this/those container(s)
      CONTAINER_REQUESTED=1
      if [ "$OPTARG" = "ALL" ]; then
        # Capture the list first: a failure inside a 'for' word list
        # would go unnoticed, even under 'set -e'.
        if ! RUNNING_CONTAINERS="$( lxc-ls --running )"; then
          echo "UNKNOWN: unable to list running containers (lxc-ls)"
          exit "$STATE_UNKNOWN"
        fi
        # Intentionally unquoted: one word per container name.
        for CONTAINER in $RUNNING_CONTAINERS; do
          check_single_container "$CONTAINER"
        done
      else
        check_single_container "$OPTARG"
      fi
      ;;

    'P')
      # Toggle the insertion of memory.peak as a perfdata entry
      ADD_MEMORY_PEAK_TO_PERFDATA=$(( ( $ADD_MEMORY_PEAK_TO_PERFDATA + 1 ) % 2 ))
      ;;

    \?)
      # Invalid command line: exit as UNKNOWN (exit status 1 would be
      # read by Nagios as a WARNING).
      usage
      exit "$STATE_UNKNOWN"
      ;;
  esac
done

# Without any '-n' nothing was checked: do not report a misleading OK.
if [ "$CONTAINER_REQUESTED" -eq 0 ]; then
  echo "UNKNOWN: no container specified (use -n <container> or -n ALL)"
  exit "$STATE_UNKNOWN"
fi

case "$OUTPUT_EXIT_STATUS" in
  '0')
    printf "OK %s" "$OUTPUT_DETAIL_OK"
    ;;
  '1')
    printf "WARNING %s" "$OUTPUT_DETAIL_WARNING"
    ;;
  '2')
    printf "CRITICAL %s" "$OUTPUT_DETAIL_CRITICAL"
    ;;
  *)
    printf "UNKNOWN"
    ;;
esac

# Add the containers' counter to the perfdata
OUTPUT_PERFDATA="$( printf "%s\n%s" "$OUTPUT_PERFDATA" "nb_containers=$NB_CHECKED_CONTAINERS" )"

# We sort the perfdata because of a weird bug in some icinga/pnp4nagios interactions
printf "|%s\n" "$( printf "%s" "$OUTPUT_PERFDATA" | sort | grep -v "^$" | tr "\n" " " )"
exit $OUTPUT_EXIT_STATUS