1
0
Fork 0

nagios: adding check_btrfs_quota

This commit is contained in:
Chl 2021-08-29 15:27:33 +02:00
parent 6363625979
commit 737f61e844

244
nagios/check_btrfs_quota.sh Executable file
View file

@ -0,0 +1,244 @@
#!/bin/sh
# Little check for quota usage on Btrfs subvolumes
# GPL v3+ (copyright chl-dev@bugness.org)
# Directory part of the path this script was called with (everything before
# the last '/' or '\' of $0). It is only referenced by the commented-out
# utils.sh include below.
# "$0" is quoted so that a path containing blanks or glob characters reaches
# sed unchanged.
PROGPATH=$( printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
REVISION="0.1"

# Stop at the first uncaught error
set -e

# Include check_range()
# Not needed at the moment
#. $PROGPATH/utils.sh

# Exit statuses used by this plugin
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Default values (thresholds on the remaining free space, overridden by -w / -c)
RANGE_WARNING="20%"
RANGE_CRITICAL="10%"

# Output: worst status seen so far, one detail string per status, and the
# accumulated perfdata lines
OUTPUT_EXIT_STATUS=$STATE_OK
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""
#
# Help function
#
usage() {
    # Print the help text on stdout, one argument per output line.
    # $0 and the current warning/critical thresholds are expanded at call time.
    printf '%s\n' \
        'Usage :' \
        "$0 [-w warning-free-space%] [-c critical-free-space%] -V volume -V..." \
        'Example :' \
        './check_btrfs_quota -w 20% -c 10% -V /home' \
        'We adopt the same behaviour than check_disks so thresholds are matched' \
        'against "free" space. The example above will trigger a warning if less' \
        'than 20% of maxrfer or maxecl is available.' \
        'Note: Since the volumes are checked against the latest thresholds specified, order' \
        'of the arguments is important. Ex:' \
        './check_btrfs_quota -w 20% -V /home -w 50% -V /tmp' \
        'Default values:' \
        "warning-free-space: $RANGE_WARNING" \
        "critical-free-space: $RANGE_CRITICAL"
}
check_range_syntax() {
    # Validate a threshold passed to -w / -c.
    #
    # args:
    # - threshold candidate
    #
    # Returns 0 when the value is an integer (optionally negative), with or
    # without a '%' suffix, 1 otherwise. These are the only two forms the
    # free-space comparison of this script can evaluate; anything else would
    # make that comparison fail and be silently ignored.
    local CANDIDATE
    case "$1" in
        *%) CANDIDATE="${1%?}" ;;    # drop the single trailing '%'
        *)  CANDIDATE="$1" ;;
    esac
    CANDIDATE="${CANDIDATE#-}"       # tolerate one leading minus sign
    case "$CANDIDATE" in
        ''|*[!0-9]*) return 1 ;;     # empty, or something that is not a digit
    esac
    return 0
}
# Note: check_range() from utils.sh is not used for this validation, since
# utils.sh is not sourced by this script at the moment.
#
# Remove the '%' suffix (or error if no '%')
#
get_percent() {
    # args:
    # - value, e.g. "20%"
    #
    # Prints the value without its trailing '%' (no newline) and returns 0;
    # prints nothing and returns 1 when the value does not end with '%'.
    # Done with shell pattern matching only: no external process, and no
    # dependency on 'echo -n' or on 'head -c -1'.
    case "$1" in
        *%)
            printf '%s' "${1%?}"
            return 0
            ;;
    esac
    return 1
}
#
# Returns 0 if freespace is above threshold, 1 if not
#
# args:
# - "free" space value
# - quota (aka. maximum space possible)
# - threshold (in percent with a '%' suffix, or plain unit)
#
check_freespace_is_above_threshold() {
    # With a '%' threshold, the free space is first converted into an integer
    # percentage of the quota; with a plain threshold, the raw values are
    # compared. A free space equal to the threshold counts as NOT above it.
    local VALUE="$1"
    local MAXVALUE="$2"
    local THRESHOLD="$3"

    case "$THRESHOLD" in
        *%)
            THRESHOLD="${THRESHOLD%?}"    # drop the trailing '%'
            if [ "$MAXVALUE" -eq 0 ]; then
                # A quota of 0 leaves nothing free; also avoids dividing by zero.
                VALUE=0
            else
                VALUE="$(( $VALUE * 100 / $MAXVALUE ))"
            fi
            ;;
    esac

    if [ "$VALUE" -le "$THRESHOLD" ]; then
        return 1
    fi
    return 0
}
#
# Factorize output in the form of:
# /home:0/260 (rfer:10M(50%) excl:5M(20%))
# args:
# 1: VOLUME
# 2: QGROUPID
# 3: RFER
# 4: EXCL
# 5: MAXRFER
# 6: MAXEXCL
# Helper of format_output_item: prints "<label>:<free space>(<free percent>%)"
# for one limit, without trailing newline.
# args:
# 1: label ("rfer" or "excl")
# 2: used space
# 3: limit (must be a number, not "none")
_format_quota_free() {
    local FREE PERCENT
    FREE="$(( $3 - $2 ))"
    PERCENT=0
    # A limit of 0 would turn the percentage into a division by zero.
    if [ "$3" -ne 0 ]; then
        PERCENT="$(( 100 * $FREE / $3 ))"
    fi
    # '--' stops numfmt from reading a negative free space (usage above the
    # limit) as an option.
    printf "%s:%s(%d%%)" "$1" "$( numfmt --to=iec -- "$FREE" )" "$PERCENT"
}

format_output_item() {
    # Prints "<volume>:<qgroupid> (<rfer part> <excl part>)" where each part
    # shows the free space left under the corresponding limit. A part is
    # omitted when its limit is "none"; nothing at all is printed when both
    # limits are "none".
    local LOCAL_OUTPUT=""
    if [ "$5" != "none" ]; then
        LOCAL_OUTPUT="$( _format_quota_free rfer "$3" "$5" )"
    fi
    if [ "$6" != "none" ]; then
        # Append to the rfer part (if any) instead of replacing it.
        LOCAL_OUTPUT="${LOCAL_OUTPUT:+$LOCAL_OUTPUT }$( _format_quota_free excl "$4" "$6" )"
    fi
    if [ -n "$LOCAL_OUTPUT" ]; then
        printf "%s:%s (%s)" "$1" "$2" "$LOCAL_OUTPUT"
    fi
}
# Some early checks
# 'command -v' is the POSIX way to look up a command. Both of its streams are
# discarded (stdout first, then stderr onto it) so that nothing but the message
# below can end up in the plugin output.
if ! command -v btrfs >/dev/null 2>&1; then
    echo "UNKNOWN 'btrfs' command not found."
    exit $STATE_UNKNOWN
fi
#
# Loop on parameters + tests
#
# Options are processed in order: each -V is checked at once, against the
# thresholds set by the -w / -c options that precede it.
while getopts hw:c:V: f; do
    case "$f" in
        'h')
            usage
            exit
            ;;
        'w')
            if check_range_syntax "$OPTARG" >/dev/null; then
                RANGE_WARNING="$OPTARG"
            else
                echo "UNKNOWN: invalid range."
                exit 3
            fi
            ;;
        'c')
            if check_range_syntax "$OPTARG" >/dev/null; then
                RANGE_CRITICAL="$OPTARG"
            else
                echo "UNKNOWN: invalid range."
                exit 3
            fi
            ;;
        'V')
            VOLUME="$OPTARG"
            # Quickly check if we have enough permission to launch btrfs commands
            if ! btrfs qgroup show "$VOLUME" >/dev/null 2>&1 ; then
                echo "UNKNOWN: unable to launch 'btrfs qgroup show $VOLUME' command."
                exit $STATE_UNKNOWN
            fi
            # Loop on each qgroup
            # (the kinda weird <<EOF at the end is to avoid entering a sub-shell, so we can keep
            # our precious vars even with POSIX-strict dash)
            # The trailing _EXTRA variable soaks up any column printed after
            # max_excl so that it cannot end up inside MAXEXCL.
            # NOTE(review): some btrfs-progs releases appear to add such a
            # column (a path) - confirm against the installed version.
            while read -r QGROUPID RFER EXCL MAXRFER MAXEXCL _EXTRA; do
                # Skip lines without the five expected columns. This includes
                # the single blank line the here-document yields when btrfs
                # lists no qgroup, which would otherwise break the arithmetic.
                [ -n "$MAXEXCL" ] || continue

                LOOP_OUTPUT_STATUS=$STATE_OK

                if [ "$MAXRFER" != "none" ]; then
                    FREERFER="$(( $MAXRFER - $RFER ))"
                    if ! check_freespace_is_above_threshold "$FREERFER" "$MAXRFER" "$RANGE_CRITICAL"; then
                        OUTPUT_EXIT_STATUS=$STATE_CRITICAL
                        LOOP_OUTPUT_STATUS=$STATE_CRITICAL
                    elif ! check_freespace_is_above_threshold "$FREERFER" "$MAXRFER" "$RANGE_WARNING"; then
                        # Never downgrade a CRITICAL raised by a previous qgroup
                        if [ "$OUTPUT_EXIT_STATUS" = "$STATE_OK" ]; then
                            OUTPUT_EXIT_STATUS=$STATE_WARNING
                        fi
                        LOOP_OUTPUT_STATUS=$STATE_WARNING
                    fi
                fi

                if [ "$MAXEXCL" != "none" ]; then
                    FREEEXCL="$(( $MAXEXCL - $EXCL ))"
                    if ! check_freespace_is_above_threshold "$FREEEXCL" "$MAXEXCL" "$RANGE_CRITICAL"; then
                        OUTPUT_EXIT_STATUS=$STATE_CRITICAL
                        LOOP_OUTPUT_STATUS=$STATE_CRITICAL
                    elif ! check_freespace_is_above_threshold "$FREEEXCL" "$MAXEXCL" "$RANGE_WARNING"; then
                        if [ "$OUTPUT_EXIT_STATUS" = "$STATE_OK" ]; then
                            OUTPUT_EXIT_STATUS=$STATE_WARNING
                        fi
                        # Keep a WARNING/CRITICAL already raised by the rfer limit
                        if [ "$LOOP_OUTPUT_STATUS" = "$STATE_OK" ]; then
                            LOOP_OUTPUT_STATUS=$STATE_WARNING
                        fi
                    fi
                fi

                # Depending on the alerts raised during this loop, we store
                # the output in ok/warning/critical. This way, in case of critical
                # alert, we don't list all the ok data.
                TMP="$( format_output_item "$VOLUME" "$QGROUPID" "$RFER" "$EXCL" "$MAXRFER" "$MAXEXCL" )"
                case "$LOOP_OUTPUT_STATUS" in
                    "$STATE_OK")
                        OUTPUT_DETAIL_OK="$OUTPUT_DETAIL_OK $TMP" ;;
                    "$STATE_WARNING")
                        OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING $TMP" ;;
                    "$STATE_CRITICAL")
                        OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL $TMP" ;;
                esac

                # One perfdata line per counter; they are joined when printed
                OUTPUT_PERFDATA="$( printf "%s\n%s\n%s" "$OUTPUT_PERFDATA" "$VOLUME:$QGROUPID:rfer=$RFER" "$VOLUME:$QGROUPID:excl=$EXCL" )"
            done <<EOF
$( btrfs qgroup show -re --raw "$VOLUME" | tail -n +3 )
EOF
            ;;
        \?)
            # Invalid command line: report UNKNOWN (exit status 1 would be
            # interpreted as a WARNING by the monitoring system).
            usage
            exit $STATE_UNKNOWN
            ;;
    esac
done
# VOLUME is only set while handling a -V option: without any, nothing has been
# checked, so refuse to report an "OK" that would hide a misconfigured check.
if [ -z "${VOLUME:-}" ]; then
    echo "UNKNOWN: no volume specified (see -h)."
    exit $STATE_UNKNOWN
fi

# Status line: only the details matching the worst status are printed.
case "$OUTPUT_EXIT_STATUS" in
    '0')
        printf "OK %s" "$OUTPUT_DETAIL_OK"
        ;;
    '1')
        printf "WARNING %s" "$OUTPUT_DETAIL_WARNING"
        ;;
    '2')
        printf "CRITICAL %s" "$OUTPUT_DETAIL_CRITICAL"
        ;;
    *)
        printf "UNKNOWN"
        ;;
esac

# We sort the perfdata because of a weird bug in some icinga/pnp4nagios interactions
# (empty lines are dropped and the remaining ones joined with spaces after the '|')
printf "|%s\n" "$( printf "%s" "$OUTPUT_PERFDATA" | sort | grep -v "^$" | tr "\n" " " )"
exit $OUTPUT_EXIT_STATUS