1
0
Fork 0

check_btrfs: check for errors/corruption on devices

This commit is contained in:
Chl 2023-10-02 15:54:37 +02:00
parent 05fcddb210
commit 76668f21b4

131
nagios/check_btrfs.sh Executable file
View file

@ -0,0 +1,131 @@
#!/bin/sh
# Little check for device errors on Btrfs volumes
#
# GPL v3+ (copyright chl-dev@bugness.org)

# Directory part of the path used to invoke the script (last path component
# stripped). "$0" is quoted and emitted with printf so that paths holding
# spaces or glob characters reach sed unmodified.
# Only needed by the (currently disabled) utils.sh include below.
PROGPATH=$( printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
REVISION="0.1"

# Stop at the first uncaught error
set -e

# Include check_range()
# Not needed at the moment
#. $PROGPATH/utils.sh

# Exit statuses returned by the plugin
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Default thresholds: a counter strictly greater than the threshold
# raises the corresponding state
WARNING_STATS=0
CRITICAL_STATS=0

# Output accumulators, filled while the volumes are inspected
OUTPUT_EXIT_STATUS=$STATE_OK
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""
NB_DEVICES=0
#
# Help function
#
# usage
#   Print a short synopsis and one invocation example on stdout.
#   The first synopsis line shows the script as it was invoked ($0).
usage() {
    printf '%s\n' \
        "Usage :" \
        "$0 -V volume -V..." \
        "Example :" \
        "./check_btrfs.sh -V /home"
}
# check_range_syntax [ARG...]
#   Placeholder validator: ignores its arguments and always succeeds
#   (exit status 0).
check_range_syntax() {
    :
}
# check_range 0 "$1" >/dev/null 2>&1
# if [ "$?" -eq "2" ]; then
# return 1
# Some early checks
# Make sure the 'btrfs' tool is reachable before going any further.
# 'command -v' is the POSIX way to look a command up, and the redirections
# are ordered so that both stdout and stderr are discarded
# ('2>&1 >/dev/null' would leave stderr attached to the original stdout).
if ! command -v btrfs >/dev/null 2>&1; then
    echo "UNKNOWN 'btrfs' command not found."
    exit $STATE_UNKNOWN
fi
#
# Loop on parameters + tests
#
# is_uint VALUE
#   Succeed only when VALUE is a canonical non-negative decimal integer
#   (digits only, no sign, no leading zero). Such a value is safe to hand
#   both to 'test -gt' and to printf's %d (which would read "010" as octal).
is_uint() {
    case "$1" in
        '' | *[!0-9]* | 0?*) return 1 ;;
    esac
    return 0
}

# evaluate_device_stats
#   Read "ITEM VALUE" lines (as printed by 'btrfs device stats') on stdin.
#   For each counter:
#     - append a perfdata entry to OUTPUT_PERFDATA;
#     - if VALUE > CRITICAL_STATS, switch OUTPUT_EXIT_STATUS to critical and
#       record the counter in OUTPUT_DETAIL_CRITICAL;
#     - else if VALUE > WARNING_STATS and the status is not already critical,
#       switch to warning and record it in OUTPUT_DETAIL_WARNING.
#   Blank lines are skipped; a non-numeric value ends the script with
#   the UNKNOWN status.
#   Must be fed through a redirection (not a pipe) so that it runs in the
#   current shell and the variables it updates stay visible to the caller,
#   even with a POSIX-strict shell such as dash.
evaluate_device_stats() {
    while read -r ITEM VALUE; do
        # Nothing to evaluate on an empty line
        [ -n "$ITEM" ] || continue

        if ! is_uint "$VALUE"; then
            echo "UNKNOWN: unexpected 'btrfs device stats' output: $ITEM $VALUE"
            exit "$STATE_UNKNOWN"
        fi

        # Add the value to the perfdata
        OUTPUT_PERFDATA="$( printf "%s\n'%s'=%d;%d;%d;0;" "$OUTPUT_PERFDATA" "$ITEM" "$VALUE" "$WARNING_STATS" "$CRITICAL_STATS" )"

        # Check if the value is within ok/warn/critical ranges
        if [ "$VALUE" -gt "$CRITICAL_STATS" ]; then
            OUTPUT_EXIT_STATUS=$STATE_CRITICAL
            OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL $ITEM:$VALUE"
        elif [ "$VALUE" -gt "$WARNING_STATS" ] && [ "$OUTPUT_EXIT_STATUS" != "$STATE_CRITICAL" ]; then
            OUTPUT_EXIT_STATUS=$STATE_WARNING
            OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING $ITEM:$VALUE"
        fi
    done
}

# Raw 'btrfs device stats' output of every requested volume. It is only
# evaluated once all options are parsed, so that -w / -c apply to every
# volume whatever their position on the command line.
DEVICE_STATS=""

while getopts hw:c:V: f; do
    case "$f" in
        'h')
            usage
            exit
            ;;
        'w')
            # Warning threshold: counters strictly above it raise WARNING
            if ! is_uint "$OPTARG"; then
                echo "UNKNOWN: invalid warning threshold '$OPTARG' (non-negative integer expected)."
                exit "$STATE_UNKNOWN"
            fi
            WARNING_STATS="$OPTARG"
            ;;
        'c')
            # Critical threshold: counters strictly above it raise CRITICAL
            if ! is_uint "$OPTARG"; then
                echo "UNKNOWN: invalid critical threshold '$OPTARG' (non-negative integer expected)."
                exit "$STATE_UNKNOWN"
            fi
            CRITICAL_STATS="$OPTARG"
            ;;
        'V')
            VOLUME="$OPTARG"

            # Fetch the counters once; a failure here is reported as UNKNOWN
            # (typically not enough permission to launch btrfs commands)
            if ! VOLUME_STATS="$( btrfs device stats "$VOLUME" 2>/dev/null )"; then
                echo "UNKNOWN: unable to launch 'btrfs device stats $VOLUME' command (permissions ?)."
                exit "$STATE_UNKNOWN"
            fi
            DEVICE_STATS="$( printf '%s\n%s' "$DEVICE_STATS" "$VOLUME_STATS" )"

            # For information, add the number of devices to the perfdata.
            # The count is purely informative: when it cannot be extracted
            # the total is simply left untouched instead of breaking the
            # arithmetic expansion.
            VOLUME_DEVICES="$( btrfs fi show --raw "$VOLUME" | sed -n 's/^[[:space:]]*Total devices \([0-9][0-9]*\) .*/\1/p' )"
            if is_uint "$VOLUME_DEVICES"; then
                NB_DEVICES="$(( $NB_DEVICES + $VOLUME_DEVICES ))"
            fi
            ;;
        \?)
            usage
            exit 1
            ;;
    esac
done

# Check the device stats
# (the here-document avoids a pipe, hence a sub-shell, so the variables
# updated by evaluate_device_stats remain available afterwards)
evaluate_device_stats <<EOF
$DEVICE_STATS
EOF
# Append the total device count to the perfdata
OUTPUT_PERFDATA="$( printf "%s\n'nb_devices_total'=%s;;;1;" "$OUTPUT_PERFDATA" "$NB_DEVICES" )"

# Human readable device count for the OK message (singular only for 1)
if [ "$NB_DEVICES" -eq 1 ]; then
    DEVICE_WORD="device"
else
    DEVICE_WORD="devices"
fi
OUTPUT_DETAIL_OK="$OUTPUT_DETAIL_OK - $NB_DEVICES $DEVICE_WORD"

# Pick the status keyword and the matching detail text
case "$OUTPUT_EXIT_STATUS" in
    '0') SUMMARY="OK$OUTPUT_DETAIL_OK" ;;
    '1') SUMMARY="WARNING$OUTPUT_DETAIL_WARNING" ;;
    '2') SUMMARY="CRITICAL$OUTPUT_DETAIL_CRITICAL" ;;
    *)   SUMMARY="UNKNOWN" ;;
esac
printf "%s" "$SUMMARY"

# The perfdata entries are sorted (workaround for a weird bug in some
# icinga/pnp4nagios interactions), empty lines are dropped and everything
# is joined on a single space-separated line after the '|' separator
printf "|%s\n" "$( printf "%s" "$OUTPUT_PERFDATA" | sort | grep -v "^$" | tr "\n" " " )"

exit $OUTPUT_EXIT_STATUS