check_btrfs: check for errors/corruption on devices
This commit is contained in:
parent
05fcddb210
commit
76668f21b4
1 changed files with 131 additions and 0 deletions
131
nagios/check_btrfs.sh
Executable file
131
nagios/check_btrfs.sh
Executable file
|
@ -0,0 +1,131 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# Little check for device errors on Btrfs volumes
|
||||||
|
#
|
||||||
|
# GPL v3+ (copyright chl-dev@bugness.org)
|
||||||
|
|
||||||
|
# Directory part of $0: the sed expression strips the last path component
# (either '/' or '\' is accepted as separator).
# $0 is quoted and passed through printf so that whitespace, glob characters
# or a leading dash in the path cannot alter it.
PROGPATH=$( printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
# When $0 contains no separator the sed leaves it untouched, which would make
# PROGPATH the script name instead of a directory: use the current directory.
if [ "$PROGPATH" = "$0" ]; then
  PROGPATH=.
fi
REVISION="0.1"
|
||||||
|
|
||||||
|
# Stop at the first uncaught error
set -e

# Include check_range()
# Not needed at the moment
#. $PROGPATH/utils.sh

# Plugin exit statuses. The script exits with one of these values;
# STATE_DEPENDENT is defined but not referenced by the visible code.
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Default values
# A device counter strictly greater than a threshold raises the matching state.
WARNING_STATS=0
CRITICAL_STATS=0

# Output
# Exit status accumulated while the volumes are checked.
OUTPUT_EXIT_STATUS=$STATE_OK
# Text appended after the OK / WARNING / CRITICAL keyword on the status line.
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
# Newline-separated perfdata entries, sorted and joined when printed.
OUTPUT_PERFDATA=""
# Sum of the device counts of every checked volume.
NB_DEVICES=0
|
||||||
|
|
||||||
|
#
|
||||||
|
# Help function
|
||||||
|
#
|
||||||
|
usage() {
  # Print the command-line synopsis and an example on stdout.
  # Takes no argument; $0 is expanded inside the here-document so the
  # synopsis shows the name the script was invoked with.
  cat <<EOF
Usage :
$0 -V volume -V...
$0 -h

Example :
./check_btrfs.sh -V /home
EOF
}
|
||||||
|
|
||||||
|
check_range_syntax() {
  # Placeholder validator: ignores its arguments and always returns 0.
  # The commented-out lines that follow this function in the file sketch a
  # check_range-based validation that is currently disabled.
  return 0
}
|
||||||
|
# check_range 0 "$1" >/dev/null 2>&1
|
||||||
|
# if [ "$?" -eq "2" ]; then
|
||||||
|
# return 1
|
||||||
|
|
||||||
|
# Some early checks
|
||||||
|
# Abort with UNKNOWN when the btrfs tool cannot be found in PATH.
# 'command -v' is the POSIX way to probe for a command, and the redirections
# are ordered so that both stdout and stderr are discarded (the previous
# '2>&1 >/dev/null' order left stderr pointing at the original stdout).
if ! command -v btrfs >/dev/null 2>&1; then
  echo "UNKNOWN 'btrfs' command not found."
  exit "$STATE_UNKNOWN"
fi
|
||||||
|
|
||||||
|
#
|
||||||
|
# Loop on parameters + tests
|
||||||
|
#
|
||||||
|
# Command-line processing. Options are handled in command-line order and
# every -V volume is checked as soon as it is read, so -w / -c only apply to
# the volumes listed after them.
#   -h         print the usage text and exit
#   -w N       warning threshold  (a counter > N raises WARNING)
#   -c N       critical threshold (a counter > N raises CRITICAL)
#   -V volume  Btrfs volume to check (may be repeated)
while getopts hw:c:V: f; do
  case "$f" in
    'h')
      usage
      exit
      ;;

    'w' | 'c')
      # Thresholds are later fed to printf %d and [ -gt ], so only plain
      # non-negative integers are accepted.
      case "$OPTARG" in
        '' | *[!0-9]*)
          echo "UNKNOWN: threshold for -$f must be a non-negative integer (got '$OPTARG')."
          exit "$STATE_UNKNOWN"
          ;;
      esac
      if [ "$f" = 'w' ]; then
        WARNING_STATS="$OPTARG"
      else
        CRITICAL_STATS="$OPTARG"
      fi
      ;;

    'V')
      VOLUME="$OPTARG"

      # Run the stats command once: a failure tells us we cannot query the
      # volume (permissions ?), a success gives the data parsed below.
      if ! DEVICE_STATS="$( btrfs device stats "$VOLUME" 2>/dev/null )"; then
        echo "UNKNOWN: unable to launch 'btrfs device stats $VOLUME' command (permissions ?)."
        exit "$STATE_UNKNOWN"
      fi

      # For information, add the number of devices to the perfdata.
      # The sed pattern sticks to POSIX basic regex syntax, and anything that
      # is not a single plain integer counts as 0 so the arithmetic below
      # cannot hit a syntax error.
      VOLUME_DEVICES="$( btrfs fi show --raw "$VOLUME" | sed -n 's/^[[:space:]]*Total devices \([0-9][0-9]*\) .*/\1/p' )"
      case "$VOLUME_DEVICES" in
        '' | *[!0-9]*) VOLUME_DEVICES=0 ;;
      esac
      NB_DEVICES="$(( $NB_DEVICES + $VOLUME_DEVICES ))"

      # Check the device stats
      # (the kinda weird <<EOF at the end is to avoid entering a sub-shell, so we can access
      # our precious vars' content outside of the loop, even with POSIX-strict dash)
      while read -r ITEM VALUE; do
        # The here-document yields one empty line when the command printed nothing
        [ -n "$ITEM" ] || continue

        # Every remaining line must be "<counter name> <integer>"
        case "$VALUE" in
          '' | *[!0-9]*)
            echo "UNKNOWN: unexpected output from 'btrfs device stats $VOLUME'."
            exit "$STATE_UNKNOWN"
            ;;
        esac

        # Add the value to the perfdata
        OUTPUT_PERFDATA="$( printf "%s\n'%s'=%d;%d;%d;0;" "$OUTPUT_PERFDATA" "$ITEM" "$VALUE" "$WARNING_STATS" "$CRITICAL_STATS" )"

        # Check if the value is within ok/warn/critical ranges
        if [ "$VALUE" -gt "$CRITICAL_STATS" ]; then
          OUTPUT_EXIT_STATUS=$STATE_CRITICAL
          OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL $ITEM:$VALUE"
        elif [ "$VALUE" -gt "$WARNING_STATS" ] && [ "$OUTPUT_EXIT_STATUS" != "$STATE_CRITICAL" ]; then
          # Never downgrade a CRITICAL already raised by a previous counter
          OUTPUT_EXIT_STATUS=$STATE_WARNING
          OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING $ITEM:$VALUE"
        fi
      done <<EOF
$DEVICE_STATS
EOF
      ;;

    \?)
      # Invalid option or missing argument: report UNKNOWN rather than
      # exit status 1, which Nagios would interpret as WARNING.
      usage
      exit "$STATE_UNKNOWN"
      ;;
  esac
done
|
||||||
|
|
||||||
|
# Insert nb devices data into output
|
||||||
|
# Final report: one status line "<STATE><detail>|<perfdata>", then exit with
# the accumulated status.

# Insert nb devices data into output
OUTPUT_PERFDATA="$( printf "%s\n%s" "$OUTPUT_PERFDATA" "'nb_devices_total'=$NB_DEVICES;;;1;" )"
if [ "$NB_DEVICES" -eq 1 ]; then
  DEVICE_WORD="device"
else
  DEVICE_WORD="devices"
fi
OUTPUT_DETAIL_OK="$OUTPUT_DETAIL_OK - $NB_DEVICES $DEVICE_WORD"

# An OK verdict with no device counted means nothing was actually checked
# (typically no -V option was given): report UNKNOWN instead of a false OK.
OUTPUT_DETAIL_UNKNOWN=""
if [ "$OUTPUT_EXIT_STATUS" -eq "$STATE_OK" ] && [ "$NB_DEVICES" -eq 0 ]; then
  OUTPUT_EXIT_STATUS=$STATE_UNKNOWN
  OUTPUT_DETAIL_UNKNOWN=" - no device found (missing -V option ?)"
fi

case "$OUTPUT_EXIT_STATUS" in
  '0')
    printf "OK%s" "$OUTPUT_DETAIL_OK"
    ;;
  '1')
    printf "WARNING%s" "$OUTPUT_DETAIL_WARNING"
    ;;
  '2')
    printf "CRITICAL%s" "$OUTPUT_DETAIL_CRITICAL"
    ;;
  *)
    printf "UNKNOWN%s" "$OUTPUT_DETAIL_UNKNOWN"
    ;;
esac

# We sort the perfdata because of a weird bug in some icinga/pnp4nagios interactions
printf "|%s\n" "$( printf "%s" "$OUTPUT_PERFDATA" | sort | grep -v "^$" | tr "\n" " " )"

exit "$OUTPUT_EXIT_STATUS"
|
Loading…
Reference in a new issue