check_btrfs: check for errors/corruption on devices
This commit is contained in:
parent
05fcddb210
commit
76668f21b4
1 changed files with 131 additions and 0 deletions
131
nagios/check_btrfs.sh
Executable file
131
nagios/check_btrfs.sh
Executable file
|
@ -0,0 +1,131 @@
|
|||
#!/bin/sh
|
||||
|
||||
# Little check for device errors on Btrfs volumes
|
||||
#
|
||||
# GPL v3+ (copyright chl-dev@bugness.org)
|
||||
|
||||
# Directory part of the path this script was invoked with (everything before
# the last path separator of $0). It is only used by the utils.sh include
# below, which is currently disabled.
# "$0" is quoted and printed with printf so that a path containing spaces or
# glob characters reaches sed unmodified.
PROGPATH=$( printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
REVISION="0.1"

# Stop at the first uncaught error
set -e

# Include check_range()
# Not needed at the moment
#. $PROGPATH/utils.sh

# Exit statuses (normally provided by utils.sh, defined here since it is not sourced)
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Default values: a device error counter strictly greater than these
# thresholds raises the matching state
WARNING_STATS=0
CRITICAL_STATS=0

# Output accumulators, filled while the volumes are checked
OUTPUT_EXIT_STATUS=$STATE_OK
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""
NB_DEVICES=0
|
||||
|
||||
#
|
||||
# Help function
|
||||
#
|
||||
# Print the command-line help on stdout.
# The text lists every option the parser acts on: -V (repeatable) and -h.
usage() {
    cat <<EOF
Usage :
  $0 -V volume -V...
  $0 -h

Options :
  -V volume   path of a Btrfs volume to check (may be given several times)
  -h          show this help

Example :
  ./check_btrfs.sh -V /home
EOF
}
|
||||
|
||||
# Placeholder: accepts any argument and always reports success.
# NOTE(review): presumably meant to validate a threshold range through
# check_range() from utils.sh (see the commented-out lines that follow this
# function in the file) -- confirm before relying on it.
check_range_syntax() {
    :   # no-op builtin; its zero status becomes the function's return status
}
|
||||
# check_range 0 "$1" >/dev/null 2>&1
|
||||
# if [ "$?" -eq "2" ]; then
|
||||
# return 1
|
||||
|
||||
# Some early checks
|
||||
# 'command -v' is the POSIX way to probe for a command (no dependency on the
# external 'which'). stdout is redirected first and stderr is then pointed at
# it, so nothing from the probe can leak into the single-line plugin output.
if ! command -v btrfs >/dev/null 2>&1; then
    echo "UNKNOWN 'btrfs' command not found."
    exit "$STATE_UNKNOWN"
fi
|
||||
|
||||
#
|
||||
# Loop on parameters + tests
|
||||
#
|
||||
# Record one "<counter> <value>" line of 'btrfs device stats' output.
# Globals read:    WARNING_STATS, CRITICAL_STATS, STATE_WARNING, STATE_CRITICAL,
#                  STATE_UNKNOWN
# Globals written: OUTPUT_PERFDATA, OUTPUT_EXIT_STATUS, OUTPUT_DETAIL_WARNING,
#                  OUTPUT_DETAIL_CRITICAL
# Arguments:       $1 - counter name, $2 - counter value
# Exits with STATE_UNKNOWN when the value is not a non-negative integer.
record_device_stat() {
    # A here-document always yields at least one (possibly empty) line:
    # silently ignore blank input instead of feeding it to printf/test.
    if [ -z "$1" ] && [ -z "$2" ]; then
        return 0
    fi

    # Anything that is not a plain number cannot be compared nor emitted as
    # perfdata; report it explicitly rather than dying on a printf error.
    case "$2" in
        ''|*[!0-9]*)
            echo "UNKNOWN: unexpected 'btrfs device stats' output: $1 $2"
            exit "$STATE_UNKNOWN"
            ;;
    esac

    # Add the value to the perfdata
    OUTPUT_PERFDATA="$( printf "%s\n'%s'=%d;%d;%d;0;" "$OUTPUT_PERFDATA" "$1" "$2" "$WARNING_STATS" "$CRITICAL_STATS" )"

    # Check if the value is within ok/warn/critical ranges
    # (a WARNING never downgrades an already CRITICAL status)
    if [ "$2" -gt "$CRITICAL_STATS" ]; then
        OUTPUT_EXIT_STATUS=$STATE_CRITICAL
        OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL $1:$2"
    elif [ "$2" -gt "$WARNING_STATS" ] && [ "$OUTPUT_EXIT_STATUS" != "$STATE_CRITICAL" ]; then
        OUTPUT_EXIT_STATUS=$STATE_WARNING
        OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING $1:$2"
    fi
}

# Options are processed in command-line order and each -V is checked as soon
# as it is parsed, so -w / -c only affect the -V options that follow them.
while getopts hw:c:V: f; do
    case "$f" in
        'h')
            usage
            exit
            ;;

        'w'|'c')
            # Thresholds are compared with 'test -gt': only accept integers
            case "$OPTARG" in
                ''|*[!0-9]*)
                    echo "UNKNOWN: threshold given to -$f must be a non-negative integer."
                    exit "$STATE_UNKNOWN"
                    ;;
            esac
            if [ "$f" = 'w' ]; then
                WARNING_STATS="$OPTARG"
            else
                CRITICAL_STATS="$OPTARG"
            fi
            ;;

        'V')
            VOLUME="$OPTARG"

            # Quickly check if we have enough permission to launch btrfs commands
            if ! btrfs device stats "$VOLUME" >/dev/null 2>&1 ; then
                echo "UNKNOWN: unable to launch 'btrfs device stats $VOLUME' command (permissions ?)."
                exit "$STATE_UNKNOWN"
            fi

            # For information, add the number of devices to the perfdata.
            # An unparsable count is treated as 0 instead of breaking the
            # arithmetic expansion.
            NB_VOLUME_DEVICES="$( btrfs fi show --raw "$VOLUME" | sed -n 's/^[[:space:]]*Total devices \([0-9][0-9]*\) .*/\1/p' )"
            NB_DEVICES="$(( ${NB_DEVICES:-0} + ${NB_VOLUME_DEVICES:-0} ))"

            # Check the device stats
            # (the kinda weird <<EOF at the end is to avoid entering a sub-shell, so we can access
            # our precious vars' content outside of the loop, even with posix-strict dash)
            while read -r ITEM VALUE; do
                record_device_stat "$ITEM" "$VALUE"
            done <<EOF
$( btrfs device stats "$VOLUME" )
EOF
            ;;

        \?)
            # Exit status 1 would be read as WARNING by Nagios: a usage error
            # is an UNKNOWN result.
            usage
            exit "$STATE_UNKNOWN"
            ;;
    esac
done
|
||||
|
||||
# VOLUME is only assigned while handling a -V option: when it is still empty
# nothing was checked, and answering "OK" would be misleading.
if [ -z "${VOLUME:-}" ]; then
    echo "UNKNOWN: no volume given (use -V)."
    exit "$STATE_UNKNOWN"
fi

# Insert nb devices data into output
OUTPUT_PERFDATA="$( printf "%s\n%s" "$OUTPUT_PERFDATA" "'nb_devices_total'=$NB_DEVICES;;;1;" )"
# Plain if/else rather than 'a && b || c', which would also run 'c' if 'b' failed
if [ "$NB_DEVICES" -eq 1 ]; then
    DEVICE_WORD="device"
else
    DEVICE_WORD="devices"
fi
OUTPUT_DETAIL_OK="$OUTPUT_DETAIL_OK - $NB_DEVICES $DEVICE_WORD"

# Status word followed by the details collected for that status only
case "$OUTPUT_EXIT_STATUS" in
    '0')
        printf "OK%s" "$OUTPUT_DETAIL_OK"
        ;;
    '1')
        printf "WARNING%s" "$OUTPUT_DETAIL_WARNING"
        ;;
    '2')
        printf "CRITICAL%s" "$OUTPUT_DETAIL_CRITICAL"
        ;;
    *)
        printf "UNKNOWN"
        ;;
esac

# We sort the perfdata because of a weird bug in some icinga/pnp4nagios interactions
# (entries are accumulated one per line; empty lines are dropped and the rest
# is joined with spaces after the '|' separator)
printf "|%s\n" "$( printf "%s" "$OUTPUT_PERFDATA" | sort | grep -v "^$" | tr "\n" " " )"
exit "$OUTPUT_EXIT_STATUS"
|
Loading…
Reference in a new issue