nagios: adding check_diskstats.sh
This commit is contained in:
parent
e48cc16b01
commit
899c8e1efe
2 changed files with 156 additions and 0 deletions
155
nagios/check_diskstats.sh
Executable file
155
nagios/check_diskstats.sh
Executable file
|
@ -0,0 +1,155 @@
|
||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
# Little Nagios/Icinga/Naemon probe for Linux disks' I/O via /proc/diskstats
|
||||||
|
#
|
||||||
|
# Based on https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
|
||||||
|
#
|
||||||
|
# Published under Unlicense <http://unlicense.org/> (similar to public domain)
|
||||||
|
|
||||||
|
# Directory part of the path this probe was invoked with; only needed to
# locate utils.sh if that include is ever re-enabled below.
# "$0" is quoted and printed with printf so that a path containing spaces,
# glob characters or backslashes is passed to sed untouched.
# NOTE: when "$0" contains no directory separator, sed leaves it unchanged.
PROGPATH=$( printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
REVISION="0.1"

# Stop at the first uncaught error
set -e

# Include check_range()
# Not needed at the moment
#. $PROGPATH/utils.sh

# Plugin exit statuses (same values as the ones utils.sh would provide)
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Default values
WARNING_STATS=0
CRITICAL_STATS=0

# Thresholds start empty ("no threshold") so that a variable of the same name
# inherited from the environment can never be mistaken for a -i / -I option.
THRESHOLD_IO_IN_PROGRESS_WARNING=""
THRESHOLD_IO_IN_PROGRESS_CRITICAL=""

# Output: overall exit status, per-state detail text and accumulated perfdata
OUTPUT_EXIT_STATUS=$STATE_OK
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_PERFDATA=""
|
||||||
|
|
||||||
|
#
|
||||||
|
# Help function
|
||||||
|
#
|
||||||
|
# Print the help text (synopsis, description, options and an example) on
# standard output. Takes no argument and does not exit by itself: the caller
# chooses the exit status. "$0" is expanded inside the here-document so the
# help shows the name the probe was invoked with.
usage() {
    cat <<EOF
Usage :
  $0 [ -i <nb_in_progress_warn_threshold> ] [ -I <nb_in_progress_crit_threshold> ] -d <device_name> [ ... -d ... ]

This probe send statistics about disks on a Linux system.
At the moment, the "number of IO in progress" (field #9) is the only metric
able to generate an alert.

Options :
  -d xx  Device name (usually 'sda'). Can use shell pattern like 'sda*' or 'sd?'.
  -i N   Number of io in progress : warning threshold
  -I N   Number of io in progress : critical threshold

An alert is raised when the measured value is strictly greater than the
threshold. Thresholds only apply to the -d options that follow them on the
command line, so give -i / -I first.

Example :
  This example is a good generic configuration for simple and light systems,
  whether physical or virtual, taking any sda, sdb, vda, vdb, etc. disks found
  and triggering a warning as soon as 1 IO is in progress and a critical alert
  above 5.
    $0 -i 0 -I 5 -d 'sd?' -d 'vd?'
EOF
}
|
||||||
|
|
||||||
|
# Some early checks
|
||||||
|
# Every figure reported by this probe is read from /proc/diskstats: when that
# file cannot be read there is nothing to measure, so report UNKNOWN and stop.
[ -r "/proc/diskstats" ] || {
    echo "UNKNOWN file '/proc/diskstats' not found or not readable."
    exit $STATE_UNKNOWN
}
|
||||||
|
# TODO: check Linux kernel version. See differences between 2.4+, 2.6+, 4.18+
|
||||||
|
# https://www.kernel.org/doc/html/latest/admin-guide/iostats.html
|
||||||
|
|
||||||
|
#
|
||||||
|
# Loop on parameters + tests
|
||||||
|
#
|
||||||
|
# add_counter <metric_suffix> <value>
# Append one counter perfdata entry ("c" unit, minimum 0) for the current
# $DEVICE_NAME to OUTPUT_PERFDATA. A missing value (a column the running
# kernel does not provide) is reported as 0 instead of being handed to
# printf's %d as an empty string.
add_counter() {
    OUTPUT_PERFDATA="$( printf "%s\n'%s_%s'=%dc;;;0" "$OUTPUT_PERFDATA" "$DEVICE_NAME" "$1" "${2:-0}" )"
}

# require_integer <option_letter> <value>
# Leave with the UNKNOWN state unless <value> is an integer (an optional
# leading "-" is tolerated). Without this check a malformed threshold makes
# the "-gt" tests fail silently and the probe never alerts.
require_integer() {
    case "${2#-}" in
        ''|*[!0-9]*)
            echo "UNKNOWN threshold given to -$1 must be an integer: '$2'"
            exit $STATE_UNKNOWN
            ;;
    esac
}

# process_device <pattern> [<diskstats_file>]
# Read <diskstats_file> (default: /proc/diskstats) and, for every device whose
# name matches the shell pattern <pattern>:
#   - compare its "I/O in progress" value with the thresholds set so far,
#     updating OUTPUT_EXIT_STATUS and OUTPUT_DETAIL_WARNING/_CRITICAL;
#   - append all its statistics to OUTPUT_PERFDATA.
# Columns follow the kernel "I/O statistics fields" documentation: 4 read
# fields, 4 write fields, I/O in progress, 2 I/O time fields, 4 discard
# fields and 2 flush fields. Anything after that lands in FUTURE_THINGIES.
process_device() {
    DEVICE_PATTERN="$1"
    while read -r \
        MAJOR MINOR DEVICE_NAME \
        READS_COMPLETED READS_MERGED SECTORS_READ TIME_SPENT_READING \
        WRITES_COMPLETED WRITES_MERGED SECTORS_WRITTEN TIME_SPENT_WRITING \
        IO_IN_PROGRESS \
        TIME_SPENT_IO TIME_SPENT_IO_WEIGHTED \
        DISCARDS_COMPLETED DISCARDS_MERGED SECTORS_DISCARDED TIME_SPENT_DISCARDING \
        FLUSH_REQUESTS TIME_SPENT_FLUSHING \
        FUTURE_THINGIES
    do
        # The pattern is deliberately left unquoted: it must act as a shell
        # pattern ('sd?', 'sda*'), not as a literal string.
        case "$DEVICE_NAME" in
            $DEVICE_PATTERN)
                ;;
            *)
                continue
                ;;
        esac

        # A truncated line must not break the numeric comparisons below
        IO_IN_PROGRESS="${IO_IN_PROGRESS:-0}"

        # Checking the thresholds (alert when strictly greater)
        if [ -n "$THRESHOLD_IO_IN_PROGRESS_CRITICAL" ] && [ "$IO_IN_PROGRESS" -gt "$THRESHOLD_IO_IN_PROGRESS_CRITICAL" ]; then
            OUTPUT_EXIT_STATUS=$STATE_CRITICAL
            OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL I/O in progress on $DEVICE_NAME: $IO_IN_PROGRESS"
        fi
        # A warning never downgrades a critical status already reached
        if [ -n "$THRESHOLD_IO_IN_PROGRESS_WARNING" ] && [ "$OUTPUT_EXIT_STATUS" != "$STATE_CRITICAL" ] && [ "$IO_IN_PROGRESS" -gt "$THRESHOLD_IO_IN_PROGRESS_WARNING" ]; then
            OUTPUT_EXIT_STATUS=$STATE_WARNING
            OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING I/O in progress on $DEVICE_NAME: $IO_IN_PROGRESS"
        fi

        # Generating performance data
        # reads
        add_counter reads_completed "$READS_COMPLETED"
        add_counter reads_merged "$READS_MERGED"
        add_counter sectors_read "$SECTORS_READ"
        add_counter time_spent_reading "$TIME_SPENT_READING"
        # writes
        add_counter writes_completed "$WRITES_COMPLETED"
        add_counter writes_merged "$WRITES_MERGED"
        add_counter sectors_written "$SECTORS_WRITTEN"
        add_counter time_spent_writing "$TIME_SPENT_WRITING"
        # I/O: "in progress" is a gauge (no unit) and carries the thresholds
        OUTPUT_PERFDATA="$( printf "%s\n'%s_io_in_progress'=%d;%s;%s;0" "$OUTPUT_PERFDATA" "$DEVICE_NAME" "$IO_IN_PROGRESS" "$THRESHOLD_IO_IN_PROGRESS_WARNING" "$THRESHOLD_IO_IN_PROGRESS_CRITICAL" )"
        add_counter time_spent_io "$TIME_SPENT_IO"
        add_counter time_spent_io_weighted "$TIME_SPENT_IO_WEIGHTED"
        # Discards
        add_counter discards_completed "$DISCARDS_COMPLETED"
        add_counter discards_merged "$DISCARDS_MERGED"
        add_counter sectors_discarded "$SECTORS_DISCARDED"
        add_counter time_spent_discarding "$TIME_SPENT_DISCARDING"
        # Flushes
        add_counter flush_requests "$FLUSH_REQUESTS"
        add_counter time_spent_flushing "$TIME_SPENT_FLUSHING"
    done < "${2:-/proc/diskstats}"
}

# Options are handled in command-line order: each -d is processed as soon as
# it is met, with the thresholds given by the -i / -I options seen before it.
while getopts hd:i:I: OPT; do
    case "$OPT" in
        'h')
            usage
            exit $STATE_UNKNOWN
            ;;

        'd')
            process_device "$OPTARG"
            ;;

        'i')
            require_integer i "$OPTARG"
            THRESHOLD_IO_IN_PROGRESS_WARNING="$OPTARG"
            ;;

        'I')
            require_integer I "$OPTARG"
            THRESHOLD_IO_IN_PROGRESS_CRITICAL="$OPTARG"
            ;;

        \?)
            # Invalid command line: UNKNOWN, not 1 (which would read as WARNING)
            usage
            exit $STATE_UNKNOWN
            ;;
    esac
done
|
||||||
|
|
||||||
|
# Final output
|
||||||
|
# Status line: the label matching the exit status, followed by the detail
# text gathered for that state. Any status other than 0, 1 or 2 is printed as
# a bare UNKNOWN.
if [ "$OUTPUT_EXIT_STATUS" = '0' ]; then
    printf "OK%s" "$OUTPUT_DETAIL_OK"
elif [ "$OUTPUT_EXIT_STATUS" = '1' ]; then
    printf "WARNING%s" "$OUTPUT_DETAIL_WARNING"
elif [ "$OUTPUT_EXIT_STATUS" = '2' ]; then
    printf "CRITICAL%s" "$OUTPUT_DETAIL_CRITICAL"
else
    printf "UNKNOWN"
fi

# Perfdata goes after the "|" separator on the same line: entries are sorted
# (works around a weird bug in some icinga/pnp4nagios interactions), empty
# lines are dropped and the remaining lines are joined with spaces.
printf "|%s\n" "$(
    printf "%s" "$OUTPUT_PERFDATA" \
        | sort \
        | grep -v "^$" \
        | tr "\n" " "
)"

exit $OUTPUT_EXIT_STATUS
|
|
@ -1,5 +1,6 @@
|
||||||
# Commandes de base pour serveur Linux
|
# Commandes de base pour serveur Linux
|
||||||
command[check_disks]=/usr/lib/nagios/plugins/check_disk -f -w 10% -c 5% -W 50% -K 5% -l -X tmpfs -X devpts -X devtmpfs -X usbfs -X nsfs -X overlay -X tracefs
|
command[check_disks]=/usr/lib/nagios/plugins/check_disk -f -w 10% -c 5% -W 50% -K 5% -l -X tmpfs -X devpts -X devtmpfs -X usbfs -X nsfs -X overlay -X tracefs
|
||||||
|
command[check_diskstats]=/usr/local/share/scripts-admin/nagios/check_diskstats.sh -i 0 -I 5 -d 'sd?' -d 'vd?'
|
||||||
command[check_load]=/usr/lib/nagios/plugins/check_load -w 1,1,1 -c 3,2,2
|
command[check_load]=/usr/lib/nagios/plugins/check_load -w 1,1,1 -c 3,2,2
|
||||||
command[check_network_volume]=/usr/local/share/scripts-admin/nagios/check_network_volume.sh
|
command[check_network_volume]=/usr/local/share/scripts-admin/nagios/check_network_volume.sh
|
||||||
command[check_swaping]=/usr/local/share/scripts-admin/nagios/check_swaping.sh
|
command[check_swaping]=/usr/local/share/scripts-admin/nagios/check_swaping.sh
|
||||||
|
|
Loading…
Reference in a new issue