1
0
Fork 0
scripts-admin-quickndirty-p.../nagios/check_gitlab_readiness.sh

203 lines
4.9 KiB
Bash
Executable file

#!/bin/bash
# Little monitoring script to check the readiness of a Gitlab instance.
#
# Remember that you have to whitelist the IP of your monitoring system :
# gitlab.rb:
# gitlab_rails['monitoring_whitelist'] = ['127.0.0.0/8', '::1/128', ...]
# See https://docs.gitlab.com/ee/administration/monitoring/health_check.html
#
# Licence: WTFPL
# Copyright: chl-dev@bugness.org 2024
# Directory holding this script; utils.sh is expected to live next to it.
# dirname (rather than a hand-written sed expression) yields "." when $0
# carries no directory component, and the quoting keeps paths containing
# spaces in one piece.
PROGPATH=$( dirname -- "$0" )
REVISION="0.1"

# Default values
GITLAB_HOSTNAME="localhost"
GITLAB_URL=""
GITLAB_URL_PROTOCOL="https"
NB_CHECKS_RANGE="15:20"

# Include check_range() and STATE_OK, STATE_WARNING, ... variables.
# Strict mode is only enabled further down, so it is not active while
# utils.sh is being loaded.
. "$PROGPATH/utils.sh"

# Output accumulators: number of checks seen, final exit status, one detail
# string per severity, and the perfdata lines.
OUTPUT_CPT_CHECKS=0
OUTPUT_EXIT_STATUS=0
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
OUTPUT_DETAIL_UNKNOWN=""
OUTPUT_PERFDATA=""

# Stop at first uncaught error, and treat unset variables as errors
set -eu
# Wrapper to use whatever is available to make HTTP queries.
# wget is tried first, curl is the fallback.
# Arguments: $1 - URL to fetch
# Outputs:   the response body on stdout (or an UNKNOWN message when no
#            HTTP client is installed)
# Returns:   the exit status of wget/curl; exits the current (sub)shell
#            with $STATE_UNKNOWN when neither tool is available.
# NOTE(review): wget and 'curl -s' (without -f) presumably do not report
# HTTP error statuses the same way through their exit status - confirm
# which behaviour is wanted.
fetch_with_curl_wget_or_whatever () {
    # 'command -v' is a shell builtin, so the lookup cannot fail just
    # because the external 'which' utility is not installed.
    if command -v wget >/dev/null 2>&1; then
        wget -q -O - "$1"
    elif command -v curl >/dev/null 2>&1; then
        curl -s "$1"
    else
        echo "UNKNOWN: no wget/curl/whatever available to make HTTP queries."
        exit "$STATE_UNKNOWN"
    fi
}
# Probe a URL without keeping its body.
# Arguments: $1 - URL to probe
# Returns:   with curl, 0 only when the reported HTTP status code is 200;
#            with wget, the exit status of 'wget --spider'
#            (NOTE(review): presumably success for any non-error response,
#            not strictly 200 - confirm).
#            Exits the current (sub)shell with $STATE_UNKNOWN when neither
#            tool is available.
check_http_status_200() {
    # 'command -v' is a shell builtin, so the lookup cannot fail just
    # because the external 'which' utility is not installed.
    if command -v wget >/dev/null 2>&1; then
        wget -q --spider "$1"
    elif command -v curl >/dev/null 2>&1; then
        # -I sends a HEAD request, -w prints only the status code
        [ "$( curl -s -o /dev/null -I -w "%{http_code}" "$1" )" = "200" ]
    else
        echo "UNKNOWN: no wget/curl/whatever available to make HTTP queries."
        exit "$STATE_UNKNOWN"
    fi
}
# Print the URL that will be queried.
# Globals: GITLAB_URL, GITLAB_URL_PROTOCOL, GITLAB_HOSTNAME (read)
# Outputs: GITLAB_URL when it is non-empty, otherwise the readiness URL
#          assembled from the protocol and the hostname.
print_full_url() {
    # No explicit URL given: build it from protocol + hostname
    if [ -z "$GITLAB_URL" ]; then
        echo "$GITLAB_URL_PROTOCOL://$GITLAB_HOSTNAME/-/readiness?all=1"
        return
    fi
    echo "$GITLAB_URL"
}
# Help function: print the command-line synopsis, the meaning of every
# option accepted by the getopts loop, examples and the current defaults.
# Globals: NB_CHECKS_RANGE (read), plus whatever print_full_url() reads
# Outputs: the help text on stdout
usage() {
    cat <<EOF
Usage :
  $0 [-h] [-H hostname] [-U full_URL] [-sS] [ -c nb_checks_range ]

  -c  Expected number of checks, as a range (e.g. 15:20)
  -h  Print this help and exit
  -H  Hostname of the Gitlab instance, used to build the URL
  -s  Use HTTP to query the hostname
  -S  Use HTTPS to query the hostname (default)
  -U  Full URL to query, used as-is (a later -H cancels it)

Examples:
  $0 -H forge.example.net
  $0 -U https://forge.example.net/-/readiness?all=1 -c 20:42

Default values:
  full_URL: $( print_full_url )
  nb_checks_range: $NB_CHECKS_RANGE
EOF
}
# Loop on parameters.
# -H and -U compete for the URL: -H clears any URL given earlier so that
# print_full_url() rebuilds it from the hostname.
while getopts c:hH:sSU: f; do
    case "$f" in
        c)
            NB_CHECKS_RANGE="$OPTARG"
            ;;
        h)
            usage
            exit
            ;;
        H)
            GITLAB_HOSTNAME="$OPTARG"
            GITLAB_URL="" # To generate the URL from the hostname, see print_full_url()
            ;;
        s)
            GITLAB_URL_PROTOCOL="http"
            ;;
        S)
            GITLAB_URL_PROTOCOL="https"
            ;;
        U)
            GITLAB_URL="$OPTARG"
            ;;
        *)
            # getopts stores '?' in $f for an unknown option or a missing
            # argument. Invalid command lines are reported as UNKNOWN, not
            # as exit code 1 which monitoring systems read as WARNING.
            usage
            exit "$STATE_UNKNOWN"
            ;;
    esac
done
# jq is required to parse the JSON report. 'command -v' is a shell builtin,
# so this test does not itself depend on the external 'which' utility.
if ! command -v jq >/dev/null 2>&1; then
    echo "UNKNOWN command 'jq' not available."
    exit "$STATE_UNKNOWN"
fi
# First, let's check that the URL responds with 200/OK
#if ! check_http_status_200 "$( print_full_url )"; then
# echo TODO later. Since it needs 2 queries, it could lead to inconsistencies if we get a 200 now and an error later.
# Would the best way be to set another probe ?
# check_https!-H $HOSTNAME$ -f warning -u /-/readiness?all=1
# check_https!-H $HOSTNAME$ -f warning -u /-/health -s "GitLab OK"
#fi
# Fetch the JSON report from the instance
if ! JSONDATA="$( fetch_with_curl_wget_or_whatever "$( print_full_url )" )"; then
    echo "UNKNOWN error fetching the URL '$( print_full_url )'"
    exit "$STATE_UNKNOWN"
fi

# List the top-level keys (one per line, unquoted thanks to -r).
# Doing it here, rather than inside the here-document feeding the loop,
# lets a jq failure (e.g. the response is not JSON) be reported explicitly
# instead of being silently ignored.
if ! JSONKEYS="$( printf '%s\n' "$JSONDATA" | jq -r 'keys[]' )"; then
    echo "UNKNOWN invalid JSON received from the URL '$( print_full_url )'"
    exit "$STATE_UNKNOWN"
fi

# Every top-level key counts as one check. The "status" key holds its value
# directly; for any other key the status of the first array entry is read.
# NOTE(review): only entry [0] of each check is examined - confirm that
# further entries never need to be looked at.
while IFS= read -r KEY; do
    OUTPUT_CPT_CHECKS=$(( OUTPUT_CPT_CHECKS + 1 ))
    # The key is handed to jq as data (--arg) instead of being pasted into
    # the filter, so unusual key names cannot alter the jq program.
    if [ "$KEY" == "status" ]; then
        STATUS="$( printf '%s\n' "$JSONDATA" | jq --arg key "$KEY" '.[$key]' )"
    else
        STATUS="$( printf '%s\n' "$JSONDATA" | jq --arg key "$KEY" '.[$key][0].status' )"
    fi
    # jq prints JSON strings with their double quotes, hence '"ok"'
    if [ "$STATUS" != '"ok"' ]; then
        # Key and status are passed as printf arguments, never as part of
        # the format string, so '%' or '\' in them is printed verbatim.
        OUTPUT_DETAIL_CRITICAL="$( printf "%s Status for key '%s': %s" "$OUTPUT_DETAIL_CRITICAL" "$KEY" "$STATUS" )"
        OUTPUT_EXIT_STATUS=$STATE_CRITICAL
    fi
done <<EOF
$JSONKEYS
EOF
# Compare the number of checks found with the expected range.
# errexit is suspended around check_range() because its return code is the
# information wanted here: the code below reads 0 as "outside of range" and
# 2 as "check_range() failed"; any other code changes nothing.
set +e
check_range "$OUTPUT_CPT_CHECKS" "$NB_CHECKS_RANGE"
STATUS="$?"
set -e
case "$STATUS" in
    0)
        # Only downgrade to WARNING when nothing worse was found before
        if [ "$OUTPUT_EXIT_STATUS" == "$STATE_OK" ]; then
            OUTPUT_EXIT_STATUS=$STATE_WARNING
            OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING Nb checks outside of range: $OUTPUT_CPT_CHECKS / $NB_CHECKS_RANGE"
        fi
        ;;
    2)
        # A CRITICAL result takes precedence over the UNKNOWN one
        if [ "$OUTPUT_EXIT_STATUS" != "$STATE_CRITICAL" ]; then
            OUTPUT_EXIT_STATUS=$STATE_UNKNOWN
            OUTPUT_DETAIL_UNKNOWN="$OUTPUT_DETAIL_UNKNOWN check_range() on error: $OUTPUT_CPT_CHECKS / $NB_CHECKS_RANGE"
        fi
        ;;
esac
# Final output: the status keyword followed by the matching detail string
# (no trailing newline, the perfdata is appended on the same line).
# The detail strings hold user-supplied and remote data, so they are passed
# as printf arguments and never used as the format string: a '%' or '\'
# in them is printed verbatim.
case "$OUTPUT_EXIT_STATUS" in
    "$STATE_OK")
        printf 'OK %s' "$OUTPUT_DETAIL_OK"
        ;;
    "$STATE_WARNING")
        printf 'WARNING %s' "$OUTPUT_DETAIL_WARNING"
        ;;
    "$STATE_CRITICAL")
        printf 'CRITICAL %s' "$OUTPUT_DETAIL_CRITICAL"
        ;;
    "$STATE_UNKNOWN")
        printf 'UNKNOWN %s' "$OUTPUT_DETAIL_UNKNOWN"
        ;;
    *)
        # Exit status matching none of the known states
        printf 'WTF'
        ;;
esac
# Append the checks counter to the perfdata, as one more line
OUTPUT_PERFDATA+=$'\n'"nb_checks=${OUTPUT_CPT_CHECKS};${NB_CHECKS_RANGE};;0"

# The perfdata lines are sorted (workaround for a weird bug in some
# icinga/pnp4nagios interactions), empty lines are dropped and the rest is
# joined with spaces after the '|' separator.
PERFDATA_LINE="$( printf "%s" "$OUTPUT_PERFDATA" | sort | grep -v "^$" | tr "\n" " " )"
printf "|%s\n" "$PERFDATA_LINE"

exit "$OUTPUT_EXIT_STATUS"