#!/bin/sh

# Small script to check that all servers behind the glue records :
# - are reachable (IPv4 and IPv6 alike),
# - show the same SOA record.
# GPL v3+

# Stop at the first uncaught error
set -e

# For monitoring plugins: the script path with its last component removed.
# "$0" is quoted and printed with printf so that a path containing
# whitespace, glob characters or backslashes reaches sed unmodified.
PROGPATH=$( printf '%s\n' "$0" | sed -e 's,[\\/][^\\/][^\\/]*$,,' )
REVISION="0.1"

# Include check_range()
#. $PROGPATH/utils.sh
# No need for check_range() at the moment, we just copy
# the states to be standalone (easier to use that way)
STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4

# Defaults
# 1 = compare the SOA records returned by the servers, 0 = skip that test
CHECK_SOA=1

# Output
# Worst state seen so far, used as the final exit status
OUTPUT_EXIT_STATUS=$STATE_OK
# Text accumulated for each state; only the one matching the final
# state is printed
OUTPUT_DETAIL_OK=""
OUTPUT_DETAIL_WARNING=""
OUTPUT_DETAIL_CRITICAL=""
#OUTPUT_PERFDATA=""

#
# Help function
#
usage() {
	# Print the command-line synopsis and the supported options on stdout.
	# Options apply to the domains that follow them on the command line.
	cat <<EOF
Usage :
  $0 [-sS] domain.com [[-sS] domain.net] ...

	-h : show this help and exit
	-s : don't check for SOA records discrepancies
	-S : nevermind, do check for SOA discrepancies (default)
EOF
}

# Some early checks
# "command -v" is the POSIX way to look a program up; "which" is an
# external tool that is not guaranteed to be installed.
if ! command -v dig >/dev/null 2>&1 ; then
	echo "UNKNOWN 'dig' not found"
	exit $STATE_UNKNOWN
fi

# We loop until there is no more parameters, be it
# either options or domains

# Refuse to report "OK" when nothing at all was given to check
if [ "$#" -eq 0 ]; then
	echo "UNKNOWN: no domain tested."
	exit $STATE_UNKNOWN
fi

while [ "$#" -gt 0 ]; do
	# Parameters management
	# getopts keeps its position in OPTIND between calls; as the
	# positional parameters are shifted after every domain, it has to be
	# rewound so the options preceding the next domain are parsed too.
	OPTIND=1
	while getopts hsS OPT; do
		case "$OPT" in
			'h')
				usage
				exit
				;;

			's')
				CHECK_SOA=0
				;;

			'S')
				CHECK_SOA=1
				;;

			\?)
				usage
				exit 1
				;;
		esac
	done
	shift $(( OPTIND - 1 ))

	# Exit if no domain follows the options.
	# This is tested before the domain is shifted away: shifting an empty
	# argument list fails, and with "set -e" the script would stop
	# without printing any plugin output.
	if [ "$#" -eq 0 ] || [ -z "$1" ]; then
		echo "UNKNOWN: no domain tested."
		exit $STATE_UNKNOWN
	fi
	DOMAIN="$1"
	shift

	# SOA answers collected for this domain, one per line
	LIST_SOA=""

	# Strip the leftmost label of the domain (example.net -> net)
	TLD="$( printf '%s\n' "$DOMAIN" | sed 's/[^.]*\.\([^.]\)/\1/' )"
	# ...and get one random server for this TLD
	NS_TLD="$( dig +short "$TLD" NS | sort -R | tail -n 1 )"

	# Query this TLD server on our domain and loop on each IP address "additionally"
	# given, aka. the glue records.
	# dig prints records as "name ttl class type rdata"; awk keeps the
	# rdata of the A and AAAA ones without relying on GNU-only regex syntax.
	LIST_IP_NS_SERVERS="$( dig +norec +nocomments +noquestion +nostats +nocmd @"$NS_TLD" "$DOMAIN" NS | awk '$3 == "IN" && ($4 == "A" || $4 == "AAAA") { print $5 }' )"
	if [ -z "$LIST_IP_NS_SERVERS" ]; then
		# A CRITICAL raised for a previous domain must not be downgraded
		if [ "$OUTPUT_EXIT_STATUS" -ne "$STATE_CRITICAL" ]; then
			OUTPUT_EXIT_STATUS=$STATE_WARNING
		fi
		OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING No glue records for domain $DOMAIN ?"
		continue
	fi
	# Word splitting is wanted here: one address per word
	for IPADDR in $LIST_IP_NS_SERVERS; do
		# Query our server
		if OUTPUT=$( dig @"$IPADDR" "$DOMAIN" SOA +short 2>&1 ); then
			# The server responded, store the SOA for later analyze.
			# Empty lines are dropped with sed, which (unlike grep -v)
			# still succeeds when nothing is left to print.
			LIST_SOA="$( printf '%s\n%s\n' "$LIST_SOA" "$OUTPUT" | sed '/^$/d' )"
		else
			# No response ?
			if [ "$OUTPUT_EXIT_STATUS" -ne "$STATE_CRITICAL" ]; then
				OUTPUT_DETAIL_CRITICAL="Problematic server behind IP"
				OUTPUT_EXIT_STATUS=$STATE_CRITICAL
			fi
			OUTPUT_DETAIL_CRITICAL="$OUTPUT_DETAIL_CRITICAL $IPADDR"
		fi
	done

	# Check that SOA records are all the same: after removing duplicates
	# exactly one line must remain
	if [ "$CHECK_SOA" -ne 0 ] && [ "$OUTPUT_EXIT_STATUS" -ne "$STATE_CRITICAL" ] && [ "$( printf '%s\n' "$LIST_SOA" | sort -u | wc -l )" -ne 1 ]; then
		OUTPUT_EXIT_STATUS=$STATE_WARNING
		OUTPUT_DETAIL_WARNING="$OUTPUT_DETAIL_WARNING SOA records discrepancies for domain $DOMAIN : $LIST_SOA"
	fi

	# Remember the domain as processed (shown when the final state is OK)
	OUTPUT_DETAIL_OK="$OUTPUT_DETAIL_OK $DOMAIN"
done

# Print the status keyword followed by the details gathered for that
# status; anything other than OK/WARNING/CRITICAL is shown as UNKNOWN.
if [ "$OUTPUT_EXIT_STATUS" = '0' ]; then
	printf "OK%s" "$OUTPUT_DETAIL_OK"
elif [ "$OUTPUT_EXIT_STATUS" = '1' ]; then
	printf "WARNING %s" "$OUTPUT_DETAIL_WARNING"
elif [ "$OUTPUT_EXIT_STATUS" = '2' ]; then
	printf "CRITICAL %s" "$OUTPUT_DETAIL_CRITICAL"
else
	printf "UNKNOWN"
fi

# Perfdata (disabled for now); the status line is ended here
#printf "|%s\n" "$OUTPUT_PERFDATA"
printf "\n"

# Hand the computed state back to the caller as the exit status
exit $OUTPUT_EXIT_STATUS