extra features; extra mysql config file, detect disconnected nodes (#2)

* added
        -m MySQL extra my.cnf configuration file
        -s Create state file, to detect when a node gets disconnected

* description updated
This commit is contained in:
staf
2018-06-05 10:18:29 +02:00
committed by Guillaume Coré
parent a2c3e53856
commit 9669a033f6
2 changed files with 146 additions and 37 deletions
+5 -1
View File
@@ -3,7 +3,7 @@ nagios-plugin-check_galera_cluster
A nagios plugin to check status of a galera cluster A nagios plugin to check status of a galera cluster
Version 1.1, Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com> Version 1.1.4, Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>, Staf Wagemakers <staf@wagemakers.be>
check_galera_cluster is a Nagios plugin to monitor Galera cluster status. check_galera_cluster is a Nagios plugin to monitor Galera cluster status.
@@ -18,6 +18,8 @@ A nagios plugin to check status of a galera cluster
MySQL host. MySQL host.
P) P)
MySQL port. MySQL port.
m)
MySQL extra my.cnf configuration file
w) w)
Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical). Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical).
c) c)
@@ -26,3 +28,5 @@ A nagios plugin to check status of a galera cluster
Sets critical value of wsrep_flow_control_paused (default is 0.1). Sets critical value of wsrep_flow_control_paused (default is 0.1).
0) 0)
Rise CRITICAL if the node is not primary Rise CRITICAL if the node is not primary
s)
Create state file, detect disconnected nodes
+141 -36
View File
@@ -1,13 +1,20 @@
#!/bin/bash #!/bin/bash
PROGNAME=`basename $0` PROGNAME=`basename $0`
VERSION="Version 1.1.3" VERSION="Version 1.1.4"
AUTHOR="Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>" AUTHOR="Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>, Staf Wagemakers <staf@wagemakers.be>"
ST_OK=0 ST_OK=0
ST_WR=1 ST_WR=1
ST_CR=2 ST_CR=2
ST_UK=3 ST_UK=3
warnAlerts=0
critAlerts=0
unknAlerts=0
print_version() { print_version() {
echo "$VERSION $AUTHOR" echo "$VERSION $AUTHOR"
} }
@@ -17,7 +24,7 @@ print_help() {
echo "" echo ""
echo "$PROGNAME is a Nagios plugin to monitor Galera cluster status." echo "$PROGNAME is a Nagios plugin to monitor Galera cluster status."
echo "" echo ""
echo "$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-w SIZE] [-c SIZE] [-f FLOAT] [-0]" echo "$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-m file] [-w SIZE] [-c SIZE] [-s statefile] [-f FLOAT] [-0]"
echo "" echo ""
echo "Options:" echo "Options:"
echo " u)" echo " u)"
@@ -28,6 +35,8 @@ print_help() {
echo " MySQL host." echo " MySQL host."
echo " P)" echo " P)"
echo " MySQL port." echo " MySQL port."
echo " m)"
echo " MySQL extra my.cnf configuration file."
echo " w)" echo " w)"
echo " Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical)." echo " Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical)."
echo " c)" echo " c)"
@@ -36,6 +45,8 @@ print_help() {
echo " Sets critical value of wsrep_flow_control_paused (default is 0.1)." echo " Sets critical value of wsrep_flow_control_paused (default is 0.1)."
echo " 0)" echo " 0)"
echo " Rise CRITICAL if the node is not primary" echo " Rise CRITICAL if the node is not primary"
echo " s)"
echo " Create state file, detect disconnected nodes"
exit $ST_UK exit $ST_UK
} }
@@ -58,7 +69,7 @@ check_executable() {
check_executable mysql check_executable mysql
check_executable bc check_executable bc
while getopts “hvu:p:H:P:w:c:f:0” OPTION; do while getopts “hvu:p:H:P:w:c:f:m:s:0” OPTION; do
case $OPTION in case $OPTION in
h) h)
print_help print_help
@@ -80,6 +91,9 @@ while getopts “hvu:p:H:P:w:c:f:0” OPTION; do
P) P)
port=$OPTARG port=$OPTARG
;; ;;
m)
myconfig=$OPTARG
;;
w) w)
warn=$OPTARG warn=$OPTARG
;; ;;
@@ -92,6 +106,9 @@ while getopts “hvu:p:H:P:w:c:f:0” OPTION; do
0) 0)
primary='TRUE' primary='TRUE'
;; ;;
s)
stateFile=$OPTARG
;;
?) ?)
echo "Unknown argument: $1" echo "Unknown argument: $1"
print_help print_help
@@ -114,57 +131,145 @@ param_mysqlhost=$(create_param -h "$mysqlhost")
param_port=$(create_param -P "$port") param_port=$(create_param -P "$port")
param_mysqluser=$(create_param -u "$mysqluser") param_mysqluser=$(create_param -u "$mysqluser")
param_password=$(create_param -p "$password") param_password=$(create_param -p "$password")
param_configfile=$(create_param --defaults-extra-file= "$myconfig")
r1=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_cluster_size'"|cut -f 2) # 3 (GALERA_CLUSTER_SIZE) param_mysql="$param_mysqlhost $param_port $param_mysqluser $param_password $param_configfile"
r2=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_cluster_status'"|cut -f 2) # Primary
r3=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_flow_control_paused'"|cut -f 2) # < 0.1
r4=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_ready'"|cut -f 2) # ON
r5=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_connected'"|cut -f 2) # ON
r6=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_local_state_comment'"|cut -f 2) # Synced
if [ -z "$r3" ]; then #
echo "UNKNOWN: wsrep_flow_control_paused is empty" # verify the database connection
ST_FINAL=$ST_UK #
# Probe the server by running the client's \s command with all output
# discarded; if the client exits non-zero, report CRITICAL and stop.
# $param_mysql is intentionally unquoted so it splits into separate arguments.
if ! mysql $param_mysql -B -N -e '\s;' >/dev/null 2>&1; then
  echo "CRITICAL: mysql connection check failed"
  exit $ST_CR
fi
#
# verify that the node is part of a cluster
#
rClusterStateUuid=$(mysql $param_mysql -B -N -e "show status like 'wsrep_cluster_state_uuid'; "|cut -f 2)
if [ -z "$rClusterStateUuid" ]; then
echo "CRITICAL: node is not part of a cluster"
exit $ST_CR
fi fi
if [ $(echo "$r3 > $fcp" | bc) = 1 ]; then rClusterSize=$(mysql $param_mysql -B -N -e "show status like 'wsrep_cluster_size'"|cut -f 2)
rClusterStatus=$(mysql $param_mysql -B -N -e "show status like 'wsrep_cluster_status'"|cut -f 2) # Primary
rFlowControl=$(mysql $param_mysql -B -N -e "show status like 'wsrep_flow_control_paused'"|cut -f 2) # < 0.1
rReady=$(mysql $param_mysql -B -N -e "show status like 'wsrep_ready'"|cut -f 2) # ON
rConnected=$(mysql $param_mysql -B -N -e "show status like 'wsrep_connected'"|cut -f 2) # ON
rLocalStateComment=$(mysql $param_mysql -B -N -e "show status like 'wsrep_local_state_comment'"|cut -f 2) # Synced
rIncommingAddresses=$(mysql $param_mysql -B -N -e "show global status like 'wsrep_incoming_addresses';"|cut -f 2)
# An empty wsrep_flow_control_paused result is reported as UNKNOWN and
# counted in unknAlerts.
[ -n "$rFlowControl" ] || {
  echo "UNKNOWN: wsrep_flow_control_paused is empty"
  unknAlerts=$(( ${unknAlerts} + 1 ))
}
if [ $(echo "$rFlowControl > $fcp" | bc) = 1 ]; then
echo "CRITICAL: wsrep_flow_control_paused is > $fcp" echo "CRITICAL: wsrep_flow_control_paused is > $fcp"
ST_FINAL=$ST_CR critAlerts=$(($criticalAlerts+1))
fi fi
if [ "$primary" = 'TRUE' ]; then if [ "$primary" = 'TRUE' ]; then
if [ "$r2" != 'Primary' ]; then if [ "$rClusterStatus" != 'Primary' ]; then
echo "CRITICAL: node is not primary (wsrep_cluster_status)" echo "CRITICAL: node is not primary (wsrep_cluster_status)"
ST_FINAL=$ST_CR critAlerts=$(($criticalAlerts+1))
fi fi
fi fi
if [ "$r4" != 'ON' ]; then if [ "$rReady" != 'ON' ]; then
echo "CRITICAL: node is not ready (wsrep_ready)" echo "CRITICAL: node is not ready (wsrep_ready)"
ST_FINAL=$ST_CR critAlerts=$(($criticalAlerts+1))
fi fi
if [ "$r5" != 'ON' ]; then if [ "$rConnected" != 'ON' ]; then
echo "CRITICAL: node is not connected (wsrep_connected)" echo "CRITICAL: node is not connected (wsrep_connected)"
ST_FINAL=$ST_CR critAlerts=$(($criticalAlerts+1))
fi fi
if [ "$r6" != 'Synced' ]; then if [ "$rLocalStateComment" != 'Synced' ]; then
echo "CRITICAL: node is not synced - actual state is: $r6 (wsrep_local_state_comment)" echo "CRITICAL: node is not synced - actual state is: $rLocalStateComment (wsrep_local_state_comment)"
ST_FINAL=$ST_CR critAlerts=$(($criticalAlerts+1))
fi fi
if [ $r1 -gt $warn ]; then if [ $rClusterSize -gt $warn ]; then
echo "OK: number of NODES = $r1 (wsrep_cluster_size)" # only display the ok message if the state check not enabled
ST_FINAL=${ST_FINAL-$ST_OK} if [ -z "$stateFile" ]; then
elif [ $r1 -le $crit ]; then echo "OK: number of NODES = $rClusterSize (wsrep_cluster_size)"
echo "CRITICAL: number of NODES = $r1 (wsrep_cluster_size)" fi
ST_FINAL=$ST_CR elif [ $rClusterSize -le $crit ]; then
elif [ $r1 -le $warn ]; then echo "CRITICAL: number of NODES = $rClusterSize (wsrep_cluster_size)"
echo "WARNING: number of NODES = $r1 (wsrep_cluster_size)" critAlerts=$(($criticalAlerts+1))
ST_FINAL=${ST_FINAL-$ST_WR} elif [ $rClusterSize -le $warn ]; then
else echo "WARNING: number of NODES = $rClusterSize (wsrep_cluster_size)"
exit $ST_UK warnAlerts=$(($warnAlerts+1))
else
exit $ST_UK
fi fi
exit $ST_FINAL #
# automatically detect whether the connection to a peer has been lost
#
#######################################
# Track cluster membership in a state file and flag peers that went missing.
#
# The state file holds every node address ever seen in
# wsrep_incoming_addresses, one per line, sorted and unique. Each run merges
# the currently reported addresses into it and compares the number of known
# nodes with the reported cluster size.
#
# Globals read:    stateFile, rConnected, rIncommingAddresses, rClusterSize,
#                  crit, warn
# Globals written: unknAlerts, warnAlerts, currentNodes, knownNodes,
#                  missingNodes, maxClusterSize
# Outputs:         one OK/WARNING/UNKNOWN status line on stdout
# Returns:         always 0; problems are reported through the alert counters
#######################################
check_state_file() {
  # State tracking is opt-in (-s); nothing to do without a file name.
  [ -n "$stateFile" ] || return 0

  # Quote the path so names containing spaces refer to a single file.
  if ! touch "$stateFile" || [ ! -w "$stateFile" ]; then
    echo "UNKNOWN: stateFile \"$stateFile\" is not writeable"
    unknAlerts=$((unknAlerts+1))
    return 0
  fi

  # Only evaluate membership while this node reports itself as connected.
  [ "$rConnected" = "ON" ] || return 0

  # Currently reported nodes: split on commas/blanks, drop empty entries.
  # LC_ALL=C keeps the ordering identical for sort and comm.
  currentNodes=$(printf '%s\n' "$rIncommingAddresses" \
    | tr ', ' '\n\n' | grep -v '^$' | LC_ALL=C sort -u)

  # Every node ever seen = state file + current nodes. The extra blank line
  # guards against a state file without a trailing newline.
  knownNodes=$({ cat "$stateFile"; echo; printf '%s\n' "$currentNodes"; } \
    | grep -v '^$' | LC_ALL=C sort -u)

  # Rewrite the file as a sorted set instead of appending: comm needs sorted
  # input, and appending left the file unsorted as soon as a new node sorted
  # before an existing one.
  if [ -n "$knownNodes" ] && [ "$knownNodes" != "$(cat "$stateFile")" ]; then
    printf '%s\n' "$knownNodes" > "$stateFile"
  fi

  # Nodes that are known but not currently reported, joined on one line so
  # the plugin output stays a single status line.
  missingNodes=$(LC_ALL=C comm -13 \
    <(printf '%s\n' "$currentNodes") \
    <(printf '%s\n' "$knownNodes") | tr '\n' ' ')
  missingNodes=${missingNodes% }

  # Number of nodes that were part of the cluster at any time.
  maxClusterSize=$(printf '%s\n' "$knownNodes" | grep -c .)

  if [ "$maxClusterSize" -eq "$rClusterSize" ]; then
    if [ "$maxClusterSize" -eq 1 ]; then
      # A lone node gets an OK line only when both size thresholds are 0.
      if [ "$crit" -eq 0 ] && [ "$warn" -eq 0 ]; then
        echo "OK: running single-node database cluster"
      fi
    else
      echo "OK: running redundant $rClusterSize online / $maxClusterSize total"
    fi
  else
    echo "WARNING: redundant $rClusterSize online / $maxClusterSize total, missing peers: $missingNodes"
    warnAlerts=$((warnAlerts+1))
  fi
  return 0
}

check_state_file
#
# exit
#
# Translate the alert counters into the plugin exit status.
# Precedence: critical, then unknown, then warning; otherwise exit 0.
if [ "$critAlerts" -gt "0" ]; then
  exit $ST_CR
fi
if [ "$unknAlerts" -gt "0" ]; then
  exit $ST_UK
fi
if [ "$warnAlerts" -gt "0" ]; then
  exit $ST_WR
fi
exit 0