extra features; extra mysql config file, detect disconnected nodes (#2)

* added
        -m MySQL extra my.cnf configuration file
        -s Create state file, to detect when a node gets disconnected

* description updated
This commit is contained in:
staf
2018-06-05 10:18:29 +02:00
committed by Guillaume Coré
parent a2c3e53856
commit 9669a033f6
2 changed files with 146 additions and 37 deletions
+141 -36
View File
@@ -1,13 +1,20 @@
#!/bin/bash
PROGNAME=`basename $0`
VERSION="Version 1.1.3"
AUTHOR="Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>"
VERSION="Version 1.1.4"
AUTHOR="Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>, Staf Wagemakers <staf@wagemakers.be>"
ST_OK=0
ST_WR=1
ST_CR=2
ST_UK=3
warnAlerts=0
critAlerts=0
unknAlerts=0
print_version() {
echo "$VERSION $AUTHOR"
}
@@ -17,7 +24,7 @@ print_help() {
echo ""
echo "$PROGNAME is a Nagios plugin to monitor Galera cluster status."
echo ""
echo "$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-w SIZE] [-c SIZE] [-f FLOAT] [-0]"
echo "$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-m file] [-w SIZE] [-c SIZE] [-s statefile] [-f FLOAT] [-0]"
echo ""
echo "Options:"
echo " u)"
@@ -28,6 +35,8 @@ print_help() {
echo " MySQL host."
echo " P)"
echo " MySQL port."
echo " m)"
echo " MySQL extra my.cnf configuration file."
echo " w)"
echo " Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical)."
echo " c)"
@@ -36,6 +45,8 @@ print_help() {
echo " Sets critical value of wsrep_flow_control_paused (default is 0.1)."
echo " 0)"
echo " Rise CRITICAL if the node is not primary"
echo " s)"
echo " Create state file, detect disconnected nodes"
exit $ST_UK
}
@@ -58,7 +69,7 @@ check_executable() {
check_executable mysql
check_executable bc
while getopts “hvu:p:H:P:w:c:f:0” OPTION; do
while getopts “hvu:p:H:P:w:c:f:m:s:0” OPTION; do
case $OPTION in
h)
print_help
@@ -80,6 +91,9 @@ while getopts “hvu:p:H:P:w:c:f:0” OPTION; do
P)
port=$OPTARG
;;
m)
myconfig=$OPTARG
;;
w)
warn=$OPTARG
;;
@@ -92,6 +106,9 @@ while getopts “hvu:p:H:P:w:c:f:0” OPTION; do
0)
primary='TRUE'
;;
s)
stateFile=$OPTARG
;;
?)
echo "Unknown argument: $1"
print_help
@@ -114,57 +131,145 @@ param_mysqlhost=$(create_param -h "$mysqlhost")
param_port=$(create_param -P "$port")
param_mysqluser=$(create_param -u "$mysqluser")
param_password=$(create_param -p "$password")
param_configfile=$(create_param --defaults-extra-file= "$myconfig")
r1=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_cluster_size'"|cut -f 2) # 3 (GALERA_CLUSTER_SIZE)
r2=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_cluster_status'"|cut -f 2) # Primary
r3=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_flow_control_paused'"|cut -f 2) # < 0.1
r4=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_ready'"|cut -f 2) # ON
r5=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_connected'"|cut -f 2) # ON
r6=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_local_state_comment'"|cut -f 2) # Synced
param_mysql="$param_mysqlhost $param_port $param_mysqluser $param_password $param_configfile"
if [ -z "$r3" ]; then
echo "UNKNOWN: wsrep_flow_control_paused is empty"
ST_FINAL=$ST_UK
#
# verify the database connection
#
mysql $param_mysql -B -N -e '\s;' >/dev/null 2>&1 || {
echo "CRITICAL: mysql connection check failed"
exit $ST_CR
}
#
# verify that the node is part of a cluster
#
rClusterStateUuid=$(mysql $param_mysql -B -N -e "show status like 'wsrep_cluster_state_uuid'; "|cut -f 2)
if [ -z "$rClusterStateUuid" ]; then
echo "CRITICAL: node is not part of a cluster"
exit $ST_CR
fi
if [ $(echo "$r3 > $fcp" | bc) = 1 ]; then
rClusterSize=$(mysql $param_mysql -B -N -e "show status like 'wsrep_cluster_size'"|cut -f 2)
rClusterStatus=$(mysql $param_mysql -B -N -e "show status like 'wsrep_cluster_status'"|cut -f 2) # Primary
rFlowControl=$(mysql $param_mysql -B -N -e "show status like 'wsrep_flow_control_paused'"|cut -f 2) # < 0.1
rReady=$(mysql $param_mysql -B -N -e "show status like 'wsrep_ready'"|cut -f 2) # ON
rConnected=$(mysql $param_mysql -B -N -e "show status like 'wsrep_connected'"|cut -f 2) # ON
rLocalStateComment=$(mysql $param_mysql -B -N -e "show status like 'wsrep_local_state_comment'"|cut -f 2) # Synced
rIncommingAddresses=$(mysql $param_mysql -B -N -e "show global status like 'wsrep_incoming_addresses';"|cut -f 2)
if [ -z "$rFlowControl" ]; then
echo "UNKNOWN: wsrep_flow_control_paused is empty"
unknAlerts=$(($unknAlerts+1))
fi
if [ $(echo "$rFlowControl > $fcp" | bc) = 1 ]; then
echo "CRITICAL: wsrep_flow_control_paused is > $fcp"
ST_FINAL=$ST_CR
critAlerts=$(($criticalAlerts+1))
fi
if [ "$primary" = 'TRUE' ]; then
if [ "$r2" != 'Primary' ]; then
if [ "$rClusterStatus" != 'Primary' ]; then
echo "CRITICAL: node is not primary (wsrep_cluster_status)"
ST_FINAL=$ST_CR
critAlerts=$(($criticalAlerts+1))
fi
fi
if [ "$r4" != 'ON' ]; then
if [ "$rReady" != 'ON' ]; then
echo "CRITICAL: node is not ready (wsrep_ready)"
ST_FINAL=$ST_CR
critAlerts=$(($criticalAlerts+1))
fi
if [ "$r5" != 'ON' ]; then
if [ "$rConnected" != 'ON' ]; then
echo "CRITICAL: node is not connected (wsrep_connected)"
ST_FINAL=$ST_CR
critAlerts=$(($criticalAlerts+1))
fi
if [ "$r6" != 'Synced' ]; then
echo "CRITICAL: node is not synced - actual state is: $r6 (wsrep_local_state_comment)"
ST_FINAL=$ST_CR
if [ "$rLocalStateComment" != 'Synced' ]; then
echo "CRITICAL: node is not synced - actual state is: $rLocalStateComment (wsrep_local_state_comment)"
critAlerts=$(($criticalAlerts+1))
fi
if [ $r1 -gt $warn ]; then
echo "OK: number of NODES = $r1 (wsrep_cluster_size)"
ST_FINAL=${ST_FINAL-$ST_OK}
elif [ $r1 -le $crit ]; then
echo "CRITICAL: number of NODES = $r1 (wsrep_cluster_size)"
ST_FINAL=$ST_CR
elif [ $r1 -le $warn ]; then
echo "WARNING: number of NODES = $r1 (wsrep_cluster_size)"
ST_FINAL=${ST_FINAL-$ST_WR}
else
exit $ST_UK
if [ $rClusterSize -gt $warn ]; then
# only display the ok message if the state check not enabled
if [ -z "$stateFile" ]; then
echo "OK: number of NODES = $rClusterSize (wsrep_cluster_size)"
fi
elif [ $rClusterSize -le $crit ]; then
echo "CRITICAL: number of NODES = $rClusterSize (wsrep_cluster_size)"
critAlerts=$(($criticalAlerts+1))
elif [ $rClusterSize -le $warn ]; then
echo "WARNING: number of NODES = $rClusterSize (wsrep_cluster_size)"
warnAlerts=$(($warnAlerts+1))
else
exit $ST_UK
fi
exit $ST_FINAL
#
# detect is the connection is lost automatically
#
if [ ! -z "$stateFile" ]; then
touch $stateFile
if [ $? != "0" ]; then
echo "UNKNOWN: stateFile \"$stateFile\" is not writeable"
unknAlerts=$(($unknAlerts+1))
else
if [ "$rConnected" = "ON" ]; then
# get the current connected Nodes
currentNodes=$(echo $rIncommingAddresses | tr "," "\n" | sort -u)
if [ -f "$stateFile" ]; then
# get the nodes added to the cluster
newNodes=$(echo $currentNodes | tr " " "\n" | comm -2 -3 - $stateFile)
# get the nodes that were removed from the cluster
missingNodes=$(echo $currentNodes | tr " " "\n" | comm -1 -3 - $stateFile)
if [ ! -z "$newNodes" ]; then
# add the new nodes to the cluster to the state file
echo $newNodes | tr " " "\n" >> $stateFile
fi
else
# there is no state file yet, creating new one.
echo $currentNodes | tr " " "\n" > $stateFile
fi # -f stateFile
# get the numeber of nodes that were part of the cluster before
maxClusterSize=$(cat $stateFile | wc -l)
if [ $maxClusterSize -eq $rClusterSize ]; then
if [ $maxClusterSize -eq 1 ]; then
if [ $crit -eq 0 -a $warn -eq 0 ]; then
echo "OK: running single-node database cluster"
fi
else
echo "OK: running redundant $rClusterSize online / $maxClusterSize total"
fi
else
echo "WARNING: redundant $rClusterSize online / $maxClusterSize total, missing peers: $missingNodes"
warnAlerts=$(($warnAlerts+1))
fi
fi # rConnected
fi # -w stateFile
fi # -z stateFile
#
# exit
#
[ "$critAlerts" -gt "0" ] && exit $ST_CR
[ "$unknAlerts" -gt "0" ] && exit $ST_UK
[ "$warnAlerts" -gt "0" ] && exit $ST_WR
exit 0