extra features; extra mysql config file, detect disconnected nodes (#2)
* added
-m MySQL extra my.cnf configuration file
-s Create state file, to detect when a node gets disconnected
* description updated
This commit is contained in:
@@ -3,7 +3,7 @@ nagios-plugin-check_galera_cluster
|
|||||||
|
|
||||||
A nagios plugin to check status of a galera cluster
|
A nagios plugin to check status of a galera cluster
|
||||||
|
|
||||||
Version 1.1, Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>
|
Version 1.1.4, Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>, Staf Wagemakers <staf@wagemakers.be>
|
||||||
|
|
||||||
check_galera_cluster is a Nagios plugin to monitor Galera cluster status.
|
check_galera_cluster is a Nagios plugin to monitor Galera cluster status.
|
||||||
|
|
||||||
@@ -18,6 +18,8 @@ A nagios plugin to check status of a galera cluster
|
|||||||
MySQL host.
|
MySQL host.
|
||||||
P)
|
P)
|
||||||
MySQL port.
|
MySQL port.
|
||||||
|
m)
|
||||||
|
MySQL extra my.cnf configuration file
|
||||||
w)
|
w)
|
||||||
Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical).
|
Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical).
|
||||||
c)
|
c)
|
||||||
@@ -26,3 +28,5 @@ A nagios plugin to check status of a galera cluster
|
|||||||
Sets critical value of wsrep_flow_control_paused (default is 0.1).
|
Sets critical value of wsrep_flow_control_paused (default is 0.1).
|
||||||
0)
|
0)
|
||||||
Raise CRITICAL if the node is not primary
|
Raise CRITICAL if the node is not primary
|
||||||
|
s)
|
||||||
|
Create state file, detect disconnected nodes
|
||||||
|
|||||||
+141
-36
@@ -1,13 +1,20 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
|
|
||||||
|
|
||||||
PROGNAME=`basename $0`
|
PROGNAME=`basename $0`
|
||||||
VERSION="Version 1.1.3"
|
VERSION="Version 1.1.4"
|
||||||
AUTHOR="Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>"
|
AUTHOR="Guillaume Coré <fridim@onfi.re>, Ales Nosek <ales.nosek@gmail.com>, Staf Wagemakers <staf@wagemakers.be>"
|
||||||
|
|
||||||
ST_OK=0
|
ST_OK=0
|
||||||
ST_WR=1
|
ST_WR=1
|
||||||
ST_CR=2
|
ST_CR=2
|
||||||
ST_UK=3
|
ST_UK=3
|
||||||
|
|
||||||
|
warnAlerts=0
|
||||||
|
critAlerts=0
|
||||||
|
unknAlerts=0
|
||||||
|
|
||||||
|
|
||||||
print_version() {
|
print_version() {
|
||||||
echo "$VERSION $AUTHOR"
|
echo "$VERSION $AUTHOR"
|
||||||
}
|
}
|
||||||
@@ -17,7 +24,7 @@ print_help() {
|
|||||||
echo ""
|
echo ""
|
||||||
echo "$PROGNAME is a Nagios plugin to monitor Galera cluster status."
|
echo "$PROGNAME is a Nagios plugin to monitor Galera cluster status."
|
||||||
echo ""
|
echo ""
|
||||||
echo "$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-w SIZE] [-c SIZE] [-f FLOAT] [-0]"
|
echo "$PROGNAME [-u USER] [-p PASSWORD] [-H HOST] [-P PORT] [-m file] [-w SIZE] [-c SIZE] [-s statefile] [-f FLOAT] [-0]"
|
||||||
echo ""
|
echo ""
|
||||||
echo "Options:"
|
echo "Options:"
|
||||||
echo " u)"
|
echo " u)"
|
||||||
@@ -28,6 +35,8 @@ print_help() {
|
|||||||
echo " MySQL host."
|
echo " MySQL host."
|
||||||
echo " P)"
|
echo " P)"
|
||||||
echo " MySQL port."
|
echo " MySQL port."
|
||||||
|
echo " m)"
|
||||||
|
echo " MySQL extra my.cnf configuration file."
|
||||||
echo " w)"
|
echo " w)"
|
||||||
echo " Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical)."
|
echo " Sets minimum number of nodes in the cluster when WARNING is raised. (default is same as critical)."
|
||||||
echo " c)"
|
echo " c)"
|
||||||
@@ -36,6 +45,8 @@ print_help() {
|
|||||||
echo " Sets critical value of wsrep_flow_control_paused (default is 0.1)."
|
echo " Sets critical value of wsrep_flow_control_paused (default is 0.1)."
|
||||||
echo " 0)"
|
echo " 0)"
|
||||||
echo " Rise CRITICAL if the node is not primary"
|
echo " Rise CRITICAL if the node is not primary"
|
||||||
|
echo " s)"
|
||||||
|
echo " Create state file, detect disconnected nodes"
|
||||||
exit $ST_UK
|
exit $ST_UK
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -58,7 +69,7 @@ check_executable() {
|
|||||||
check_executable mysql
|
check_executable mysql
|
||||||
check_executable bc
|
check_executable bc
|
||||||
|
|
||||||
while getopts “hvu:p:H:P:w:c:f:0” OPTION; do
|
# Parse command-line options. The option string must be wrapped in plain ASCII
# quotes: typographic quotes are not shell quoting, so their bytes would become
# part of the option string. A letter followed by ':' takes an argument.
while getopts "hvu:p:H:P:w:c:f:m:s:0" OPTION; do
|
||||||
case $OPTION in
|
case $OPTION in
|
||||||
h)
|
h)
|
||||||
print_help
|
print_help
|
||||||
@@ -80,6 +91,9 @@ while getopts “hvu:p:H:P:w:c:f:0” OPTION; do
|
|||||||
P)
|
P)
|
||||||
port=$OPTARG
|
port=$OPTARG
|
||||||
;;
|
;;
|
||||||
|
m)
|
||||||
|
myconfig=$OPTARG
|
||||||
|
;;
|
||||||
w)
|
w)
|
||||||
warn=$OPTARG
|
warn=$OPTARG
|
||||||
;;
|
;;
|
||||||
@@ -92,6 +106,9 @@ while getopts “hvu:p:H:P:w:c:f:0” OPTION; do
|
|||||||
0)
|
0)
|
||||||
primary='TRUE'
|
primary='TRUE'
|
||||||
;;
|
;;
|
||||||
|
s)
|
||||||
|
stateFile=$OPTARG
|
||||||
|
;;
|
||||||
?)
|
?)
|
||||||
echo "Unknown argument: $1"
|
echo "Unknown argument: $1"
|
||||||
print_help
|
print_help
|
||||||
@@ -114,57 +131,145 @@ param_mysqlhost=$(create_param -h "$mysqlhost")
|
|||||||
param_port=$(create_param -P "$port")
|
param_port=$(create_param -P "$port")
|
||||||
param_mysqluser=$(create_param -u "$mysqluser")
|
param_mysqluser=$(create_param -u "$mysqluser")
|
||||||
param_password=$(create_param -p "$password")
|
param_password=$(create_param -p "$password")
|
||||||
|
param_configfile=$(create_param --defaults-extra-file= "$myconfig")
|
||||||
|
|
||||||
r1=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_cluster_size'"|cut -f 2) # 3 (GALERA_CLUSTER_SIZE)
|
param_mysql="$param_mysqlhost $param_port $param_mysqluser $param_password $param_configfile"
|
||||||
r2=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_cluster_status'"|cut -f 2) # Primary
|
|
||||||
r3=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_flow_control_paused'"|cut -f 2) # < 0.1
|
|
||||||
r4=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_ready'"|cut -f 2) # ON
|
|
||||||
r5=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_connected'"|cut -f 2) # ON
|
|
||||||
r6=$(mysql $param_mysqlhost $param_port $param_mysqluser $param_password -B -N -e "show status like 'wsrep_local_state_comment'"|cut -f 2) # Synced
|
|
||||||
|
|
||||||
if [ -z "$r3" ]; then
|
#
|
||||||
echo "UNKNOWN: wsrep_flow_control_paused is empty"
|
# verify the database connection
|
||||||
ST_FINAL=$ST_UK
|
#
|
||||||
|
|
||||||
|
mysql $param_mysql -B -N -e '\s;' >/dev/null 2>&1 || {
|
||||||
|
echo "CRITICAL: mysql connection check failed"
|
||||||
|
exit $ST_CR
|
||||||
|
}
|
||||||
|
|
||||||
|
#
|
||||||
|
# verify that the node is part of a cluster
|
||||||
|
#
|
||||||
|
|
||||||
|
rClusterStateUuid=$(mysql $param_mysql -B -N -e "show status like 'wsrep_cluster_state_uuid'; "|cut -f 2)
|
||||||
|
|
||||||
|
if [ -z "$rClusterStateUuid" ]; then
|
||||||
|
echo "CRITICAL: node is not part of a cluster"
|
||||||
|
exit $ST_CR
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $(echo "$r3 > $fcp" | bc) = 1 ]; then
|
rClusterSize=$(mysql $param_mysql -B -N -e "show status like 'wsrep_cluster_size'"|cut -f 2)
|
||||||
|
rClusterStatus=$(mysql $param_mysql -B -N -e "show status like 'wsrep_cluster_status'"|cut -f 2) # Primary
|
||||||
|
rFlowControl=$(mysql $param_mysql -B -N -e "show status like 'wsrep_flow_control_paused'"|cut -f 2) # < 0.1
|
||||||
|
rReady=$(mysql $param_mysql -B -N -e "show status like 'wsrep_ready'"|cut -f 2) # ON
|
||||||
|
rConnected=$(mysql $param_mysql -B -N -e "show status like 'wsrep_connected'"|cut -f 2) # ON
|
||||||
|
rLocalStateComment=$(mysql $param_mysql -B -N -e "show status like 'wsrep_local_state_comment'"|cut -f 2) # Synced
|
||||||
|
rIncommingAddresses=$(mysql $param_mysql -B -N -e "show global status like 'wsrep_incoming_addresses';"|cut -f 2)
|
||||||
|
|
||||||
|
if [ -z "$rFlowControl" ]; then
|
||||||
|
echo "UNKNOWN: wsrep_flow_control_paused is empty"
|
||||||
|
unknAlerts=$(($unknAlerts+1))
|
||||||
|
fi
|
||||||
|
|
||||||
|
if [ $(echo "$rFlowControl > $fcp" | bc) = 1 ]; then
|
||||||
echo "CRITICAL: wsrep_flow_control_paused is > $fcp"
|
echo "CRITICAL: wsrep_flow_control_paused is > $fcp"
|
||||||
ST_FINAL=$ST_CR
|
# increment the running critical-alert counter (read critAlerts, not the undefined criticalAlerts)
critAlerts=$(($critAlerts+1))
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$primary" = 'TRUE' ]; then
|
if [ "$primary" = 'TRUE' ]; then
|
||||||
if [ "$r2" != 'Primary' ]; then
|
if [ "$rClusterStatus" != 'Primary' ]; then
|
||||||
echo "CRITICAL: node is not primary (wsrep_cluster_status)"
|
echo "CRITICAL: node is not primary (wsrep_cluster_status)"
|
||||||
ST_FINAL=$ST_CR
|
# increment the running critical-alert counter (read critAlerts, not the undefined criticalAlerts)
critAlerts=$(($critAlerts+1))
|
||||||
fi
|
fi
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$r4" != 'ON' ]; then
|
if [ "$rReady" != 'ON' ]; then
|
||||||
echo "CRITICAL: node is not ready (wsrep_ready)"
|
echo "CRITICAL: node is not ready (wsrep_ready)"
|
||||||
ST_FINAL=$ST_CR
|
# increment the running critical-alert counter (read critAlerts, not the undefined criticalAlerts)
critAlerts=$(($critAlerts+1))
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$r5" != 'ON' ]; then
|
if [ "$rConnected" != 'ON' ]; then
|
||||||
echo "CRITICAL: node is not connected (wsrep_connected)"
|
echo "CRITICAL: node is not connected (wsrep_connected)"
|
||||||
ST_FINAL=$ST_CR
|
# increment the running critical-alert counter (read critAlerts, not the undefined criticalAlerts)
critAlerts=$(($critAlerts+1))
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ "$r6" != 'Synced' ]; then
|
if [ "$rLocalStateComment" != 'Synced' ]; then
|
||||||
echo "CRITICAL: node is not synced - actual state is: $r6 (wsrep_local_state_comment)"
|
echo "CRITICAL: node is not synced - actual state is: $rLocalStateComment (wsrep_local_state_comment)"
|
||||||
ST_FINAL=$ST_CR
|
# increment the running critical-alert counter (read critAlerts, not the undefined criticalAlerts)
critAlerts=$(($critAlerts+1))
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [ $r1 -gt $warn ]; then
|
if [ $rClusterSize -gt $warn ]; then
|
||||||
echo "OK: number of NODES = $r1 (wsrep_cluster_size)"
|
# only display the OK message if the state check is not enabled
|
||||||
ST_FINAL=${ST_FINAL-$ST_OK}
|
if [ -z "$stateFile" ]; then
|
||||||
elif [ $r1 -le $crit ]; then
|
echo "OK: number of NODES = $rClusterSize (wsrep_cluster_size)"
|
||||||
echo "CRITICAL: number of NODES = $r1 (wsrep_cluster_size)"
|
fi
|
||||||
ST_FINAL=$ST_CR
|
elif [ $rClusterSize -le $crit ]; then
|
||||||
elif [ $r1 -le $warn ]; then
|
echo "CRITICAL: number of NODES = $rClusterSize (wsrep_cluster_size)"
|
||||||
echo "WARNING: number of NODES = $r1 (wsrep_cluster_size)"
|
# increment the running critical-alert counter (read critAlerts, not the undefined criticalAlerts)
critAlerts=$(($critAlerts+1))
|
||||||
ST_FINAL=${ST_FINAL-$ST_WR}
|
elif [ $rClusterSize -le $warn ]; then
|
||||||
else
|
echo "WARNING: number of NODES = $rClusterSize (wsrep_cluster_size)"
|
||||||
exit $ST_UK
|
warnAlerts=$(($warnAlerts+1))
|
||||||
|
else
|
||||||
|
exit $ST_UK
|
||||||
fi
|
fi
|
||||||
|
|
||||||
exit $ST_FINAL
|
#
|
||||||
|
# automatically detect if the connection is lost
|
||||||
|
#
|
||||||
|
|
||||||
|
if [ ! -z "$stateFile" ]; then
|
||||||
|
|
||||||
|
touch $stateFile
|
||||||
|
|
||||||
|
if [ $? != "0" ]; then
|
||||||
|
|
||||||
|
echo "UNKNOWN: stateFile \"$stateFile\" is not writeable"
|
||||||
|
unknAlerts=$(($unknAlerts+1))
|
||||||
|
|
||||||
|
else
|
||||||
|
|
||||||
|
if [ "$rConnected" = "ON" ]; then
|
||||||
|
# get the current connected Nodes
|
||||||
|
currentNodes=$(echo $rIncommingAddresses | tr "," "\n" | sort -u)
|
||||||
|
if [ -f "$stateFile" ]; then
|
||||||
|
# get the nodes added to the cluster
|
||||||
|
newNodes=$(echo $currentNodes | tr " " "\n" | comm -2 -3 - $stateFile)
|
||||||
|
# get the nodes that were removed from the cluster
|
||||||
|
missingNodes=$(echo $currentNodes | tr " " "\n" | comm -1 -3 - $stateFile)
|
||||||
|
if [ ! -z "$newNodes" ]; then
|
||||||
|
# add the nodes that are new to the cluster to the state file
|
||||||
|
echo $newNodes | tr " " "\n" >> $stateFile
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
# there is no state file yet, creating new one.
|
||||||
|
echo $currentNodes | tr " " "\n" > $stateFile
|
||||||
|
fi # -f stateFile
|
||||||
|
# get the number of nodes that were part of the cluster before
|
||||||
|
maxClusterSize=$(cat $stateFile | wc -l)
|
||||||
|
|
||||||
|
if [ $maxClusterSize -eq $rClusterSize ]; then
|
||||||
|
if [ $maxClusterSize -eq 1 ]; then
|
||||||
|
if [ $crit -eq 0 -a $warn -eq 0 ]; then
|
||||||
|
echo "OK: running single-node database cluster"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "OK: running redundant $rClusterSize online / $maxClusterSize total"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
echo "WARNING: redundant $rClusterSize online / $maxClusterSize total, missing peers: $missingNodes"
|
||||||
|
warnAlerts=$(($warnAlerts+1))
|
||||||
|
fi
|
||||||
|
|
||||||
|
fi # rConnected
|
||||||
|
|
||||||
|
fi # -w stateFile
|
||||||
|
|
||||||
|
fi # -z stateFile
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# exit
|
||||||
|
#
|
||||||
|
|
||||||
|
[ "$critAlerts" -gt "0" ] && exit $ST_CR
|
||||||
|
[ "$unknAlerts" -gt "0" ] && exit $ST_UK
|
||||||
|
[ "$warnAlerts" -gt "0" ] && exit $ST_WR
|
||||||
|
|
||||||
|
exit 0
|
||||||
|
|||||||
Reference in New Issue
Block a user