Check réplication mysql

Ce script vérifie les positions master et slave de deux noeuds mysql en réplication maitre-maitre.

# Ce script loggue les positions sur le master local, le master distant et le slave
# Avec en colonnes
# 1 timestamp 
# 2 remote_master_master_log_file
# 3 remote_master_master_log_pos
# 4 local_slave_master_log_file
# 5 local_slave_master_log_pos
# 6 local_master_master_log_file
# 7 local_master_master_log_pos
# 8 remote_slave_master_log_file
# 9 remote_slave_master_log_pos 
# 10 date en plus clair
# 11 local_delay
# 12 remote_delay
 
# le user mysql qui réplique doit avoir les droits REPLICATION CLIENT, REPLICATION SLAVE  et PROCESS
 
LOG=/var/log/mysql/replication.log
ERRLOG=/var/log/mysql/replication.err
LOCK=/var/lib/mysql/replication_pb.lock
TMP=/tmp/$(basename $0).tmp
DEBUG=0
 
# On regarde si on est sql1 ou sql2, à adapter suivant vos hostnames
# ici on regarde le 3e caractère du hostname
here=${HOSTNAME:2:1}
[ $here -gt 1 ] && there=1 || there=2
local_host=<prefix>${here}.domaine.tld
remote_host=<prefix>${there}.domaine.tld
 
TS=$(date '+%s') # timestamp
DATE=$(date -d "1970-01-01 UTC +$TS seconds" "+%Y-%m-%d_%H:%M:%S")
 
user=$(sed -ne '5p'</var/lib/mysql/master.info)
pass=$(sed -ne '6p'</var/lib/mysql/master.info)
 
init_vars() {
  # marche pas si le $user a seulement les droits de réplication slave, il faut client
  # (sinon passer par ssh mais celui qui lance ce script doit avoir une clé pour lire /etc/mysql/debian.cnf sur le remote host)
 
  # on rapproche le plus possible les mesures master/slave
  remote_master_status=$(/usr/bin/mysql -u$user -p$pass -h$remote_host -e 'show master status')
  local_slave_status=$(/usr/bin/mysql --defaults-file=/etc/mysql/debian.cnf -e 'show slave status\G')
  local_master_status=$(/usr/bin/mysql --defaults-file=/etc/mysql/debian.cnf -e 'show master status')
  remote_slave_status=$(/usr/bin/mysql -u$user -p$pass -h$remote_host -e 'show slave status\G')
 
  # et on extrait les valeurs qui nous intéressent
  remote_master_master_log_file=$(echo -e "$remote_master_status"|awk 'END {print $1}')
  remote_master_master_log_file_index=$(expr "$remote_master_master_log_file" : '[^\.]\+\.0*\([1-9]\+\)')
  remote_master_master_log_pos=$(echo -e "$remote_master_status"|awk 'END {print $2}')
  local_slave_master_log_file=$(echo -e "$local_slave_status"|awk '/ Master_Log_File/ {print $2}')
  local_slave_master_log_file_index=$(expr "local_slave_master_log_file" : '[^\.]\+\.0*\([1-9]\+\)')
  local_slave_master_log_pos=$(echo -e "$local_slave_status"|awk '/Read_Master_Log_Pos/ {print $2}')
  local_slave_delay=$(echo -e "$local_slave_status"|awk '/Seconds_Behind_Master/ {print $2}')
  local_slave_error=$(echo -e "$local_slave_status"|awk '/Last_Error/ {print $2}')
 
  local_master_master_log_file=$(echo -e "$local_master_status"|awk 'END {print $1}')
  local_master_master_log_file_index=$(expr "$local_master_master_log_file" : '[^\.]\+\.0*\([1-9]\+\)')
  local_master_master_log_pos=$(echo -e "$local_master_status"|awk 'END {print $2}')
  remote_slave_master_log_file=$(echo -e "$remote_slave_status"|awk '/ Master_Log_File/ {print $2}')    
  remote_slave_master_log_file_index=$(expr "$remote_slave_master_log_file" : '[^\.]\+\.0*\([1-9]\+\)')
  remote_slave_master_log_pos=$(echo -e "$remote_slave_status"|awk '/Read_Master_Log_Pos/ {print $2}') 
  remote_slave_delay=$(echo -e "$remote_slave_status"|awk '/Seconds_Behind_Master/ {print $2}')
  remote_slave_error=$(echo -e "$remote_slave_status"|awk '/Last_Error/ {print $2}')
}
 
if [ $DEBUG -gt 0 ]; then
  init_vars
  echo "remote_master et local_slave"
  echo -e "remote_master_master_log_file\t$remote_master_master_log_file"
  echo -e "remote_master_master_log_pos\t$remote_master_master_log_pos"
  echo -e "local_slave_master_log_file\t$local_slave_master_log_file"
  echo -e "local_slave_master_log_pos\t$local_slave_master_log_pos"
  echo "local_slave_delay $local_slave_delay"
  echo "local_slave_error $local_slave_error"
  echo -e "\nlocal_master et remote_slave"
  echo -e "local_master_master_log_file\t$local_master_master_log_file"
  echo -e "local_master_master_log_pos\t$local_master_master_log_pos"
  echo -e "remote_slave_master_log_file\t$remote_slave_master_log_file"
  echo -e "remote_slave_master_log_pos\t$remote_slave_master_log_pos"
  echo "remote_slave_delay $remote_slave_delay"
  echo "remote_slave_error $remote_slave_error"
  echo "Fin debug, on sort"
  exit
fi
 
init_vars
echo "$TS $remote_master_master_log_file $remote_master_master_log_pos $local_slave_master_log_file $local_slave_master_log_pos $local_master_master_log_file $local_master_master_log_pos $remote_slave_master_log_file $remote_slave_master_log_pos $DATE $local_delay $remote_delay" >> $LOG
>$TMP
 
# Check souci qcq
if [ -z "$remote_master_master_log_file" -o -z "$remote_master_master_log_pos" ] || \
   [ -z "$local_slave_master_log_file" -o -z "$local_slave_master_log_pos" ] || \
   [ -z "$local_master_master_log_file" -o -z "$local_master_master_log_pos" ] || \
   [ -z "$remote_slave_master_log_file" -o -z "$remote_slave_master_log_pos" ] || \
   [ -z "$local_slave_delay" -o -z "$remote_slave_delay" ] || \
   [ "$remote_master_master_log_file" != "$local_slave_master_log_file" ] || \
   [ "$local_master_master_log_file" != "$remote_slave_master_log_file" ] || \
   [ "$remote_master_master_log_pos" -gt "$local_slave_master_log_pos" ] || [ "$local_slave_delay" -gt 1 ] || \
   [ "$local_master_master_log_pos" -gt "$remote_slave_master_log_pos" ] || [ "$remote_slave_delay" -gt 1 ]
then
  # 2e chance
  sleep 5
  init_vars
fi
 
# connexion distante
if [ -z "$remote_master_master_log_file" -o -z "$remote_master_master_log_pos" ]; then
  echo "Problème pour se connecter au master mysql distant de $remote_host" >> $TMP
# connexion locale
elif [ -z "$local_slave_master_log_file" -o -z "$local_slave_master_log_pos" ]; then
  echo "Problème pour se connecter au slave mysql local de  $local_host" >> $TMP
# fichier slave local trop vieux
elif [ "$remote_master_master_log_file" != "$local_slave_master_log_file" ]; then
  echo "Le fichier de log est différent sur le master $remote_host ($remote_master_master_log_file) que sur le slave $local_host ($local_slave_master_log_file)" >> $TMP
  echo -e "Sur le slave de $local_host on a l'erreur :\n$local_slave_error" >> $TMP
  echo "On restart le slave de $local_host" >> $TMP
  /usr/bin/mysql --defaults-file=/etc/mysql/debian.cnf -e 'stop slave; start slave;' >> $TMP 2>&1
# position slave local en retard
elif [ "$remote_master_master_log_pos" -gt "$local_slave_master_log_pos" ] || [ "$local_slave_delay" -gt 1 ]; then
  echo "Le slave de $local_host est en retard de ${local_slave_delay}s (slave $local_slave_master_log_pos vs $remote_master_master_log_pos pour le master distant) :
    master $remote_host : $remote_master_master_log_file $remote_master_master_log_pos
    slave  $local_host : $local_slave_master_log_file $local_slave_master_log_pos" >> $TMP
  echo -e "\nSur le slave de $local_host on a l'erreur :\n$local_error" >> $TMP
  echo "\nOn restart le slave de $local_host" >> $TMP 
  /usr/bin/mysql --defaults-file=/etc/mysql/debian.cnf -e 'stop slave; start slave;' >> $TMP 2>&1
fi
 
# on recommence avec le slave distant
if [ -z "$local_master_master_log_file" -o -z "$local_master_master_log_pos" ]; then
  echo "Problème pour se connecter au master mysql local de $local_host" >> $TMP
elif [ -z "$remote_slave_master_log_file" -o -z "$remote_slave_master_log_pos" ]; then
  echo "Problème pour se connecter au slave mysql distant de  $remote_host" >> $TMP
elif [ "$local_master_master_log_file" != "$remote_slave_master_log_file" ]; then
  echo "Le fichier de log est différent sur le master $local_host ($local_master_master_log_file) que sur le slave de $remote_host ($remote_slave_master_log_file)" >> $TMP
  echo -e "Sur le slave de $remote_host on a l'erreur :\n$remote_slave_error" >> $TMP
  echo "On restart le slave de $remote_host" >> $TMP
  /usr/bin/mysql -u$user -p$pass -h$remote_host -e 'stop slave; start slave;' >> $TMP
elif [ "$local_master_master_log_pos" -gt "$remote_slave_master_log_pos" ] || [ "$remote_slave_delay" -gt 1 ]; then
  echo "Le slave de $remote_host est en retard de ${$remote_slave_delay}s (slave $remote_slave_master_log_pos vs $local_master_master_log_pos pour le master local) :
    master $local_host : $local_master_master_log_file $local_master_master_log_pos
    slave  $remote_host : $remote_slave_master_log_file $remote_slave_master_log_pos" >> $TMP
  echo -e "Sur le slave de $remote_host on a l'erreur :\n$remote_slave_error" >> $TMP
  echo "On restart le slave de $remote_host" >> $TMP
  /usr/bin/mysql -u$user -p$pass -h$remote_host -e 'stop slave; start slave;' >> $TMP
fi
 
# et mail en cas de pb
if [ $(wc -l <$TMP) -gt 0 ]; then
  # on ajoute le check des Slave_running ($user doit avoir les droits PROCESS)
  echo -e "\nSur $local_host on a $(/usr/bin/mysqladmin -u$user -p$pass extended-status|grep Slave_running)" >> $TMP
  echo "Sur $remote_host on a $(/usr/bin/mysqladmin -u$user -p$pass -h$remote_host extended-status|grep Slave_running)" >> $TMP
  echo -e "\nFin du script de check ($DATE)" >> $TMP
  # log
  cat $TMP >> $ERRLOG
  # on envoie un mail
  mail -a "Content-Type: text/plain; charset=UTF-8" -s "$DATE Replication mysql cassee ($0 sur $HOSTNAME)" $DEST < $TMP
  # et on pose un lock
  cat $TMP >> $LOCK
else
  [ -f $LOCK ] && \
    echo -e "\n$DATE : 1re constatation de la résolution du problème, synchro mysql OK" >> $LOCK && \
    mail -a "Content-Type: text/plain; charset=UTF-8" -s "$DATE Replication mysql OK ($0 sur $HOSTNAME)" $DEST < $LOCK && \
    rm -f $LOCK
fi