#!/bin/bash
# ZooKeeper repair script.
#
# Follows the ZooKeeper log and, each time a new line containing "ERROR" is
# written to it, force-kills the running QuorumPeerMain JVM(s), restarts the
# server through zkServer.sh and records the restart in reStartDate.txt.
# The script runs until it is killed.

# Directory that contains zkServer.sh
zkDir="/usr/local/zookeeper/bin/"
# Log file to watch, relative to $zkDir
zkLog='zookeeper.out'
# File that records every restart; the trailing slash of $zkDir is stripped
# so the path does not contain "//".
restartLog="${zkDir%/}/reStartDate.txt"

# zkServer.sh and the log are both addressed relative to $zkDir, so refuse to
# continue from any other directory.
cd "$zkDir" || {
  echo "cannot cd to $zkDir" >&2
  exit 1
}

# Print the PID of every running QuorumPeerMain JVM, one per line.
zk_pids() {
  jps | awk '$2 == "QuorumPeerMain" { print $1 }'
}

# Kill any running ZooKeeper JVM, restart the server and record the outcome.
# The restart is attempted even when no JVM is found, so a server that has
# already died is brought back as well.
restart_zk() {
  local pid
  # Word-splitting of the PID list is intentional here.
  for pid in $(zk_pids); do
    kill -9 "$pid"
  done
  # Record the restart time
  date >>"$restartLog"
  # Let zkServer.sh do its own shutdown handling after the kill
  ./zkServer.sh stop
  # Start the server and verify it: start output, status output, then jps
  if ./zkServer.sh start | grep "STARTED"; then
    if ./zkServer.sh status | grep "Mode"; then
      if jps | grep "QuorumPeerMain"; then
        echo "restart Succeed!" >>"$restartLog"
      fi
    fi
  fi
}

# Watch the log forever.
while true; do
  # "tail -n 0" skips lines already in the log, so only errors written from
  # now on trigger a restart (and old errors cannot re-trigger one after the
  # watch is re-armed). "-F" keeps following if the file is truncated or
  # recreated. "grep -m 1" prints the first matching line and exits, which is
  # what lets control reach restart_zk; reading from a process substitution
  # means the shell does not wait for the never-ending tail.
  if grep -m 1 "ERROR" < <(tail -n 0 -F "$zkLog"); then
    restart_zk
  else
    # tail ended without a match; pause so a persistent failure cannot turn
    # this loop into a busy spin.
    sleep 5
  fi
done
改进版:
#!/usr/bin/env bash
# Restarts the Hadoop history/proxy servers, then HBase, then ZooKeeper.

# Hadoop sbin directory
Hadoop=/usr/local/hadoop/sbin
# HBase bin directory
Hbase=/usr/local/hbase/bin
# ZooKeeper bin directory
Zk=/usr/local/zookeeper/bin

# The daemon scripts are invoked relative to $Hadoop, so skip this section
# (rather than run "./..." from the wrong directory) when the cd fails.
if cd "$Hadoop"; then
  # Stop the history and proxy servers; their stdout is not needed.
  ./mr-jobhistory-daemon.sh stop historyserver >/dev/null
  ./yarn-daemon.sh stop proxyserver >/dev/null
  # Start them again, keeping the output for the check below.
  StartHistory=$(./mr-jobhistory-daemon.sh start historyserver)
  StartProxy=$(./yarn-daemon.sh start proxyserver)
  # NOTE(review): success is inferred from both start commands printing
  # something; exit statuses are not inspected - confirm this is reliable.
  if [[ -n "$StartHistory" && -n "$StartProxy" ]]; then
    echo "historyserver and proxyserver restart Succeed!"
  fi
else
  echo "cannot cd to $Hadoop, skipping historyserver/proxyserver restart" >&2
fi
# Restart HBase. The scripts are invoked relative to $Hbase, so skip this
# section (rather than run "./..." from the wrong directory) when the cd fails.
if cd "$Hbase"; then
  # Stop HBase; its stdout is not needed.
  ./stop-hbase.sh >/dev/null
  # Start HBase, keeping the output for the check below.
  StartHbase=$(./start-hbase.sh)
  # NOTE(review): success is inferred from the start script printing
  # something; its exit status is not inspected - confirm this is reliable.
  if [[ -n "$StartHbase" ]]; then
    echo "hbase restart Succeed!"
  fi
else
  echo "cannot cd to $Hbase, skipping hbase restart" >&2
fi
# Stop the ZooKeeper service and restart it, then verify the result.
# zkServer.sh is invoked relative to $Zk, so skip this section (rather than
# run "./zkServer.sh" from the wrong directory) when the cd fails.
if cd "$Zk"; then
  # Stop the server; its stdout is not needed.
  ./zkServer.sh stop >/dev/null
  startZK=$(./zkServer.sh start)
  statusZK=$(./zkServer.sh status)
  if grep "STARTED" <<<"$startZK"; then
    if grep "Mode" <<<"$statusZK"; then
      # Start and status both look good; confirm the JVM is listed by jps.
      if jps | grep "QuorumPeerMain"; then
        echo "zk server start Succeed!"
      fi
    else
      # The status output reported no mode: force-kill each QuorumPeerMain
      # JVM and go through stop/start/verify again.
      # Word-splitting of the PID list is intentional here.
      for pid in $(jps | awk '$2 == "QuorumPeerMain" { print $1 }'); do
        # Kill the process
        kill -9 "$pid"
        # Let zkServer.sh do its own shutdown handling after the kill
        ./zkServer.sh stop >/dev/null
        # Start the server again
        startZK=$(./zkServer.sh start)
        if grep "STARTED" <<<"$startZK"; then
          # Check the service status
          statusZK=$(./zkServer.sh status)
          if grep "Mode" <<<"$statusZK"; then
            if jps | grep -q "QuorumPeerMain"; then
              echo "zookeeper restart Succeed!"
            fi
          fi
        fi
      done
    fi
  fi
else
  echo "cannot cd to $Zk, skipping zookeeper restart" >&2
fi
然后执行:crontab -e
在文件中添加:30 2 * * * /usr/local/Reboot >>/usr/local/reboot/log
上面意思是:每天的2:30跑脚本,重新启动zk、hadoop、hbase服务