在生产环境部署了Keepalived+Nginx高可用,业务反馈VIP不稳定,经常出现访问异常的情况,于是查看了Keepalived的配置文件以及VIP。发现VIP同时出现在01和02两台节点上,并且两台节点上都有Nginx访问日志;虽然ICMP(ping)是正常的,但是流量会在两台节点之间频繁切换。于是开始排查配置文件以及VIP的问题
首先查看配置文件
主节点配置文件
[root@abcdocker01 keepalived]# cat keepalived.conf
! Configuration File for keepalived
# Node 01 configuration as originally deployed, i.e. the setup under which the
# VIP showed up on both nodes. The unicast settings are still commented out.
global_defs {
router_id 10.111.167.10 # this node's IP; every node is configured with its own IP
script_user root
enable_script_security
}
# Health check: runs check_port.sh with port 80 as its argument.
vrrp_script chk_nginx {
script "/etc/keepalived/check_port.sh 80"
interval 2
weight -20
}
vrrp_instance VI_1 {
state BACKUP
interface eth0 # network interface
virtual_router_id 251
priority 100
nopreempt
advert_int 1
mcast_src_ip 10.111.167.10 # this node's IP
authentication {
auth_type PASS
auth_pass 11111111
}
# Unicast settings, disabled in this original configuration.
# NOTE(review): these addresses are in 10.52.80.x while the rest of this file
# uses 10.111.167.x - confirm before enabling.
#unicast_src_ip 10.52.80.10
#unicast_peer {
# 10.52.80.11
#}
track_script {
chk_nginx
}
virtual_ipaddress {
10.111.167.111 # VIP
}
}
备节点配置文件
[root@abcdocker02 keepalived]# cat keepalived.conf
! Configuration File for keepalived
# Node 02 (standby) configuration as originally deployed. It differs from
# node 01 in router_id, priority (90 instead of 100) and mcast_src_ip.
# The unicast settings are still commented out.
global_defs {
router_id 10.111.167.11 # this node's IP; every node is configured with its own IP
script_user root
enable_script_security
}
# Health check: runs check_port.sh with port 80 as its argument.
vrrp_script chk_nginx {
script "/etc/keepalived/check_port.sh 80"
interval 2
weight -20
}
vrrp_instance VI_1 {
state BACKUP
interface eth0
virtual_router_id 251
priority 90
nopreempt
advert_int 1
mcast_src_ip 10.111.167.11 # this node's IP
authentication {
auth_type PASS
auth_pass 11111111
}
# Unicast settings, disabled in this original configuration.
# NOTE(review): these addresses are in 10.52.80.x while the rest of this file
# uses 10.111.167.x - confirm before enabling.
#unicast_src_ip 10.52.80.11
#unicast_peer {
# 10.52.80.10
#}
track_script {
chk_nginx
}
virtual_ipaddress {
10.111.167.111 # VIP
}
}
健康检查脚本如下
[root@abcdocker keepalived]# cat check_port.sh
#!/bin/bash
# check_port.sh - keepalived health check: verify that something is listening
# on the port passed as the first argument.
#
# Usage: check_port.sh PORT
#
# Exit status:
#   1  nothing is listening on PORT (keepalived is stopped before exiting)
#   0  otherwise; this includes a missing PORT argument, where only a message
#      is printed (kept as-is for backward compatibility)

#######################################
# Count listening sockets bound to exactly the given port.
# Arguments: $1 - port to look for
# Inputs:    a `netstat -lntup` style listing on stdin
# Outputs:   number of matching lines on stdout (0 when none)
#######################################
count_port_listeners() {
  # Compare only the text after the last ':' of the local-address column.
  # A plain `grep 80` would also match :8080, a PID such as 1080/java, or an
  # address containing "80", and report the port as in use when it is not.
  awk -v port="$1" '
    { n = split($4, parts, ":") }
    n > 1 && parts[n] == port { hits++ }
    END { print hits + 0 }
  '
}

CHK_PORT=$1
if [ -n "$CHK_PORT" ]; then
  PORT_PROCESS=$(netstat -lntup | count_port_listeners "$CHK_PORT")
  if [ "$PORT_PROCESS" -eq 0 ]; then
    echo "Port $CHK_PORT Is Not Used,End."
    # Presumably stops keepalived so that the peer node takes over the VIP;
    # confirm this is still wanted alongside the vrrp_script weight.
    systemctl stop keepalived
    exit 1
  fi
else
  echo "Check Port Cant Be Empty!"
fi
异常现象
发现抓包抓取不到任何数据,并且两台Keepalived之间的VRRP心跳失败,其中一台启动后马上就会脑裂
解决办法
因为是IDC环境,和网络组沟通后,确认是网络组关闭了组播。而Keepalived默认通过组播发送VRRP心跳,所以两台节点互相收不到对方的心跳。
修改配置文件,将配置文件修改为非抢占式,并且配置为单播
#keepalived 01
! Configuration File for keepalived
# Node 01 configuration after the fix: non-preemptive, with unicast enabled
# towards the other node.
global_defs {
router_id 10.52.80.10 # this node's IP; every node is configured with its own IP
script_user root
enable_script_security
}
# Health check: runs check_port.sh with port 443 as its argument.
vrrp_script chk_nginx {
script "/etc/keepalived/check_port.sh 443" # script argument; change the port as needed
interval 2
weight -20
}
vrrp_instance VI_1 {
state BACKUP # non-preemptive mode requires state to be BACKUP
interface ens192 # network interface used by Keepalived
virtual_router_id 251
priority 100 # priority 100; this node is the primary by default
nopreempt
advert_int 1
mcast_src_ip 10.52.80.10 # this node's IP
authentication {
auth_type PASS
auth_pass 11111111
}
unicast_src_ip 10.52.80.10 # enable unicast; src_ip is this Keepalived node's own IP
unicast_peer {
10.52.80.11 # IP of the other Keepalived node
}
track_script {
chk_nginx
}
virtual_ipaddress {
10.52.80.15 # VIP address
}
}
#keepalived 02
[root@utodo-prd-proxy ~]# cat /etc/keepalived/keepalived.conf
! Configuration File for keepalived
# Node 02 configuration after the fix: non-preemptive, with unicast enabled
# towards the other node. It differs from node 01 in router_id, priority (90)
# and the source/peer addresses, which are swapped.
global_defs {
router_id 10.52.80.11 # this node's IP; every node is configured with its own IP
script_user root
enable_script_security
}
# Health check: runs check_port.sh with port 443 as its argument.
vrrp_script chk_nginx {
script "/etc/keepalived/check_port.sh 443"
interval 2
weight -20
}
vrrp_instance VI_1 {
state BACKUP # non-preemptive mode requires state to be BACKUP
interface ens192 # network interface
virtual_router_id 251
priority 90 # priority
nopreempt # enable non-preemptive mode
advert_int 1
mcast_src_ip 10.52.80.11 # this node's IP
authentication {
auth_type PASS
auth_pass 11111111
}
unicast_src_ip 10.52.80.11 # enable unicast; src_ip is this Keepalived node's own IP
unicast_peer {
10.52.80.10 # IP of the other Keepalived node
}
track_script {
chk_nginx
}
virtual_ipaddress {
10.52.80.15 # VIP
}
}
2台Keepalived配置文件修改完毕后,重启服务。 通过抓包检查
Keepalived 01节点抓包数据
同样在02节点抓包也正常
再查看VIP,只会在01节点上;当01节点挂了后,VIP漂移到02节点上。再启动01节点也不会抢占。02节点VIP也会自动下掉,VIP始终只会在一台节点上存在
大佬,我的keepalived 启动后过几天两台服务器上都没有vip了,刚安装好时候vip都是可以正常切换的,日志没有任何报错。帮忙分析一下呗
上传一下日志和配置文件
[…] […]