├── README.md ├── active_standby.sh ├── install.txt ├── keepalived.conf └── pg_moniter.sh /README.md: -------------------------------------------------------------------------------- 1 | Keepalived + PostgreSQL 流复制方式实现高可用 HA 2 | ==================== 3 | 4 | 一 项目简介 5 | --- 6 | 7 | HA 是数据库领域一个永恒的议题,同时也是最复杂的方案之一,PostgreSQL 本身并不提供任何高可用方案, 本文通过 Keepalived + PostgreSQL 流复制方式实现高可用 HA,故障切换逻辑和部分脚本参考德哥 sky_postgresql_cluster (https://github.com/digoal/sky_postgresql_cluster) 的 HA 项目,本文仅分享一种 PostgreSQL的高可用方案,脚本的切换逻辑可根据实际情况调整,如果您对此方案有更好的建议或补充,欢迎探讨。 8 | 9 | Email: francs3@163.com 10 | 11 | 二 需求 12 | --- 13 | 14 | - PostgreSQL 9.4 版本:其它版本脚本需做相应调整 15 | - 硬件:2 台带远程管理卡的物理主机:在数据库发生切换时需要用到远程管理卡 16 | - 两台物理主机安装 Keepalived 程序 17 | -------------------------------------------------------------------------------- /active_standby.sh: -------------------------------------------------------------------------------- 1 | #/bin/bash 2 | 3 | # 环境变量 4 | export PGPORT=1921 5 | export PGUSER=sky_pg_cluster 6 | export PG_OS_USER=pg94 7 | export PGDBNAME=sky_pg_cluster 8 | export PGDATA=/opt/database/pg94/pg_root 9 | export LANG=en_US.utf8 10 | export PGHOME=/opt/pgsql 11 | export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib 12 | export PATH=/opt/pgbouncer/bin:$PGHOME/bin:$PGPOOL_HOME/bin:$PATH:. 13 | 14 | # 配置信息, LAG_MINUTES 配置允许的延迟时间 15 | LAG_MINUTES=3 16 | HOST_IP=`hostname -i` 17 | NOTICE_EMAIL="francs3@163.com" 18 | FAILOVE_LOG='/tmp/failover.log' 19 | 20 | SQL1="select 'this_is_standby' as cluster_role from ( select pg_is_in_recovery() as std ) t where t.std is true;" 21 | SQL2="select 'standby_in_allowed_lag' as cluster_lag from cluster_status where now()-last_alive < interval '$LAG_MINUTES min';" 22 | 23 | # 配置 fence 设备地址和用户密码 24 | FENCE_IP=192.168.1.21 25 | FENCE_USER=xxxx 26 | FENCE_PWD=xxxx 27 | 28 | # VIP 已发生漂移,记录到日志文件 29 | echo -e "`date +%F\ %T`: keepalived VIP switchover!" >> $FAILOVE_LOG 30 | 31 | # VIP 已漂移,邮件通知 32 | #echo -e "`date +%F\ %T`: ${HOST_IP}/${PGPORT} VIP 发生漂移,需排查问题!\n\nAuthor: francs(DBA)" | mutt -s "Error: 数据库 VIP 发生漂移 " ${NOTICE_EMAIL} 33 | 34 | 35 | # pg_failover 函数,用于主库故障时激活从库 36 | pg_failover() 37 | { 38 | # FENCE_STATUS 表示 fence 后成功标志,1 表示失败,0 表示成功 39 | # PROMOTE_STATUS 表示激活备库成功标志,1 表示失败,0 表示成功 40 | FENCE_STATUS=1 41 | PROMOTE_STATUS=1 42 | 43 | # 激活备库前需 Fence 关闭主库 44 | for ((k=0;k<10;k++)) 45 | do 46 | # fence命令, 设备不同的话, fence命令可能不一样. 47 | ipmitool -I lanplus -L OPERATOR -H $FENCE_IP -U $FENCE_USER -P $FENCE_PWD power reset 48 | if [ $? -eq 0 ]; then 49 | echo -e "`date +%F\ %T`: fence primary db host success." 50 | FENCE_STATUS=0 51 | break 52 | fi 53 | sleep 1 54 | done 55 | 56 | if [ $FENCE_STATUS -ne 0 ]; then 57 | echo -e "`date +%F\ %T`: fence failed. Standby will not promote, please fix it manually." 58 | return $FENCE_STATUS 59 | fi 60 | 61 | # 激活备库 62 | su - $PG_OS_USER -c "pg_ctl promote" 63 | if [ $? -eq 0 ]; then 64 | echo -e "`date +%F\ %T`: `hostname` promote standby success. " 65 | PROMOTE_STATUS=0 66 | fi 67 | 68 | if [ $PROMOTE_STATUS -ne 0 ]; then 69 | echo -e "`date +%F\ %T`: promote standby failed." 70 | return $PROMOTE_STATUS 71 | fi 72 | 73 | echo -e "`date +%F\ %T`: pg_failover() function call success." 74 | return 0 75 | } 76 | 77 | 78 | # 故障切换过程 79 | # standby是否正常的标记(is in recovery), CNT=1 表示正常. 80 | CNT=`echo $SQL1 | psql -At -h 127.0.0.1 -p $PGPORT -U $PGUSER -d $PGDBNAME -f - | grep -c this_is_standby` 81 | echo -e "CNT: $CNT" 82 | # 判断 standby lag 是否在接受范围内的标记, LAG=1 表示正常. 83 | LAG=`echo $SQL2 | psql -At -h 127.0.0.1 -p $PGPORT -U $PGUSER -d $PGDBNAME | grep -c standby_in_allowed_lag` 84 | echo -e "LAG: $LAG" 85 | 86 | if [ $CNT -eq 1 ] && [ $LAG -eq 1 ]; then 87 | pg_failover >> $FAILOVE_LOG 88 | if [ $? -ne 0 ]; then 89 | echo -e "`date +%F\ %T`: pg_failover failed." >> $FAILOVE_LOG 90 | exit 1 91 | fi 92 | else 93 | echo -e "`date +%F\ %T`: `hostname` standby is not ok or laged far $LAG_MINUTES mintues from primary , failover not allowed! " >> $FAILOVE_LOG 94 | exit 1 95 | fi 96 | 97 | 98 | # 判断是否要进入failover过程 99 | # 1. standby 正常 (is in recovery) 100 | # 2. standby lag 在接受范围内 101 | 102 | # failover过程 103 | # 1. fence 关闭主服务器 104 | # 2. 激活standby数据库 105 | -------------------------------------------------------------------------------- /install.txt: -------------------------------------------------------------------------------- 1 | 一 环境信息 2 | 机器:2 台 HP ProLiant DL360 G6 3 | 系统版本: RHEL 5.5 4 | PostgreSQL 版本: 9.4.1 5 | 主节点 IP: 192.168.1.15 6 | 备节点 IP: 192.168.1.11 7 | Keepalived VIP: 192.168.1.26 8 | FENCE DEVICE: HP 9 | 10 | 二 系统配置 ( 主备节点 ) 11 | -- 创建用户 12 | groupadd pg94 13 | useradd pg94 -g pg94 14 | passwd pg94 15 | 16 | --创建目录 17 | mkdir -p /data01/pg_data/ 18 | chown -R pg94:pg94 /data01/pg_data/ 19 | su - pg94 20 | mkdir -p /data01/pg_data/pg_root 21 | chmod 0700 /data01/pg_data/pg_root 22 | 23 | --安装 Package 24 | yum -y install gcc readline readline-devel zlib zlib-devel python-devel ipmitool 25 | 26 | 27 | 三 PostgreSQL 软件安装 ( 主备节点 ) 28 | --下载 29 | wget -c https://ftp.postgresql.org/pub/source/v9.4.1/postgresql-9.4.1.tar.bz2 30 | 31 | --安装 PostgreSQL 软件 32 | tar jxvf postgresql-9.4.1.tar.bz2 33 | cd postgresql-9.4.1 34 | ./configure --prefix=/opt/pgsql_9.4.1 --with-wal-blocksize=16 --with-pgport=1921 35 | gmake world 36 | gmake install-world 37 | ln -s /opt/pgsql_9.4.1 /opt/pgsql 38 | 39 | --配置环境变量 40 | vim /home/pg94/.bash_profile 41 | 42 | export PGPORT=1921 43 | export PGUSER=postgres 44 | export PGDATA=/data01/pg_data/pg_root 45 | export LANG=en_US.utf8 46 | 47 | export PGHOME=/opt/pgsql 48 | export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib 49 | export DATE=`date +"%Y%m%d%H%M"` 50 | export PATH=$PGHOME/bin:$PATH:. 51 | export MANPATH=$PGHOME/share/man:$MANPATH 52 | alias rm='rm -i' 53 | alias ll='ls -lh' 54 | 55 | 56 | --初始化数据库 ( 主节点 ) 57 | initdb -E UTF8 --locale=C -D /data01/pg_data/pg_root -U postgres -W 58 | Ww0pIXSrFACFwIdMBTfo08F5d 59 | 60 | -- postgresql.conf 参数配置 ( 主节点 ) 61 | listen_addresses = '*' # what IP address(es) to listen on; 62 | port = 1921 # (change requires restart) 63 | max_connections = 1000 # (change requires restart) 64 | unix_socket_directories = '/tmp' # comma-separated list of directories 65 | unix_socket_permissions = 0777 # begin with 0 to use octal notation 66 | shared_buffers = 1024MB # min 128kB 67 | work_mem = 4MB # min 64kB 68 | maintenance_work_mem = 1024MB # min 1MB 69 | dynamic_shared_memory_type = posix # the default is the first option 70 | bgwriter_delay = 10ms # 10-10000ms between rounds 71 | wal_level = hot_standby # minimal, archive, hot_standby, or logical 72 | synchronous_commit = off # synchronization level; 73 | wal_buffers = -1 # min 32kB, -1 sets based on shared_buffers 74 | checkpoint_segments = 32 # in logfile segments, min 1, 16MB each 75 | checkpoint_timeout = 5min # range 30s-1h 76 | archive_mode = on # allows archiving to be done 77 | archive_command = '/bin/date' # command to use to archive a logfile segment 78 | max_wal_senders = 8 # max number of walsender processes 79 | wal_keep_segments = 128 # in logfile segments, 16MB each; 0 disables 80 | hot_standby = on # "on" allows queries during recovery 81 | default_statistics_target = 100 # range 1-10000 82 | constraint_exclusion = partition # on, off, or partition 83 | log_destination = 'csvlog' # Valid values are combinations of 84 | logging_collector = on # Enable capturing of stderr and csvlog 85 | log_directory = 'pg_log' # directory where log files are written, 86 | log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern, 87 | log_file_mode = 0600 # creation mode for log files, 88 | log_rotation_age = 1d # Automatic rotation of logfiles will 89 | log_rotation_size = 10MB # Automatic rotation of logfiles will 90 | log_timezone = 'PRC' 91 | datestyle = 'iso, mdy' 92 | timezone = 'PRC' 93 | lc_messages = 'C' # locale for system error message 94 | lc_monetary = 'C' # locale for monetary formatting 95 | lc_numeric = 'C' # locale for number formatting 96 | lc_time = 'C' # locale for time formatting 97 | 98 | 99 | 四 流复制搭建 100 | --创建流复制用户( 主节点 ) 101 | CREATE USER repuser 102 | REPLICATION 103 | LOGIN 104 | CONNECTION LIMIT 5 105 | ENCRYPTED PASSWORD 'Hgo7haSIASGAJYSsFqbYfyx2q'; 106 | 107 | -- 配置.pgpass 密码文件 (主,备节点) 108 | vim /home/pg94/.pgpass 109 | 192.168.1.15:1921:replication:repuser:Hgo7haSIASGAJYSsFqbYfyx2q 110 | 192.168.1.11:1921:replication:repuser:Hgo7haSIASGAJYSsFqbYfyx2q 111 | 192.168.1.26:1921:replication:repuser:Hgo7haSIASGAJYSsFqbYfyx2q 112 | 113 | chmod 0600 /home/pg94/.pgpass 114 | 115 | --配置 pg_hba.conf ( 主节点) 116 | # replication 117 | host replication repuser 192.168.1.15/32 md5 118 | host replication repuser 192.168.1.11/32 md5 119 | host replication repuser 192.168.1.26/32 md5 120 | 121 | --主节点配置 recovery.done (主节点) 122 | cp /opt/pgsql_9.4.1/share/recovery.conf.sample recovery.done 123 | recovery_target_timeline = 'latest' 124 | standby_mode = on 125 | primary_conninfo = 'host=192.168.1.11 port=1921 user=repuser' 126 | 127 | --pg_basebackup ( 备节点 ) 128 | pg_basebackup -D /data01/pg_data/pg_root -Fp -Xs -v -P -h 192.168.1.11 -p 1921 -U repuser 129 | cp ~/recovery.conf ~/pg_hba.conf $PGDATA 130 | 131 | --备节点配置 recovery.conf (备节点) 132 | mv recovery.done recovery.conf 133 | recovery_target_timeline = 'latest' 134 | standby_mode = on 135 | primary_conninfo = 'host=192.168.1.15 port=1921 user=repuser' 136 | 137 | --启备库 138 | pg_ctl start 139 | 备注:启动备库并观察流复制是否搭建成功。 140 | 141 | 五 sky_pg_cluster 数据库配置 142 | --初始数据部署 143 | create role sky_pg_cluster superuser nocreatedb nocreaterole noinherit login encrypted password 'qXzpigJCRmamDf6vv0uFpf4Qg'; 144 | create database sky_pg_cluster with template template0 encoding 'UTF8' owner sky_pg_cluster; 145 | \c sky_pg_cluster sky_pg_cluster 146 | create schema sky_pg_cluster ; 147 | create table cluster_status (id int unique default 1, last_alive timestamp(0) without time zone); 148 | 149 | --限制cluster_status表有且只有一行 : 150 | CREATE FUNCTION cannt_delete () 151 | RETURNS trigger 152 | LANGUAGE plpgsql AS $$ 153 | BEGIN 154 | RAISE EXCEPTION 'You can not delete!'; 155 | END; $$; 156 | 157 | CREATE TRIGGER cannt_delete BEFORE DELETE ON cluster_status FOR EACH ROW EXECUTE PROCEDURE cannt_delete(); 158 | CREATE TRIGGER cannt_truncate BEFORE TRUNCATE ON cluster_status FOR STATEMENT EXECUTE PROCEDURE cannt_delete(); 159 | 160 | -- 插入初始数据 161 | insert into cluster_status values (1, now()); 162 | 163 | --pg_hba.conf 164 | # sky_pg_cluster 165 | host sky_pg_cluster sky_pg_cluster 127.0.0.1/32 md5 166 | host sky_pg_cluster sky_pg_cluster 192.168.1.15/32 md5 167 | host sky_pg_cluster sky_pg_cluster 192.168.1.11/32 md5 168 | host sky_pg_cluster sky_pg_cluster 192.168.2.138/32 md5 169 | 170 | 备注:结合 keepalived 的 interval 配置,每 10 秒更新 cluster_status 表。 171 | 172 | 六 安装 Keepalived 173 | --下载 174 | wget http://www.keepalived.org/software/keepalived-1.2.15.tar.gz 175 | 176 | -- 安装 OpenSSL , popt 包 177 | yum -y install openssl openssl-devel popt 178 | 179 | --解压安装 180 | cd keepalived-1.2.15 181 | tar xvf keepalived-1.2.15.tar.gz 182 | # ./configure --prefix=/usr/local/keepalived --sysconf=/etc 183 | # make 184 | # make install 185 | 186 | --报错 187 | keepalived configure: error: No SO_MARK declaration in headers 188 | 备注:解决方法 --disable-fwmark 189 | 190 | --遇到的错误 191 | [root@db1 ~]# /etc/init.d/keepalived start 192 | Starting keepalived: /bin/bash: keepalived: command not found 193 | [FAILED] 194 | 195 | --解决方法 196 | ln -s /usr/local/keepalived/sbin/keepalived /usr/sbin/ 197 | 198 | --启动 keepalived 199 | [root@db1 etc]# service keepalived restart 200 | Stopping keepalived: [ OK ] 201 | Starting keepalived: [ OK ] 202 | 203 | --设置 keepalived 开机关闭 204 | [root@db1 etc]# chkconfig keepalived off 205 | 206 | 207 | 七 配置 Keepalived 208 | --/etc/keepalived/keepalived.conf 209 | ! Configuration File for keepalived 210 | 211 | global_defs { 212 | notification_email { 213 | francs3@163.com 214 | } 215 | smtp_server 127.0.0.1 216 | smtp_connect_timeout 30 217 | router_id DB1_PG_HA 218 | } 219 | 220 | vrrp_script check_pg_alived { 221 | script "/usr/local/bin/pg_moniter.sh" 222 | interval 10 223 | fall 5 # require 5 failures for KO 224 | } 225 | 226 | vrrp_instance VI_1 { 227 | state BACKUP 228 | nopreempt 229 | interface em1 230 | virtual_router_id 10 231 | priority 100 232 | advert_int 1 233 | authentication { 234 | auth_type PASS 235 | auth_pass t9rveMP0Z9S1 236 | } 237 | track_script { 238 | check_pg_alived 239 | } 240 | virtual_ipaddress { 241 | 192.168.1.26 242 | } 243 | smtp_alert 244 | notify_master /usr/local/bin/active_standby.sh 245 | } 246 | 247 | 备注:备节点的 keepalived 的配置和主节点的基本一样,除了 priority 改成 90。 248 | -------------------------------------------------------------------------------- /keepalived.conf: -------------------------------------------------------------------------------- 1 | ! Configuration File for keepalived 2 | 3 | global_defs { 4 | notification_email { 5 | francs3@163.com 6 | } 7 | smtp_server 127.0.0.1 8 | smtp_connect_timeout 30 9 | router_id DB1_PG_HA 10 | } 11 | 12 | vrrp_script check_pg_alived { 13 | script "/usr/local/bin/pg_moniter.sh" 14 | interval 10 15 | fall 5 # require 5 failures for KO 16 | } 17 | 18 | vrrp_instance VI_1 { 19 | state BACKUP 20 | nopreempt 21 | interface em1 22 | virtual_router_id 10 23 | priority 100 24 | advert_int 1 25 | authentication { 26 | auth_type PASS 27 | auth_pass t9rveMP0Z9S1 28 | } 29 | track_script { 30 | check_pg_alived 31 | } 32 | virtual_ipaddress { 33 | 192.168.1.26 34 | } 35 | smtp_alert 36 | notify_master /usr/local/bin/active_standby.sh 37 | } 38 | 39 | 备注:备节点的 keepalived 的配置和主节点的基本一样,除了 priority 改成 90。 40 | -------------------------------------------------------------------------------- /pg_moniter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Load Env 4 | export PGPORT=1921 5 | export PGUSER=sky_pg_cluster 6 | export PGDBNAME=sky_pg_cluster 7 | export PGDATA=/opt/database/pg94/pg_root 8 | export LANG=en_US.utf8 9 | export PGHOME=/opt/pgsql 10 | export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib 11 | export PATH=/opt/pgbouncer/bin:$PGHOME/bin:$PGPOOL_HOME/bin:$PATH:. 12 | 13 | MONITOR_LOG="/tmp/pg_monitor.log" 14 | SQL1="update cluster_status set last_alive = now();" 15 | SQL2='select 1;' 16 | 17 | # 如果是备库,则退出,此脚本不检查备库存活状态 18 | standby_flg=`psql -p $PGPORT -U postgres -At -c "select pg_is_in_recovery();"` 19 | if [ ${standby_flg} == 't' ]; then 20 | echo -e "`date +%F\ %T`: This is a standby database, exit!\n" >> $MONITOR_LOG 21 | exit 0 22 | fi 23 | 24 | # 主库上更新 cluster_state 表 25 | echo $SQL1 | psql -At -h 127.0.0.1 -p $PGPORT -U $PGUSER -d $PGDBNAME >> $MONITOR_LOG 26 | 27 | 28 | # 判断主库是否可用 29 | echo $SQL2 | psql -At -h 127.0.0.1 -p $PGPORT -U $PGUSER -d $PGDBNAME 30 | if [ $? -eq 0 ]; then 31 | echo -e "`date +%F\ %T`: Primary db is health." >> $MONITOR_LOG 32 | exit 0 33 | else 34 | echo -e "`date +%F\ %T`: Attention: Primary db is not health!" >> $MONITOR_LOG 35 | exit 1 36 | fi 37 | 38 | 39 | --------------------------------------------------------------------------------