├── README.md
├── active_standby.sh
├── install.txt
├── keepalived.conf
└── pg_moniter.sh


/README.md:
--------------------------------------------------------------------------------
 1 | ﻿Keepalived + PostgreSQL 流复制方式实现高可用 HA
 2 | ====================
 3 | 
 4 | 一 项目简介
 5 | ---
 6 | 
 7 |   HA 是数据库领域一个永恒的议题，同时也是最复杂的方案之一，PostgreSQL 本身并不提供任何高可用方案， 本文通过 Keepalived + PostgreSQL 流复制方式实现高可用 HA，故障切换逻辑和部分脚本参考德哥 sky_postgresql_cluster (https://github.com/digoal/sky_postgresql_cluster) 的 HA 项目，本文仅分享一种 PostgreSQL的高可用方案，脚本的切换逻辑可根据实际情况调整，如果您对此方案有更好的建议或补充，欢迎探讨。
 8 |        
 9 | Email: francs3@163.com
10 | 
11 | 二 需求
12 | ---
13 | 
14 | - PostgreSQL 9.4 版本：其它版本脚本需做相应调整
15 | - 硬件：2 台带远程管理卡的物理主机：在数据库发生切换时需要用到远程管理卡
16 | - 两台物理主机安装 Keepalived 程序
17 | 


--------------------------------------------------------------------------------
/active_standby.sh:
--------------------------------------------------------------------------------
  1 | #/bin/bash
  2 | 
  3 | # 环境变量
  4 | export PGPORT=1921
  5 | export PGUSER=sky_pg_cluster
  6 | export PG_OS_USER=pg94
  7 | export PGDBNAME=sky_pg_cluster
  8 | export PGDATA=/opt/database/pg94/pg_root
  9 | export LANG=en_US.utf8
 10 | export PGHOME=/opt/pgsql
 11 | export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib
 12 | export PATH=/opt/pgbouncer/bin:$PGHOME/bin:$PGPOOL_HOME/bin:$PATH:.
 13 | 
 14 | # 配置信息, LAG_MINUTES 配置允许的延迟时间
 15 | LAG_MINUTES=3
 16 | HOST_IP=`hostname -i`
 17 | NOTICE_EMAIL="francs3@163.com"
 18 | FAILOVE_LOG='/tmp/failover.log'
 19 | 
 20 | SQL1="select 'this_is_standby' as cluster_role from ( select pg_is_in_recovery() as std ) t where t.std is true;"
 21 | SQL2="select 'standby_in_allowed_lag' as cluster_lag from cluster_status where now()-last_alive < interval '$LAG_MINUTES min';"
 22 | 
 23 | # 配置 fence 设备地址和用户密码
 24 | FENCE_IP=192.168.1.21
 25 | FENCE_USER=xxxx
 26 | FENCE_PWD=xxxx
 27 | 
 28 | # VIP 已发生漂移，记录到日志文件
 29 | echo -e "`date +%F\ %T`: keepalived VIP switchover!" >> $FAILOVE_LOG
 30 | 
 31 | # VIP 已漂移，邮件通知
 32 | #echo -e "`date +%F\ %T`: ${HOST_IP}/${PGPORT} VIP 发生漂移，需排查问题！\n\nAuthor: francs(DBA)" | mutt -s "Error: 数据库 VIP 发生漂移 " ${NOTICE_EMAIL}
 33 | 
 34 | 
 35 | # pg_failover 函数，用于主库故障时激活从库
 36 | pg_failover()
 37 | {
 38 | # FENCE_STATUS 表示 fence 后成功标志，1 表示失败，0 表示成功
 39 | # PROMOTE_STATUS 表示激活备库成功标志，1 表示失败，0 表示成功
 40 | FENCE_STATUS=1
 41 | PROMOTE_STATUS=1
 42 | 
 43 | # 激活备库前需 Fence 关闭主库
 44 | for ((k=0;k<10;k++))
 45 | do
 46 | # fence命令, 设备不同的话, fence命令可能不一样.
 47 |   ipmitool -I lanplus -L OPERATOR -H $FENCE_IP -U $FENCE_USER -P $FENCE_PWD power reset
 48 |   if [ $? -eq 0 ]; then
 49 |     echo -e "`date +%F\ %T`: fence primary db host success."
 50 |     FENCE_STATUS=0
 51 |     break
 52 |   fi
 53 | sleep 1
 54 | done
 55 | 
 56 | if [ $FENCE_STATUS -ne 0 ]; then
 57 |   echo -e "`date +%F\ %T`: fence failed. Standby will not promote, please fix it manually."
 58 | return $FENCE_STATUS
 59 | fi
 60 | 
 61 | # 激活备库
 62 | su - $PG_OS_USER -c "pg_ctl promote"
 63 | if [ $? -eq 0 ]; then
 64 |    echo -e "`date +%F\ %T`: `hostname` promote standby success. " 
 65 |    PROMOTE_STATUS=0
 66 | fi
 67 | 
 68 | if [ $PROMOTE_STATUS -ne 0 ]; then
 69 |   echo -e "`date +%F\ %T`: promote standby failed."
 70 |   return $PROMOTE_STATUS
 71 | fi
 72 | 
 73 |  echo -e "`date +%F\ %T`: pg_failover() function call success."
 74 |  return 0
 75 | }
 76 | 
 77 | 
 78 | # 故障切换过程
 79 | # standby是否正常的标记(is in recovery), CNT=1 表示正常.
 80 | CNT=`echo $SQL1 | psql -At -h 127.0.0.1 -p $PGPORT -U $PGUSER -d $PGDBNAME -f - | grep -c this_is_standby`
 81 | echo -e "CNT: $CNT"
 82 | # 判断 standby lag 是否在接受范围内的标记, LAG=1 表示正常.
 83 | LAG=`echo $SQL2 | psql -At -h 127.0.0.1 -p $PGPORT -U $PGUSER -d $PGDBNAME | grep -c standby_in_allowed_lag`
 84 | echo -e "LAG: $LAG"
 85 | 
 86 | if [ $CNT -eq 1 ] && [ $LAG -eq 1 ]; then
 87 |   pg_failover >> $FAILOVE_LOG
 88 |   if [ $? -ne 0 ]; then
 89 |     echo -e "`date +%F\ %T`: pg_failover failed." >> $FAILOVE_LOG
 90 |     exit 1
 91 |   fi 
 92 | else
 93 |   echo -e "`date +%F\ %T`: `hostname` standby is not ok or laged far $LAG_MINUTES mintues from primary , failover not allowed! " >> $FAILOVE_LOG
 94 |   exit 1
 95 | fi
 96 | 
 97 | 
 98 | # 判断是否要进入failover过程
 99 | # 1. standby 正常 (is in recovery)
100 | # 2. standby lag 在接受范围内 
101 | 
102 | # failover过程
103 | # 1. fence 关闭主服务器
104 | # 2. 激活standby数据库
105 | 


--------------------------------------------------------------------------------
/install.txt:
--------------------------------------------------------------------------------
  1 | 一 环境信息
  2 | 机器：2 台  HP  ProLiant DL360 G6
  3 | 系统版本:  RHEL 5.5
  4 | PostgreSQL 版本： 9.4.1
  5 | 主节点 IP： 192.168.1.15
  6 | 备节点 IP： 192.168.1.11
  7 | Keepalived VIP：   192.168.1.26
  8 | FENCE DEVICE： HP
  9 | 
 10 | 二 系统配置 ( 主备节点 )
 11 | -- 创建用户
 12 | groupadd pg94
 13 | useradd pg94 -g pg94
 14 | passwd pg94
 15 | 
 16 | --创建目录
 17 | mkdir -p /data01/pg_data/
 18 | chown -R pg94:pg94 /data01/pg_data/
 19 | su - pg94
 20 | mkdir -p /data01/pg_data/pg_root
 21 | chmod 0700 /data01/pg_data/pg_root
 22 | 
 23 | --安装 Package
 24 | yum  -y install gcc readline readline-devel zlib zlib-devel python-devel ipmitool
 25 | 
 26 | 
 27 | 三 PostgreSQL 软件安装  ( 主备节点 )
 28 | --下载
 29 | wget -c https://ftp.postgresql.org/pub/source/v9.4.1/postgresql-9.4.1.tar.bz2
 30 | 
 31 | --安装 PostgreSQL 软件
 32 | tar jxvf postgresql-9.4.1.tar.bz2 
 33 | cd postgresql-9.4.1
 34 | ./configure --prefix=/opt/pgsql_9.4.1 --with-wal-blocksize=16 --with-pgport=1921
 35 | gmake world
 36 | gmake install-world
 37 | ln -s /opt/pgsql_9.4.1 /opt/pgsql
 38 | 
 39 | --配置环境变量
 40 | vim /home/pg94/.bash_profile 
 41 | 
 42 | export PGPORT=1921
 43 | export PGUSER=postgres
 44 | export PGDATA=/data01/pg_data/pg_root
 45 | export LANG=en_US.utf8
 46 | 
 47 | export PGHOME=/opt/pgsql
 48 | export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib
 49 | export DATE=`date +"%Y%m%d%H%M"`
 50 | export PATH=$PGHOME/bin:$PATH:.
 51 | export MANPATH=$PGHOME/share/man:$MANPATH
 52 | alias rm='rm -i'
 53 | alias ll='ls -lh'
 54 | 
 55 | 
 56 | --初始化数据库 ( 主节点 )
 57 | initdb -E UTF8 --locale=C  -D /data01/pg_data/pg_root -U postgres -W
 58 | Ww0pIXSrFACFwIdMBTfo08F5d
 59 | 
 60 | -- postgresql.conf 参数配置  ( 主节点 )
 61 | listen_addresses = '*'          # what IP address(es) to listen on;
 62 | port = 1921                             # (change requires restart)
 63 | max_connections = 1000                  # (change requires restart)
 64 | unix_socket_directories = '/tmp'        # comma-separated list of directories
 65 | unix_socket_permissions = 0777          # begin with 0 to use octal notation
 66 | shared_buffers = 1024MB                 # min 128kB
 67 | work_mem = 4MB                          # min 64kB
 68 | maintenance_work_mem = 1024MB           # min 1MB
 69 | dynamic_shared_memory_type = posix      # the default is the first option
 70 | bgwriter_delay = 10ms                   # 10-10000ms between rounds
 71 | wal_level = hot_standby                 # minimal, archive, hot_standby, or logical
 72 | synchronous_commit = off                # synchronization level;
 73 | wal_buffers = -1                        # min 32kB, -1 sets based on shared_buffers
 74 | checkpoint_segments = 32                # in logfile segments, min 1, 16MB each
 75 | checkpoint_timeout = 5min               # range 30s-1h
 76 | archive_mode = on               # allows archiving to be done
 77 | archive_command = '/bin/date'           # command to use to archive a logfile segment
 78 | max_wal_senders = 8             # max number of walsender processes
 79 | wal_keep_segments = 128          # in logfile segments, 16MB each; 0 disables
 80 | hot_standby = on                        # "on" allows queries during recovery
 81 | default_statistics_target = 100 # range 1-10000
 82 | constraint_exclusion = partition        # on, off, or partition
 83 | log_destination = 'csvlog'              # Valid values are combinations of
 84 | logging_collector = on          # Enable capturing of stderr and csvlog
 85 | log_directory = 'pg_log'                # directory where log files are written,
 86 | log_filename = 'postgresql-%Y-%m-%d_%H%M%S.log' # log file name pattern,
 87 | log_file_mode = 0600                    # creation mode for log files,
 88 | log_rotation_age = 1d                   # Automatic rotation of logfiles will
 89 | log_rotation_size = 10MB                # Automatic rotation of logfiles will
 90 | log_timezone = 'PRC'
 91 | datestyle = 'iso, mdy'
 92 | timezone = 'PRC'
 93 | lc_messages = 'C'                       # locale for system error message
 94 | lc_monetary = 'C'                       # locale for monetary formatting
 95 | lc_numeric = 'C'                        # locale for number formatting
 96 | lc_time = 'C'                           # locale for time formatting
 97 | 
 98 | 
 99 | 四 流复制搭建
100 | --创建流复制用户( 主节点 )
101 | CREATE USER repuser
102 |   REPLICATION 
103 |   LOGIN
104 |   CONNECTION LIMIT 5
105 |   ENCRYPTED PASSWORD 'Hgo7haSIASGAJYSsFqbYfyx2q';
106 | 
107 | -- 配置.pgpass 密码文件 (主，备节点)
108 | vim /home/pg94/.pgpass
109 | 192.168.1.15:1921:replication:repuser:Hgo7haSIASGAJYSsFqbYfyx2q
110 | 192.168.1.11:1921:replication:repuser:Hgo7haSIASGAJYSsFqbYfyx2q
111 | 192.168.1.26:1921:replication:repuser:Hgo7haSIASGAJYSsFqbYfyx2q
112 | 
113 | chmod 0600 /home/pg94/.pgpass 
114 | 
115 | --配置 pg_hba.conf ( 主节点)
116 | # replication 
117 | host replication repuser 192.168.1.15/32 md5
118 | host replication repuser 192.168.1.11/32 md5
119 | host replication repuser 192.168.1.26/32 md5
120 | 
121 | --主节点配置 recovery.done (主节点)
122 | cp /opt/pgsql_9.4.1/share/recovery.conf.sample recovery.done
123 | recovery_target_timeline = 'latest'
124 | standby_mode = on
125 | primary_conninfo = 'host=192.168.1.11 port=1921 user=repuser'
126 | 
127 | --pg_basebackup ( 备节点 )
128 | pg_basebackup -D /data01/pg_data/pg_root -Fp -Xs -v -P -h 192.168.1.11 -p 1921 -U repuser
129 | cp ~/recovery.conf ~/pg_hba.conf $PGDATA
130 | 
131 | --备节点配置 recovery.conf (备节点)
132 | mv recovery.done recovery.conf
133 | recovery_target_timeline = 'latest'
134 | standby_mode = on
135 | primary_conninfo = 'host=192.168.1.15 port=1921 user=repuser'
136 | 
137 | --启备库
138 | pg_ctl start 
139 | 备注：启动备库并观察流复制是否搭建成功。
140 | 
141 | 五 sky_pg_cluster 数据库配置
142 | --初始数据部署
143 | create role sky_pg_cluster superuser nocreatedb nocreaterole noinherit login encrypted password 'qXzpigJCRmamDf6vv0uFpf4Qg';
144 | create database sky_pg_cluster with template template0 encoding 'UTF8' owner sky_pg_cluster;
145 | \c sky_pg_cluster  sky_pg_cluster
146 | create schema sky_pg_cluster ;
147 | create table cluster_status (id int unique default 1, last_alive timestamp(0) without time zone);
148 | 
149 | --限制cluster_status表有且只有一行 :
150 | CREATE FUNCTION cannt_delete ()
151 | RETURNS trigger
152 | LANGUAGE plpgsql AS $$
153 | BEGIN
154 | RAISE EXCEPTION 'You can not delete!';
155 | END; $$;
156 | 
157 | CREATE TRIGGER cannt_delete BEFORE DELETE ON cluster_status FOR EACH ROW EXECUTE PROCEDURE cannt_delete();
158 | CREATE TRIGGER cannt_truncate BEFORE TRUNCATE ON cluster_status FOR STATEMENT EXECUTE PROCEDURE cannt_delete();
159 | 
160 | -- 插入初始数据
161 | insert into cluster_status values (1, now());
162 | 
163 | --pg_hba.conf
164 | # sky_pg_cluster
165 | host sky_pg_cluster sky_pg_cluster 127.0.0.1/32 md5
166 | host sky_pg_cluster sky_pg_cluster 192.168.1.15/32 md5
167 | host sky_pg_cluster sky_pg_cluster 192.168.1.11/32 md5
168 | host sky_pg_cluster sky_pg_cluster 192.168.2.138/32 md5
169 | 
170 | 备注：结合 keepalived 的 interval 配置，每 10 秒更新 cluster_status 表。
171 | 
172 | 六 安装  Keepalived 
173 | --下载
174 | wget http://www.keepalived.org/software/keepalived-1.2.15.tar.gz
175 | 
176 | -- 安装 OpenSSL , popt 包
177 | yum -y install openssl  openssl-devel popt
178 | 
179 | --解压安装
180 | cd keepalived-1.2.15
181 | tar xvf keepalived-1.2.15.tar.gz 
182 | # ./configure --prefix=/usr/local/keepalived --sysconf=/etc
183 | # make
184 | # make install
185 | 
186 | --报错
187 | keepalived configure: error: No SO_MARK declaration in headers
188 | 备注：解决方法 --disable-fwmark
189 | 
190 | --遇到的错误
191 | [root@db1 ~]# /etc/init.d/keepalived start
192 | Starting keepalived: /bin/bash: keepalived: command not found
193 |                                                            [FAILED]
194 |                                                            
195 | --解决方法
196 | ln -s /usr/local/keepalived/sbin/keepalived  /usr/sbin/
197 | 
198 | --启动 keepalived
199 | [root@db1 etc]# service keepalived restart
200 | Stopping keepalived:                                       [  OK  ]
201 | Starting keepalived:                                       [  OK  ]
202 | 
203 | --设置 keepalived 开机关闭
204 | [root@db1 etc]# chkconfig keepalived off
205 | 
206 | 
207 | 七  配置 Keepalived
208 | --/etc/keepalived/keepalived.conf
209 | ! Configuration File for keepalived
210 | 
211 | global_defs {
212 |    notification_email {
213 |      francs3@163.com
214 |    }
215 |    smtp_server 127.0.0.1
216 |    smtp_connect_timeout 30
217 |    router_id DB1_PG_HA
218 | }
219 | 
220 |  vrrp_script check_pg_alived {
221 |     script "/usr/local/bin/pg_moniter.sh" 
222 |     interval 10 
223 |     fall 5    # require 5 failures for KO
224 | } 
225 | 
226 | vrrp_instance VI_1 {
227 |     state BACKUP
228 |     nopreempt
229 |     interface em1
230 |     virtual_router_id 10
231 |     priority 100
232 |     advert_int 1
233 |     authentication {
234 |         auth_type PASS
235 |         auth_pass t9rveMP0Z9S1
236 |     }
237 |       track_script { 
238 |         check_pg_alived
239 |      }
240 |     virtual_ipaddress {
241 |         192.168.1.26
242 |     }
243 |      smtp_alert
244 |      notify_master /usr/local/bin/active_standby.sh
245 | }
246 | 
247 | 备注：备节点的 keepalived 的配置和主节点的基本一样，除了 priority 改成 90。
248 | 


--------------------------------------------------------------------------------
/keepalived.conf:
--------------------------------------------------------------------------------
 1 | ! Configuration File for keepalived
 2 | 
 3 | global_defs {
 4 |    notification_email {
 5 |      francs3@163.com
 6 |    }
 7 |    smtp_server 127.0.0.1
 8 |    smtp_connect_timeout 30
 9 |    router_id DB1_PG_HA
10 | }
11 | 
12 |  vrrp_script check_pg_alived {
13 |     script "/usr/local/bin/pg_moniter.sh" 
14 |     interval 10 
15 |     fall 5    # require 5 failures for KO
16 | } 
17 | 
18 | vrrp_instance VI_1 {
19 |     state BACKUP
20 |     nopreempt
21 |     interface em1
22 |     virtual_router_id 10
23 |     priority 100
24 |     advert_int 1
25 |     authentication {
26 |         auth_type PASS
27 |         auth_pass t9rveMP0Z9S1
28 |     }
29 |       track_script { 
30 |         check_pg_alived
31 |      }
32 |     virtual_ipaddress {
33 |         192.168.1.26
34 |     }
35 |      smtp_alert
36 |      notify_master /usr/local/bin/active_standby.sh
37 | }
38 | 
39 | 备注：备节点的 keepalived 的配置和主节点的基本一样，除了 priority 改成 90。
40 | 


--------------------------------------------------------------------------------
/pg_moniter.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Load Env
 4 | export PGPORT=1921
 5 | export PGUSER=sky_pg_cluster
 6 | export PGDBNAME=sky_pg_cluster
 7 | export PGDATA=/opt/database/pg94/pg_root
 8 | export LANG=en_US.utf8
 9 | export PGHOME=/opt/pgsql
10 | export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib
11 | export PATH=/opt/pgbouncer/bin:$PGHOME/bin:$PGPOOL_HOME/bin:$PATH:.
12 | 
13 | MONITOR_LOG="/tmp/pg_monitor.log"
14 | SQL1="update cluster_status set last_alive = now();"
15 | SQL2='select 1;'
16 | 
17 | # 如果是备库,则退出，此脚本不检查备库存活状态
18 | standby_flg=`psql -p $PGPORT -U postgres -At -c "select pg_is_in_recovery();"`
19 | if [ ${standby_flg} == 't' ]; then
20 |     echo -e "`date +%F\ %T`: This is a standby database, exit!\n" >> $MONITOR_LOG
21 |     exit 0
22 | fi
23 | 
24 | # 主库上更新 cluster_state 表
25 | echo $SQL1 | psql -At -h 127.0.0.1 -p $PGPORT -U $PGUSER -d $PGDBNAME >> $MONITOR_LOG
26 | 
27 | 
28 | # 判断主库是否可用
29 | echo $SQL2 | psql -At -h 127.0.0.1 -p $PGPORT -U $PGUSER -d $PGDBNAME 
30 | if [ $? -eq 0 ]; then
31 |    echo -e "`date +%F\ %T`:  Primary db is health."  >> $MONITOR_LOG
32 |    exit 0
33 | else
34 |    echo -e "`date +%F\ %T`:  Attention: Primary db is not health!" >> $MONITOR_LOG
35 |    exit 1
36 | fi
37 | 
38 |   
39 | 


--------------------------------------------------------------------------------