├── m_s_1.png ├── m_s_2.png ├── m_s_3.png ├── m_s_4.png ├── m_s_ha_1.png ├── m_s_ha_2.png ├── m_s_ha_3.png ├── m_s_ha_4.png ├── m_s_ha_arch.png ├── check_sky_pg_cluster_alive.sh ├── check_standby_lag.sh ├── base_config.txt ├── port_probe.c ├── keepalive.sql ├── README.md ├── LICENSE ├── INSTALL.txt └── sky_pg_cluster.sh /m_s_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/HEAD/m_s_1.png -------------------------------------------------------------------------------- /m_s_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/HEAD/m_s_2.png -------------------------------------------------------------------------------- /m_s_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/HEAD/m_s_3.png -------------------------------------------------------------------------------- /m_s_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/HEAD/m_s_4.png -------------------------------------------------------------------------------- /m_s_ha_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/HEAD/m_s_ha_1.png -------------------------------------------------------------------------------- /m_s_ha_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/HEAD/m_s_ha_2.png -------------------------------------------------------------------------------- /m_s_ha_3.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/HEAD/m_s_ha_3.png -------------------------------------------------------------------------------- /m_s_ha_4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/HEAD/m_s_ha_4.png -------------------------------------------------------------------------------- /m_s_ha_arch.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/HEAD/m_s_ha_arch.png -------------------------------------------------------------------------------- /check_sky_pg_cluster_alive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # nagios(/etc/xinetd.d/nrpe)中配置postgres用户调用此脚本 3 | 4 | export PGHOME=/opt/pgsql 5 | export LANG=en_US.utf8 6 | export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH 7 | export DATE=`date +"%Y%m%d%H%M"` 8 | export PATH=$PGHOME/bin:$PATH:. 9 | 10 | # FILE需和 sky_pg_clusterd.sh 里面配置的NAGIOS_FILE1 一致. 11 | # ALIVE_MINUTES=1 表示1分钟内$FILE被修改过, 心跳存在. 否则心跳停止(告警). 12 | # 文件对应sky_pg_cluster.sh中的NAGIOS_LOG 13 | FILE=/tmp/sky_pg_clusterd.log 14 | ALIVE_MINUTES=1 15 | EXIST=1 16 | ALIVE_CNT=0 17 | 18 | if [ -f $FILE ]; then 19 | ALIVE_CNT=`find $FILE -mmin -$ALIVE_MINUTES -print|wc -l` 20 | if [ $ALIVE_CNT -eq 1 ]; then 21 | exit 0 22 | else 23 | echo -e "keepalive timeout $ALIVE_MINUTES mintues." 24 | exit 2 25 | fi 26 | else 27 | echo -e "`date +%F%T` file $FILE not exists. 
" 28 | exit 2 29 | fi 30 | 31 | exit 1 32 | 33 | # Author : Digoal zhou 34 | # Email : digoal@126.com 35 | # Blog : http://blog.163.com/digoal@126/ 36 | -------------------------------------------------------------------------------- /check_standby_lag.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # nagios(/etc/xinetd.d/nrpe)中配置postgres用户调用此脚本 3 | export PGHOME=/opt/pgsql 4 | export LANG=en_US.utf8 5 | export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH 6 | export DATE=`date +"%Y%m%d%H%M"` 7 | export PATH=$PGHOME/bin:$PATH:. 8 | 9 | # 配置, node1,node2 可能不一致, 并且需配置.pgpass存储以下密码校验信息 10 | # LAG_MINUTES=3 表示3分钟. 延时超过3分钟则告警. 11 | LOCAL_IP=127.0.0.1 12 | PGUSER=sky_pg_cluster 13 | PGPORT=1921 14 | PGDBNAME=sky_pg_cluster 15 | LAG_MINUTES=3 16 | SQL1="set client_min_messages=warning; select 'standby_in_allowed_lag' as cluster_lag from cluster_status where now()-last_alive < interval '$LAG_MINUTES min';" 17 | 18 | # standby lag 在接受范围内的标记, LAG=1 表示正常. 19 | LAG=`echo $SQL1 | psql -h $LOCAL_IP -p $PGPORT -U $PGUSER -d $PGDBNAME -f - | grep -c standby_in_allowed_lag` 20 | if [ $LAG -eq 1 ]; then 21 | exit 0 22 | else 23 | echo -e "standby is laged far $LAG_MINUTES mintues from primary . 
" 24 | exit 1 25 | fi 26 | 27 | exit 1 28 | 29 | # Author : Digoal zhou 30 | # Email : digoal@126.com 31 | # Blog : http://blog.163.com/digoal@126/ 32 | -------------------------------------------------------------------------------- /base_config.txt: -------------------------------------------------------------------------------- 1 | 基础配置 : 2 | 3 | # 注意配置归档, keep, recovery.conf(restore_command), nfs, mount 4 | # visudo -f /etc/sudoers 5 | # postgres ALL=(ALL) NOPASSWD: /sbin/ifup 6 | # postgres ALL=(ALL) NOPASSWD: /sbin/ifdown 7 | # postgres ALL=(ALL) NOPASSWD: /sbin/arping 8 | # postgres ALL=(ALL) NOPASSWD: /bin/mount 9 | # postgres ALL=(ALL) NOPASSWD: /bin/umount 10 | # 注释 #Defaults requiretty 11 | # 脚本中用sudo调的命令请使用绝对路径 12 | 13 | # fence脚本大部分在cman包中 14 | # yum install -y cman 15 | 16 | 配置postgres用户的ssh无密钥认证 (HOST1, HOST2) 17 | # vi /etc/ssh/sshd_config 18 | PubkeyAuthentication yes 19 | 20 | # service sshd restart 21 | 22 | # su - postgres 23 | $ ssh-keygen -t rsa 24 | 不要输入passphrase 25 | $ cd ~/.ssh 26 | $ 将 id_rsa.pub 内容拷贝到对方主机的postgres用户下的 ~/.ssh/authorized_keys 27 | $ chmod 600 ~/.ssh/authorized_keys 28 | 29 | # 验证无密码配置是否正确 30 | ssh 192.168.111.42 date 31 | Sun Jan 4 15:54:26 CST 2015 32 | 33 | ssh 192.168.111.37 date 34 | Sun Jan 4 15:54:26 CST 2015 35 | 36 | 37 | 38 | # 可能的话, 请使用固定的nfs端口 39 | 40 | nfs : 41 | # vi /etc/exports 42 | /opt/arch 192.168.111.42/24(ro,no_root_squash,sync) 43 | /opt/arch 192.168.111.37/24(ro,no_root_squash,sync) 44 | 45 | # chkconfig nfs on 46 | # service nfs start 47 | 48 | peer 归档DIR : 49 | # mkdir /opt/peer_arch 50 | 51 | postgresql.conf 52 | 数据库归档命令 : 53 | archive_command = 'DIR=/opt/arch/`date +%F`; test ! -d $DIR && mkdir -p $DIR && chmod 755 $DIR; test ! 
-f $DIR/%f && cp %p $DIR/%f; chmod 755 $DIR/%f' 54 | 55 | 56 | recovery.conf 57 | 恢复命令 : 58 | restore_command = 'PEER_DIR=/opt/peer_arch; cp $PEER_DIR/*/%f %p' 59 | recovery_target_timeline = 'latest' 60 | standby_mode = on 61 | primary_conninfo = 'host=192.168.111.130 port=1921 user=replica keepalives_idle=60' 62 | 63 | 64 | 密码文件 : 65 | # su - postgres 66 | $ vi ~/.pgpass 67 | 192.168.111.130:1921:replication:replica:replica 68 | 192.168.111.130:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 69 | 192.168.111.131:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 70 | 192.168.111.37:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 71 | 192.168.111.42:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 72 | 127.0.0.1:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 73 | 74 | $ chmod 400 ~/.pgpass 75 | 76 | # fence命令在sky_pg_cluster.sh fence函数中, 不同的主机, fence需要修改sky_pg_cluster.sh进行定制 77 | # 无限fence, 加参数 force, 加其他参数不强制fence 78 | # ipmitool -I lanplus -L OPERATOR -H $IP -U $USER -P $PWD power reset 79 | # fence_rsa -a $IP -l $USER -p $PWD -o reboot 80 | # fence_ilo -a $IP -l $USER -p $PWD -o reboot 81 | 82 | 83 | # Author : Digoal zhou 84 | # Email : digoal@126.com 85 | # Blog : http://blog.163.com/digoal@126/ 86 | -------------------------------------------------------------------------------- /port_probe.c: -------------------------------------------------------------------------------- 1 | /* 2 | # 用于探测仲裁服务器上的vip端口代理. 3 | # install: 4 | # gcc -O3 -Wall -Wextra -Werror -g -o port_probe ./port_probe.c 5 | 6 | # Author : Digoal zhou 7 | # Email : digoal@126.com 8 | # Blog : http://blog.163.com/digoal@126/ 9 | */ 10 | 11 | 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | 24 | 25 | // 错误函数, 当exit_val=0只输出错误信息, 不退出程序. 
其他值输出错误信息并退出程序 26 | void error(char * msg, int exit_val); 27 | 28 | void error(char * msg, int exit_val) { 29 | fprintf(stderr, "%s: %s\n", msg, strerror(errno)); 30 | // if exit_val == 0, not exit the program. 31 | if (exit_val) 32 | exit(exit_val); 33 | } 34 | 35 | int main(int argc,char *argv[]) 36 | { 37 | if(argc == 1) 38 | error("USAGE [program ip port]", 1); 39 | int cfd; 40 | struct sockaddr_in s_add; 41 | 42 | cfd = socket(AF_INET, SOCK_STREAM, 0); 43 | if(-1 == cfd) 44 | error("socket create failed!", -1); 45 | fprintf(stdout, "socket created!\n"); 46 | 47 | bzero(&s_add, sizeof(struct sockaddr_in)); 48 | s_add.sin_family=AF_INET; 49 | s_add.sin_addr.s_addr= inet_addr(argv[1]); 50 | s_add.sin_port=htons(atoi(argv[2])); 51 | 52 | // 设置连接超时, 否则如果端口不通, connect可能会很久. 53 | struct timeval tv_timeout; 54 | tv_timeout.tv_sec = 2; 55 | tv_timeout.tv_usec = 0; 56 | 57 | // 避免本地出现TIME_WAIT 58 | struct linger { 59 | int l_onoff; /* 0 = off, nozero = on */ 60 | int l_linger; /* linger time */ 61 | }; 62 | struct linger so_linger; 63 | so_linger.l_onoff = 1; 64 | so_linger.l_linger = 0; 65 | 66 | if (setsockopt(cfd, SOL_SOCKET, SO_SNDTIMEO, (void *) &tv_timeout, sizeof(struct timeval)) < 0) { 67 | error("setsockopt SO_SNDTIMEO error!", -1); 68 | } 69 | if (setsockopt(cfd, SOL_SOCKET, SO_RCVTIMEO, (void *) &tv_timeout, sizeof(struct timeval)) < 0) { 70 | error("setsockopt SO_RCVTIMEO error!", -1); 71 | } 72 | if (setsockopt(cfd, SOL_SOCKET, SO_LINGER, (void *) &so_linger, sizeof(so_linger)) < 0) { 73 | error("setsockopt SO_LINGER error!", -1); 74 | } 75 | 76 | if(-1 == connect(cfd, (struct sockaddr *)(&s_add), sizeof(struct sockaddr))) { 77 | error("connect failed!", -1); 78 | } 79 | fprintf(stdout, "connect ok!\n"); 80 | 81 | close(cfd); 82 | return 0; 83 | } 84 | -------------------------------------------------------------------------------- /keepalive.sql: -------------------------------------------------------------------------------- 1 | -- 建议使用superuser, 
-- Rationale for using a superuser role: http://blog.163.com/digoal@126/blog/static/163877040201331995623214/
create role sky_pg_cluster superuser nocreatedb nocreaterole noinherit login encrypted password 'SKY_PG_cluster_321';
create database sky_pg_cluster with template template0 encoding 'UTF8' owner sky_pg_cluster;
\c sky_pg_cluster sky_pg_cluster
create schema sky_pg_cluster authorization sky_pg_cluster;

-- Heartbeat table. It must hold exactly one row:
--   * "not null" + "check (id = 1)" + "unique" reject any second row
--     (unique alone would still admit other ids or NULLs);
--   * the triggers below reject DELETE and TRUNCATE of the existing row.
create table cluster_status (id int unique not null default 1 check (id = 1), last_alive timestamp(0) without time zone, rep_lag int8);

-- Trigger function that refuses the triggering statement.
CREATE FUNCTION cannt_delete ()
RETURNS trigger
LANGUAGE plpgsql AS $$
BEGIN
  RAISE EXCEPTION 'You can not delete!';
END; $$;

CREATE TRIGGER cannt_delete
BEFORE DELETE ON cluster_status
FOR EACH ROW EXECUTE PROCEDURE cannt_delete();

CREATE TRIGGER cannt_truncate
BEFORE TRUNCATE ON cluster_status
FOR STATEMENT EXECUTE PROCEDURE cannt_delete();

-- Insert the initial row.
insert into cluster_status values (1, now(), 9999999999);

-- Create the test function, used to check that the database is healthy, including every tablespace.
-- (Note: the earlier version used "alter table ... set tablespace" for the test, which generated a lot of xlog and needed an exclusive lock; it now uses update.)
-- Updating data in the different tablespaces does not reveal a tablespace problem immediately, because most of the data is in shared_buffers.
-- If the file system behind a tablespace has an I/O problem, a class 58 error is raised at checkpoint time.
-- Calling pg_stat_file exposes an I/O problem immediately.
-- cluster_keepalive_test(i_peer_ip): heartbeat entry point.
--   On a standby (pg_is_in_recovery() is true) it raises a notice and returns.
--   Otherwise it refreshes cluster_status.last_alive and, when a
--   pg_stat_replication row exists for client_addr = i_peer_ip, also stores
--   the xlog distance between the current insert location and sent_location
--   in rep_lag.
--   The per-tablespace probe that follows is currently disabled by an
--   unconditional return and is kept for when it is re-enabled.
create or replace function cluster_keepalive_test(i_peer_ip inet) returns void as $$
declare
  v_spcname text;
  v_spcoid oid;
  v_nspname name := 'sky_pg_cluster';
  v_rep_lag int8;
  v_t timestamp without time zone;
  v_tbl text;  -- schema-qualified, identifier-quoted name of the probe table
begin
  if ( pg_is_in_recovery() ) then
    raise notice 'this is standby node.';
    return;
  end if;
  select pg_xlog_location_diff(pg_current_xlog_insert_location(),sent_location) into v_rep_lag from pg_stat_replication where client_addr=i_peer_ip;
  if found then
    -- standby is connected
    update cluster_status set last_alive=now(), rep_lag=v_rep_lag;
  else
    -- standby is not connected
    update cluster_status set last_alive=now();
  end if;

  -- Tablespace checking is temporarily disabled: return here.
  return;

  -- Tablespace heartbeat, touched at most once a minute to reduce update load.
  FOR v_spcname,v_spcoid IN
    select spcname,oid from pg_tablespace where spcname <> 'pg_global'
  LOOP
    -- Quote both identifiers so the table that is created carries exactly the
    -- relname looked up below, whatever characters the tablespace name has.
    v_tbl := quote_ident(v_nspname)||'.'||quote_ident('t_'||v_spcname);
    perform 1 from pg_class where
      ( reltablespace=v_spcoid or reltablespace=0 )
      and relname='t_'||v_spcname
      and relkind='r'
      and relnamespace=(select oid from pg_namespace where nspname=v_nspname)
      limit 1;
    if not found then
      execute 'create table '||v_tbl||' (crt_time timestamp) tablespace '||quote_ident(v_spcname);
      execute 'insert into '||v_tbl||' values (now())';
      perform pg_stat_file(pg_relation_filepath(v_tbl::regclass));
    else
      execute 'update '||v_tbl||' set crt_time=now() where now()-crt_time> interval ''1 min'' returning crt_time' into v_t;
      if v_t is not null then
        -- a row was refreshed; stat the relation file to surface I/O errors now
        perform pg_stat_file(pg_relation_filepath(v_tbl::regclass));
      end if;
    end if;
  END LOOP;
end;
$$ language plpgsql strict;
-- After creating the test function it is best to run it once to confirm it works, because some versions have different system catalogs and the function may need adjusting.
-- 9.2 and 9.3 work without changes.
81 | 82 | 83 | 84 | # Author : Digoal zhou 85 | # Email : digoal@126.com 86 | # Blog : http://blog.163.com/digoal@126/ 87 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | PostgreSQL_HA_with_primary_standby_2vip 2 | ======================================= 3 | 4 | A HA script for PostgreSQL with 2 HOST (one for primary, one for standby), Primary with one VIP, Standby with another VIP. Auto failover and failback. 5 | 6 | 两台主机, 分别负责primary和standby; 7 | 8 | 2个虚拟IP, 分别对应primary和standby; 9 | 10 | 三种状态, primary, standby, primary_standby; 11 | 12 | 三种状态自由切换: 13 | 14 | 当1台主机异常时, 另一台主机承担primary_standby角色, 并启动2个虚拟IP. 15 | 16 | 正常情况下两台主机分别承担primary和standby角色, 分别启动一个虚拟IP. 17 | 18 | 应用程序连接虚拟IP, 其中一个虚拟IP对应的是primary, 另一个虚拟IP对应的是standby. 19 | 20 | 虚拟IP和角色的关系固定, 不会变化, 例如192.168.111.130对应primary角色, 那么不管怎么切换, 他们始终在一起(谁是primary,谁就会启动192.168.111.130). 21 | 22 | 部署视频参考: 23 | 24 | http://www.tudou.com/programs/view/bIbZ85SrsHM/ 25 | 26 | http://www.tudou.com/programs/view/kdRPT6dSp_0/ 27 | 28 | http://www.tudou.com/programs/view/I6bxk2u3xdY/ 29 | 30 | ======================================= 31 | 32 | 数据库角色转变和心跳原理 : 33 | 34 | 1. 根据文件recovery.conf是否存在检测本地节点角色 35 | 36 | 存在(standby), 不存在(master) 37 | 38 | 2. 加载NFS对端归档目录 39 | 40 | 3. 启动数据库 41 | 如果是standby 42 | 启动数据库 43 | 如果是master 44 | 如果其他主机未启动VIPM, 启动数据库 45 | 46 | 4. 启动VIP 47 | 如果是standby 48 | 启动vips 49 | 如果是master 50 | 如果vipm已被其他节点启动 51 | 降级为standby 52 | 启动vips 53 | 如果vipm没有被其他节点启动 54 | 启动vipm 55 | 56 | 5. 触发第一次心跳 57 | 58 | 6. 循环心跳检测 59 | 60 | ======================================= 61 | 62 | 不同的角色, 循环逻辑不同: 63 | 64 | ======================================= 65 | master角色, 循环检查 66 | 67 | 1. 网关检查, 反映本地网络状况 68 | 69 | 2. 本地心跳检查, 反映本地数据库健康状态 70 | 71 | 3. 本地角色对应IP检查 72 | 73 | 4. 检查VIPS,PORT,数据库心跳 74 | 75 | 如果本地健康,对端不健康 76 | 77 | 触发切换 78 | 79 | 1. 主节点fence standby 80 | 81 | 2. 主节点接管VIPS 82 | 83 | 3. 
主节点转换master_standby角色 84 | 85 | ======================================= 86 | standby角色, 循环检查 87 | 88 | 1. 网关检查, 反映本地网络状况 89 | 90 | 2. 本地心跳检查, 反映本地数据库健康状态 91 | 92 | 3. 本地角色对应IP检查 93 | 94 | 4. 检查备延迟, 判断是否允许promote 95 | 96 | 5. 检查VIPM,PORT,数据库心跳 97 | 98 | 如果本地健康,对端不健康 99 | 100 | 触发切换 101 | 102 | 1. 备节点fence master 103 | 104 | 2. 备节点停库 105 | 106 | 3. 备节点注释restore_command 107 | 108 | 4. 备节点启动数据库 109 | 110 | 5. 备节点激活数据库, 修改restore_command 111 | 112 | 6. 备节点接管VIPM 113 | 114 | 7. 备节点转换master_standby角色 115 | 116 | ======================================= 117 | master_standby角色, 循环检查 118 | 119 | 1. 检查对端数据库心跳 120 | 121 | 如果对端数据库心跳正常 122 | 123 | 触发释放vips 124 | 125 | 1. 释放vips 126 | 127 | 2. 转换为master角色 128 | 129 | 图片 130 | 131 | 架构 132 | ![架构](https://github.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/raw/master/m_s_ha_1.png) 133 | ![架构](https://github.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/raw/master/m_s_ha_arch.png) 134 | 135 | 主角 136 | ![主角](https://github.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/raw/master/m_s_ha_2.png) 137 | 138 | 备角 139 | ![备角](https://github.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/raw/master/m_s_ha_3.png) 140 | 141 | 主备合一角 142 | ![主备合一](https://github.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/raw/master/m_s_ha_4.png) 143 | 144 | 逻辑 145 | ![逻辑1](https://github.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/blob/master/m_s_1.png) 146 | ![逻辑2](https://github.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/blob/master/m_s_2.png) 147 | ![逻辑3](https://github.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/blob/master/m_s_3.png) 148 | ![逻辑4](https://github.com/digoal/PostgreSQL_HA_with_primary_standby_2vip/blob/master/m_s_4.png) 149 | 150 | # Author : Digoal zhou 151 | # Email : digoal@126.com 152 | # Blog : http://blog.163.com/digoal@126/ 153 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | GNU GENERAL PUBLIC LICENSE 2 | Version 2, June 1991 3 | 4 | Copyright (C) 1989, 1991 Free Software Foundation, Inc., 5 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA 6 | Everyone is permitted to copy and distribute verbatim copies 7 | of this license document, but changing it is not allowed. 8 | 9 | Preamble 10 | 11 | The licenses for most software are designed to take away your 12 | freedom to share and change it. By contrast, the GNU General Public 13 | License is intended to guarantee your freedom to share and change free 14 | software--to make sure the software is free for all its users. This 15 | General Public License applies to most of the Free Software 16 | Foundation's software and to any other program whose authors commit to 17 | using it. (Some other Free Software Foundation software is covered by 18 | the GNU Lesser General Public License instead.) You can apply it to 19 | your programs, too. 20 | 21 | When we speak of free software, we are referring to freedom, not 22 | price. Our General Public Licenses are designed to make sure that you 23 | have the freedom to distribute copies of free software (and charge for 24 | this service if you wish), that you receive source code or can get it 25 | if you want it, that you can change the software or use pieces of it 26 | in new free programs; and that you know you can do these things. 27 | 28 | To protect your rights, we need to make restrictions that forbid 29 | anyone to deny you these rights or to ask you to surrender the rights. 30 | These restrictions translate to certain responsibilities for you if you 31 | distribute copies of the software, or if you modify it. 32 | 33 | For example, if you distribute copies of such a program, whether 34 | gratis or for a fee, you must give the recipients all the rights that 35 | you have. You must make sure that they, too, receive or can get the 36 | source code. 
And you must show them these terms so they know their 37 | rights. 38 | 39 | We protect your rights with two steps: (1) copyright the software, and 40 | (2) offer you this license which gives you legal permission to copy, 41 | distribute and/or modify the software. 42 | 43 | Also, for each author's protection and ours, we want to make certain 44 | that everyone understands that there is no warranty for this free 45 | software. If the software is modified by someone else and passed on, we 46 | want its recipients to know that what they have is not the original, so 47 | that any problems introduced by others will not reflect on the original 48 | authors' reputations. 49 | 50 | Finally, any free program is threatened constantly by software 51 | patents. We wish to avoid the danger that redistributors of a free 52 | program will individually obtain patent licenses, in effect making the 53 | program proprietary. To prevent this, we have made it clear that any 54 | patent must be licensed for everyone's free use or not licensed at all. 55 | 56 | The precise terms and conditions for copying, distribution and 57 | modification follow. 58 | 59 | GNU GENERAL PUBLIC LICENSE 60 | TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 61 | 62 | 0. This License applies to any program or other work which contains 63 | a notice placed by the copyright holder saying it may be distributed 64 | under the terms of this General Public License. The "Program", below, 65 | refers to any such program or work, and a "work based on the Program" 66 | means either the Program or any derivative work under copyright law: 67 | that is to say, a work containing the Program or a portion of it, 68 | either verbatim or with modifications and/or translated into another 69 | language. (Hereinafter, translation is included without limitation in 70 | the term "modification".) Each licensee is addressed as "you". 
71 | 72 | Activities other than copying, distribution and modification are not 73 | covered by this License; they are outside its scope. The act of 74 | running the Program is not restricted, and the output from the Program 75 | is covered only if its contents constitute a work based on the 76 | Program (independent of having been made by running the Program). 77 | Whether that is true depends on what the Program does. 78 | 79 | 1. You may copy and distribute verbatim copies of the Program's 80 | source code as you receive it, in any medium, provided that you 81 | conspicuously and appropriately publish on each copy an appropriate 82 | copyright notice and disclaimer of warranty; keep intact all the 83 | notices that refer to this License and to the absence of any warranty; 84 | and give any other recipients of the Program a copy of this License 85 | along with the Program. 86 | 87 | You may charge a fee for the physical act of transferring a copy, and 88 | you may at your option offer warranty protection in exchange for a fee. 89 | 90 | 2. You may modify your copy or copies of the Program or any portion 91 | of it, thus forming a work based on the Program, and copy and 92 | distribute such modifications or work under the terms of Section 1 93 | above, provided that you also meet all of these conditions: 94 | 95 | a) You must cause the modified files to carry prominent notices 96 | stating that you changed the files and the date of any change. 97 | 98 | b) You must cause any work that you distribute or publish, that in 99 | whole or in part contains or is derived from the Program or any 100 | part thereof, to be licensed as a whole at no charge to all third 101 | parties under the terms of this License. 
102 | 103 | c) If the modified program normally reads commands interactively 104 | when run, you must cause it, when started running for such 105 | interactive use in the most ordinary way, to print or display an 106 | announcement including an appropriate copyright notice and a 107 | notice that there is no warranty (or else, saying that you provide 108 | a warranty) and that users may redistribute the program under 109 | these conditions, and telling the user how to view a copy of this 110 | License. (Exception: if the Program itself is interactive but 111 | does not normally print such an announcement, your work based on 112 | the Program is not required to print an announcement.) 113 | 114 | These requirements apply to the modified work as a whole. If 115 | identifiable sections of that work are not derived from the Program, 116 | and can be reasonably considered independent and separate works in 117 | themselves, then this License, and its terms, do not apply to those 118 | sections when you distribute them as separate works. But when you 119 | distribute the same sections as part of a whole which is a work based 120 | on the Program, the distribution of the whole must be on the terms of 121 | this License, whose permissions for other licensees extend to the 122 | entire whole, and thus to each and every part regardless of who wrote it. 123 | 124 | Thus, it is not the intent of this section to claim rights or contest 125 | your rights to work written entirely by you; rather, the intent is to 126 | exercise the right to control the distribution of derivative or 127 | collective works based on the Program. 128 | 129 | In addition, mere aggregation of another work not based on the Program 130 | with the Program (or with a work based on the Program) on a volume of 131 | a storage or distribution medium does not bring the other work under 132 | the scope of this License. 133 | 134 | 3. 
You may copy and distribute the Program (or a work based on it, 135 | under Section 2) in object code or executable form under the terms of 136 | Sections 1 and 2 above provided that you also do one of the following: 137 | 138 | a) Accompany it with the complete corresponding machine-readable 139 | source code, which must be distributed under the terms of Sections 140 | 1 and 2 above on a medium customarily used for software interchange; or, 141 | 142 | b) Accompany it with a written offer, valid for at least three 143 | years, to give any third party, for a charge no more than your 144 | cost of physically performing source distribution, a complete 145 | machine-readable copy of the corresponding source code, to be 146 | distributed under the terms of Sections 1 and 2 above on a medium 147 | customarily used for software interchange; or, 148 | 149 | c) Accompany it with the information you received as to the offer 150 | to distribute corresponding source code. (This alternative is 151 | allowed only for noncommercial distribution and only if you 152 | received the program in object code or executable form with such 153 | an offer, in accord with Subsection b above.) 154 | 155 | The source code for a work means the preferred form of the work for 156 | making modifications to it. For an executable work, complete source 157 | code means all the source code for all modules it contains, plus any 158 | associated interface definition files, plus the scripts used to 159 | control compilation and installation of the executable. However, as a 160 | special exception, the source code distributed need not include 161 | anything that is normally distributed (in either source or binary 162 | form) with the major components (compiler, kernel, and so on) of the 163 | operating system on which the executable runs, unless that component 164 | itself accompanies the executable. 
165 | 166 | If distribution of executable or object code is made by offering 167 | access to copy from a designated place, then offering equivalent 168 | access to copy the source code from the same place counts as 169 | distribution of the source code, even though third parties are not 170 | compelled to copy the source along with the object code. 171 | 172 | 4. You may not copy, modify, sublicense, or distribute the Program 173 | except as expressly provided under this License. Any attempt 174 | otherwise to copy, modify, sublicense or distribute the Program is 175 | void, and will automatically terminate your rights under this License. 176 | However, parties who have received copies, or rights, from you under 177 | this License will not have their licenses terminated so long as such 178 | parties remain in full compliance. 179 | 180 | 5. You are not required to accept this License, since you have not 181 | signed it. However, nothing else grants you permission to modify or 182 | distribute the Program or its derivative works. These actions are 183 | prohibited by law if you do not accept this License. Therefore, by 184 | modifying or distributing the Program (or any work based on the 185 | Program), you indicate your acceptance of this License to do so, and 186 | all its terms and conditions for copying, distributing or modifying 187 | the Program or works based on it. 188 | 189 | 6. Each time you redistribute the Program (or any work based on the 190 | Program), the recipient automatically receives a license from the 191 | original licensor to copy, distribute or modify the Program subject to 192 | these terms and conditions. You may not impose any further 193 | restrictions on the recipients' exercise of the rights granted herein. 194 | You are not responsible for enforcing compliance by third parties to 195 | this License. 196 | 197 | 7. 
If, as a consequence of a court judgment or allegation of patent 198 | infringement or for any other reason (not limited to patent issues), 199 | conditions are imposed on you (whether by court order, agreement or 200 | otherwise) that contradict the conditions of this License, they do not 201 | excuse you from the conditions of this License. If you cannot 202 | distribute so as to satisfy simultaneously your obligations under this 203 | License and any other pertinent obligations, then as a consequence you 204 | may not distribute the Program at all. For example, if a patent 205 | license would not permit royalty-free redistribution of the Program by 206 | all those who receive copies directly or indirectly through you, then 207 | the only way you could satisfy both it and this License would be to 208 | refrain entirely from distribution of the Program. 209 | 210 | If any portion of this section is held invalid or unenforceable under 211 | any particular circumstance, the balance of the section is intended to 212 | apply and the section as a whole is intended to apply in other 213 | circumstances. 214 | 215 | It is not the purpose of this section to induce you to infringe any 216 | patents or other property right claims or to contest validity of any 217 | such claims; this section has the sole purpose of protecting the 218 | integrity of the free software distribution system, which is 219 | implemented by public license practices. Many people have made 220 | generous contributions to the wide range of software distributed 221 | through that system in reliance on consistent application of that 222 | system; it is up to the author/donor to decide if he or she is willing 223 | to distribute software through any other system and a licensee cannot 224 | impose that choice. 225 | 226 | This section is intended to make thoroughly clear what is believed to 227 | be a consequence of the rest of this License. 228 | 229 | 8. 
If the distribution and/or use of the Program is restricted in 230 | certain countries either by patents or by copyrighted interfaces, the 231 | original copyright holder who places the Program under this License 232 | may add an explicit geographical distribution limitation excluding 233 | those countries, so that distribution is permitted only in or among 234 | countries not thus excluded. In such case, this License incorporates 235 | the limitation as if written in the body of this License. 236 | 237 | 9. The Free Software Foundation may publish revised and/or new versions 238 | of the General Public License from time to time. Such new versions will 239 | be similar in spirit to the present version, but may differ in detail to 240 | address new problems or concerns. 241 | 242 | Each version is given a distinguishing version number. If the Program 243 | specifies a version number of this License which applies to it and "any 244 | later version", you have the option of following the terms and conditions 245 | either of that version or of any later version published by the Free 246 | Software Foundation. If the Program does not specify a version number of 247 | this License, you may choose any version ever published by the Free Software 248 | Foundation. 249 | 250 | 10. If you wish to incorporate parts of the Program into other free 251 | programs whose distribution conditions are different, write to the author 252 | to ask for permission. For software which is copyrighted by the Free 253 | Software Foundation, write to the Free Software Foundation; we sometimes 254 | make exceptions for this. Our decision will be guided by the two goals 255 | of preserving the free status of all derivatives of our free software and 256 | of promoting the sharing and reuse of software generally. 257 | 258 | NO WARRANTY 259 | 260 | 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY 261 | FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN 262 | OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES 263 | PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED 264 | OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 265 | MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS 266 | TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE 267 | PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, 268 | REPAIR OR CORRECTION. 269 | 270 | 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING 271 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR 272 | REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, 273 | INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING 274 | OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED 275 | TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY 276 | YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER 277 | PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE 278 | POSSIBILITY OF SUCH DAMAGES. 279 | 280 | END OF TERMS AND CONDITIONS 281 | 282 | How to Apply These Terms to Your New Programs 283 | 284 | If you develop a new program, and you want it to be of the greatest 285 | possible use to the public, the best way to achieve this is to make it 286 | free software which everyone can redistribute and change under these terms. 287 | 288 | To do so, attach the following notices to the program. It is safest 289 | to attach them to the start of each source file to most effectively 290 | convey the exclusion of warranty; and each file should have at least 291 | the "copyright" line and a pointer to where the full notice is found. 
292 | 293 | {description} 294 | Copyright (C) {year} {fullname} 295 | 296 | This program is free software; you can redistribute it and/or modify 297 | it under the terms of the GNU General Public License as published by 298 | the Free Software Foundation; either version 2 of the License, or 299 | (at your option) any later version. 300 | 301 | This program is distributed in the hope that it will be useful, 302 | but WITHOUT ANY WARRANTY; without even the implied warranty of 303 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 304 | GNU General Public License for more details. 305 | 306 | You should have received a copy of the GNU General Public License along 307 | with this program; if not, write to the Free Software Foundation, Inc., 308 | 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 309 | 310 | Also add information on how to contact you by electronic and paper mail. 311 | 312 | If the program is interactive, make it output a short notice like this 313 | when it starts in an interactive mode: 314 | 315 | Gnomovision version 69, Copyright (C) year name of author 316 | Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. 317 | This is free software, and you are welcome to redistribute it 318 | under certain conditions; type `show c' for details. 319 | 320 | The hypothetical commands `show w' and `show c' should show the appropriate 321 | parts of the General Public License. Of course, the commands you use may 322 | be called something other than `show w' and `show c'; they could even be 323 | mouse-clicks or menu items--whatever suits your program. 324 | 325 | You should also get your employer (if you work as a programmer) or your 326 | school, if any, to sign a "copyright disclaimer" for the program, if 327 | necessary. Here is a sample; alter the names: 328 | 329 | Yoyodyne, Inc., hereby disclaims all copyright interest in the program 330 | `Gnomovision' (which makes passes at compilers) written by James Hacker. 
331 | 332 | {signature of Ty Coon}, 1 April 1989 333 | Ty Coon, President of Vice 334 | 335 | This General Public License does not permit incorporating your program into 336 | proprietary programs. If your program is a subroutine library, you may 337 | consider it more useful to permit linking proprietary applications with the 338 | library. If this is what you want to do, use the GNU Lesser General 339 | Public License instead of this License. 340 | 341 | -------------------------------------------------------------------------------- /INSTALL.txt: -------------------------------------------------------------------------------- 1 | 已测试环境 : 2 | OS : CentOS 6.x x64 3 | DB : PostgreSQL 9.3.x 4 | Server : HP DL360 5 | HOST1 : 192.168.111.37 6 | HOST2 : 192.168.111.42 7 | VIPM : 192.168.111.130 8 | VIPS : 192.168.111.131 9 | HOST1 FENCE DEVICE : fence_ilo 10 | HOST2 FENCE DEVICE : fence_ilo 11 | GateWay IP : 192.168.111.1 12 | 13 | 配置, 测试fence设备 14 | FENCE DEVICE配置 (HOST1, HOST2) : 15 | IP : 192.168.111.37 ilo : 192.168.112.56 USER : digoal PWD : digoal_pwd 16 | IP : 192.168.111.42 ilo : 192.168.112.51 USER : digoal PWD : digoal_pwd 17 | 开启ipmi功能, 用户赋予ipmi可开关机的角色(OPERATOR 或 ADMINISTRATOR, 本例使用的是OPERATOR). 18 | 19 | OS配置(HOST1, HOST2) : 20 | 关闭acpi服务, 避免fence慢或者fence不成功的可能. 21 | chkconfig acpid off 22 | 23 | 增加yum源, 安装需要的包. 
(HOST1, HOST2) 24 | 视OS版本环境配置, 可能与以下不一致 25 | /etc/yum.repos.d/rhel-sky.repo 26 | [Cluster] 27 | name=Cluster Directory 28 | baseurl=http://192.168.164.38/rhel-server-5.4-i386/Cluster 29 | enabled=1 30 | gpgcheck=0 31 | 32 | [ClusterStorage] 33 | name=ClusterStorage Directory 34 | baseurl=http://192.168.164.38/rhel-server-5.4-i386/ClusterStorage 35 | enabled=1 36 | gpgcheck=0 37 | 38 | [Server] 39 | name=Server Directory 40 | baseurl=http://192.168.164.38/rhel-server-5.4-i386/Server 41 | enabled=1 42 | gpgcheck=0 43 | 44 | [VT] 45 | name=VT Directory 46 | baseurl=http://192.168.164.38/rhel-server-5.4-i386/VT 47 | enabled=1 48 | gpgcheck=0 49 | 50 | 安装需要的包.(HOST1, HOST2) 51 | yum -y install rsync coreutils glib2 lrzsz sysstat e4fsprogs xfsprogs ntp readline-devel zlib zlib-devel openssl openssl-devel pam-devel libxml2-devel libxslt-devel python-devel tcl-devel gcc make smartmontools flex bison perl perl-devel perl-ExtUtils* OpenIPMI-tools openldap openldap-devel cman logrotate 52 | 53 | 系统配置 54 | vi /etc/sysctl.conf (HOST1, HOST2) 55 | # add by digoal.zhou 56 | kernel.shmmax=135497418752 57 | kernel.shmmni = 4096 58 | kernel.sem = 50100 64128000 50100 1280 59 | fs.file-max = 7672460 60 | fs.aio-max-nr = 1048576 61 | net.ipv4.ip_local_port_range = 9000 65000 62 | net.core.rmem_default = 262144 63 | net.core.rmem_max = 4194304 64 | net.core.wmem_default = 262144 65 | net.core.wmem_max = 4194304 66 | net.ipv4.tcp_max_syn_backlog = 4096 67 | net.core.netdev_max_backlog = 10000 68 | net.ipv4.netfilter.ip_conntrack_max = 655360 69 | net.ipv4.tcp_timestamps = 0 70 | net.ipv4.tcp_tw_recycle=1 71 | net.ipv4.tcp_timestamps=1 72 | net.ipv4.tcp_keepalive_time = 72 73 | net.ipv4.tcp_keepalive_probes = 9 74 | net.ipv4.tcp_keepalive_intvl = 7 75 | vm.zone_reclaim_mode=0 76 | vm.dirty_background_bytes = 102400000 77 | vm.dirty_ratio = 80 78 | vm.dirty_expire_centisecs = 6000 79 | vm.dirty_writeback_centisecs = 50 80 | vm.swappiness=0 81 | vm.overcommit_memory = 0 82 | 
vm.overcommit_ratio = 90 83 | 84 | 85 | # sysctl -p 86 | 87 | vi /etc/security/limits.conf (HOST1, HOST2) 88 | # add by digoal.zhou 89 | * soft nofile 131072 90 | * hard nofile 131072 91 | * soft nproc 131072 92 | * hard nproc 131072 93 | * soft core unlimited 94 | * hard core unlimited 95 | * soft memlock 500000000 96 | * hard memlock 500000000 97 | 98 | 如果是centos 6.x的话还需要修改 /etc/security/limits.d/90-nproc.conf 99 | vi /etc/security/limits.d/90-nproc.conf 100 | #* soft nproc 1024 101 | #root soft nproc unlimited 102 | * soft nproc 131072 103 | * hard nproc 131072 104 | 105 | 同步时钟 (HOST1, HOST2) 106 | crontab -e 107 | 8 * * * * /usr/sbin/ntpdate asia.pool.ntp.org && /sbin/hwclock --systohc 108 | 109 | 配置DNS : (HOST1, HOST2) 110 | vi /etc/resolv.conf 111 | nameserver xxx.xxx.xxx.xxx 112 | 113 | 配置网络, 新增vipm和vips的接口配置 : (HOST1, HOST2) 114 | 注意子接口使用ONBOOT=no, ONPARENT=no 115 | eth0:1对应VIPM 116 | eth0:2对应VIPS 117 | 118 | cd /etc/sysconfig/network-scripts/ 119 | cp ifcfg-eth0 ifcfg-eth0:1 120 | cp ifcfg-eth0 ifcfg-eth0:2 121 | vi ifcfg-eth0:1 122 | 123 | cat /etc/sysconfig/network-scripts/ifcfg-eth0:1 124 | DEVICE=eth0:1 125 | ONBOOT=no 126 | ONPARENT=no 127 | BOOTPROTO=static 128 | HWADDR=D4:BE:D9:AD:9A:B6 129 | IPADDR=192.168.111.130 130 | NETMASK=255.255.255.0 131 | 132 | cat /etc/sysconfig/network-scripts/ifcfg-eth0:2 133 | DEVICE=eth0:2 134 | ONBOOT=no 135 | ONPARENT=no 136 | BOOTPROTO=static 137 | HWADDR=D4:BE:D9:AD:9A:B6 138 | IPADDR=192.168.111.131 139 | NETMASK=255.255.255.0 140 | 141 | 按需配置iptables (HOST1, HOST2) 142 | 允许node1,node2 相互访问PostgreSQL监听端口, 143 | vi /etc/sysconfig/iptables 144 | # 私有网段 145 | -A INPUT -s 192.168.0.0/16 -j ACCEPT 146 | -A INPUT -s 10.0.0.0/8 -j ACCEPT 147 | -A INPUT -s 172.16.0.0/16 -j ACCEPT 148 | 149 | 编译port_probe : (HOST1, HOST2): 150 | gcc -O3 -Wall -Wextra -Werror -g -o port_probe ./port_probe.c 151 | chmod 555 port_probe 152 | mv port_probe /usr/local/bin 153 | 测试port_probe是否正常: 154 | port_probe $node_ip $port 155 | 156 | 
新增postgres用户 (HOST1, HOST2) 157 | useradd postgres 158 | 159 | 配置sudo命令 : (HOST1, HOST2) 160 | visudo -f /etc/sudoers 161 | # 注释 requiretty 162 | # Defaults requiretty 163 | # 末尾添加 164 | # add by digoal 165 | postgres ALL=(ALL) NOPASSWD: /sbin/ifup 166 | postgres ALL=(ALL) NOPASSWD: /sbin/ifdown 167 | postgres ALL=(ALL) NOPASSWD: /sbin/arping 168 | postgres ALL=(ALL) NOPASSWD: /bin/mount 169 | postgres ALL=(ALL) NOPASSWD: /bin/umount 170 | 171 | 配置postgres用户的ssh无密钥认证 (HOST1, HOST2) 172 | # vi /etc/ssh/sshd_config 173 | PubkeyAuthentication yes 174 | 175 | # service sshd restart 176 | 177 | # su - postgres 178 | $ ssh-keygen -t rsa 179 | 不要输入passphrase 180 | $ cd ~/.ssh 181 | $ 将 id_rsa.pub 内容拷贝到对方主机的postgres用户下的 ~/.ssh/authorized_keys 182 | $ chmod 600 ~/.ssh/authorized_keys 183 | 184 | # 验证无密码配置是否正确 185 | ssh 192.168.111.42 date 186 | Sun Jan 4 15:54:26 CST 2015 187 | 188 | ssh 192.168.111.37 date 189 | Sun Jan 4 15:54:26 CST 2015 190 | 191 | 192 | # PostgreSQL 安装 : (HOST1, HOST2) 193 | vi /home/postgres/.bash_profile 194 | # add by digoal 195 | export PS1="$USER@`/bin/hostname -s`-> " 196 | export PGPORT=1921 197 | export PGDATA=/opt/pg_root 198 | export LANG=en_US.utf8 199 | export PGHOME=/opt/pgsql 200 | export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH 201 | export DATE=`date +"%Y%m%d%H%M"` 202 | export PATH=$PGHOME/bin:$PATH:. 
203 | export MANPATH=$PGHOME/share/man:$MANPATH 204 | export PGHOST=$PGDATA 205 | export PGDATABASE=postgres 206 | alias rm='rm -i' 207 | alias ll='ls -lh' 208 | unalias vi 209 | 210 | # wget https://ftp.postgresql.org/pub/source/v9.3.5/postgresql-9.3.5.tar.bz2 211 | # tar -jxvf postgresql-9.3.5.tar.bz2 212 | # cd postgresql-9.3.5 213 | # ./configure --prefix=/opt/pgsql9.3.5 --with-pgport=1921 --with-perl --with-python --with-tcl --with-openssl --with-pam --with-ldap --with-libxml --with-libxslt --enable-thread-safety 214 | # gmake world 215 | # gmake install-world 216 | # ln -s /opt/pgsql9.3.5 /opt/pgsql 217 | 218 | 初始化数据库 : (HOST1, HOST2) 219 | 创建数据目录, 归档目录, 挂载对端归档目录的目录. 220 | mkdir /opt/pg_root 221 | mkdir /opt/arch 222 | mkdir /opt/peer_arch 223 | chown postgres:postgres /opt/pg_root 224 | chown postgres:postgres /opt/arch 225 | chown postgres:postgres /opt/peer_arch 226 | 227 | 注意: 228 | 在$PGDATA中用到的软链接必须一致, 例如 以下是不行的, 会导致数据库rsync后异常. 229 | 主机A /opt/pg_root/pg_xlog -> /pg_xlog 230 | 主机B /opt/pg_root/pg_xlog -> /data01/pg_xlog 231 | 主机A /opt/pg_root/pg_tblspc/12345 -> /pg_tbs1 232 | 主机B /opt/pg_root/pg_tblspc/12345 -> /data01/pg_tbs1 233 | 234 | 235 | 配置 NFS : (HOST1, HOST2) 236 | # vi /etc/exports 237 | /opt/arch 192.168.111.37/32(ro,no_root_squash,sync) 238 | /opt/arch 192.168.111.42/32(ro,no_root_squash,sync) 239 | # service nfs start 240 | # chkconfig nfs on 241 | 242 | 加载peer目录 (HOST1, HOST2) 243 | on HOST1: 244 | mount -t nfs -o tcp 192.168.111.42:/opt/arch /opt/peer_arch 245 | on HOST2: 246 | mount -t nfs -o tcp 192.168.111.37:/opt/arch /opt/peer_arch 247 | 248 | 添加到/etc/rc.local 249 | HOST1 250 | vi /etc/rc.local 251 | /bin/mount -t nfs -o tcp 192.168.111.42:/opt/arch /opt/peer_arch 252 | HOST2 253 | /bin/mount -t nfs -o tcp 192.168.111.37:/opt/arch /opt/peer_arch 254 | 255 | 256 | 257 | 初始化数据库 258 | su - postgres 259 | initdb -D $PGDATA -E UTF8 --locale=C -U postgres -W 260 | 261 | 配置流复制 : (HOST1) 262 | 263 | 配置 流复制HBA, 心跳HBA : 264 | 务必同时配置虚拟IP的连接心跳, 
除非固定出口IP. 265 | cd $PGDATA 266 | vi pg_hba.conf 267 | host replication replica 192.168.111.37/32 md5 268 | host replication replica 192.168.111.42/32 md5 269 | host replication replica 192.168.111.130/32 md5 270 | host replication replica 192.168.111.131/32 md5 271 | host sky_pg_cluster sky_pg_cluster 192.168.111.37/32 md5 272 | host sky_pg_cluster sky_pg_cluster 192.168.111.42/32 md5 273 | host sky_pg_cluster sky_pg_cluster 192.168.111.130/32 md5 274 | host sky_pg_cluster sky_pg_cluster 192.168.111.131/32 md5 275 | host sky_pg_cluster sky_pg_cluster 127.0.0.1/32 md5 276 | # 其他 277 | host all all 0.0.0.0/0 md5 278 | 279 | 配置归档, hot_standby, sender 280 | 必须配置的项 : 281 | listen_addresses = '0.0.0.0' # what IP address(es) to listen on; 282 | port = 1921 # (change requires restart) 283 | tcp_keepalives_idle = 60 # TCP_KEEPIDLE, in seconds; 284 | tcp_keepalives_interval = 10 # TCP_KEEPINTVL, in seconds; 285 | tcp_keepalives_count = 10 # TCP_KEEPCNT; 286 | wal_level = hot_standby 287 | archive_mode = on 288 | archive_command = 'DIR=/opt/arch/`date +%F`; test ! -d $DIR && mkdir -p $DIR; chmod 755 $DIR; test ! -f $DIR/%f && cp %p $DIR/%f; chmod 755 $DIR/%f' # command to use to archive a logfile segment 289 | max_wal_senders = 10 # max number of walsender processes 290 | hot_standby = on # "on" allows queries during recovery 291 | wal_receiver_status_interval = 1s # send replies at least this often 292 | hot_standby_feedback = on # send info from standby to prevent 293 | 294 | 模板 295 | cd $PGDATA 296 | vi postgresql.conf 297 | listen_addresses = '0.0.0.0' # what IP address(es) to listen on; 298 | port = 1921 # (change requires restart) 299 | max_connections = 1000 # (change requires restart) 300 | superuser_reserved_connections = 13 # (change requires restart) 301 | unix_socket_directories = '.' 
# comma-separated list of directories 302 | unix_socket_permissions = 0700 # begin with 0 to use octal notation 303 | tcp_keepalives_idle = 60 # TCP_KEEPIDLE, in seconds; 304 | tcp_keepalives_interval = 10 # TCP_KEEPINTVL, in seconds; 305 | tcp_keepalives_count = 10 # TCP_KEEPCNT; 306 | shared_buffers = 2048MB # min 128kB 307 | maintenance_work_mem = 512MB # min 1MB 308 | vacuum_cost_delay = 10 # 0-100 milliseconds 309 | bgwriter_delay = 10ms # 10-10000ms between rounds 310 | wal_level = hot_standby # minimal, archive, or hot_standby 311 | synchronous_commit = off # synchronization level; 312 | wal_writer_delay = 10ms # 1-10000 milliseconds 313 | checkpoint_segments = 128 # in logfile segments, min 1, 16MB each 314 | archive_mode = on # allows archiving to be done 315 | archive_command = 'DIR=/opt/arch/`date +%F`; test ! -d $DIR && mkdir -p $DIR; chmod 755 $DIR; test ! -f $DIR/%f && cp %p $DIR/%f; chmod 755 $DIR/%f' # command to use to archive a logfile segment 316 | max_wal_senders = 10 # max number of walsender processes 317 | hot_standby = on # "on" allows queries during recovery 318 | wal_receiver_status_interval = 1s # send replies at least this often 319 | hot_standby_feedback = on # send info from standby to prevent 320 | effective_cache_size = 8192MB 321 | log_destination = 'csvlog' # Valid values are combinations of 322 | logging_collector = on # Enable capturing of stderr and csvlog 323 | log_directory = 'pg_log' # directory where log files are written, 324 | log_truncate_on_rotation = on # If on, an existing log file with the 325 | log_rotation_age = 1d # Automatic rotation of logfiles will 326 | log_rotation_size = 10MB # Automatic rotation of logfiles will 327 | log_checkpoints = on 328 | log_connections = on 329 | log_disconnections = on 330 | log_error_verbosity = verbose # terse, default, or verbose messages 331 | log_timezone = 'PRC' 332 | datestyle = 'iso, mdy' 333 | timezone = 'PRC' 334 | lc_messages = 'C' # locale for system error message 335 | 
lc_monetary = 'C' # locale for monetary formatting 336 | lc_numeric = 'C' # locale for number formatting 337 | lc_time = 'C' # locale for time formatting 338 | default_text_search_config = 'pg_catalog.english' 339 | 340 | 配置恢复文件, primary_conninfo中使用vipm连接 : 341 | $ cp /opt/pgsql/share/recovery.conf.sample $PGDATA/recovery.done 342 | $ chmod 700 $PGDATA/recovery.done 343 | $ vi $PGDATA/recovery.done 344 | restore_command = 'PEER_DIR=/opt/peer_arch; cp $PEER_DIR/*/%f %p' # e.g. 'cp /mnt/server/archivedir/%f %p' 345 | recovery_target_timeline = 'latest' 346 | standby_mode = on 347 | primary_conninfo = 'host=192.168.111.130 port=1921 user=replica keepalives_idle=60' # e.g. 'host=localhost port=5432' 348 | 349 | 350 | 配置密码文件 : (HOST1, HOST2) 351 | 流复制1条, 连接到VIPM 352 | 心跳5条, 连接到VIPM, VIPS, LOCAL, PEER_IP 353 | # su - postgres 354 | $ vi ~/.pgpass 355 | 192.168.111.130:1921:replication:replica:REPLICA321 356 | 192.168.111.130:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 357 | 192.168.111.131:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 358 | 192.168.111.37:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 359 | 192.168.111.42:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 360 | 127.0.0.1:1921:sky_pg_cluster:sky_pg_cluster:SKY_PG_cluster_321 361 | $ chmod 400 .pgpass 362 | 363 | 启动数据库, 添加replication数据库角色 : (HOST1) 364 | pg_ctl start 365 | psql postgres postgres 366 | create extension pg_stat_statements; 367 | create role replica nosuperuser nocreatedb nocreaterole noinherit replication connection limit 32 login encrypted password 'REPLICA321'; 368 | 369 | 启动VIPM接口 : (HOST1) 370 | sudo /sbin/ifup eth0:1 371 | 372 | 启动数据库 : (HOST1) 373 | pg_ctl start 374 | 375 | 376 | 配置心跳用户, 数据库, 表, 函数(HOST1) : 377 | 将记录时间延迟, 流复制延迟的信息到心跳表 : 378 | 379 | -- 建议使用superuser, 原因见http://blog.163.com/digoal@126/blog/static/163877040201331995623214/ 380 | create role sky_pg_cluster superuser nocreatedb nocreaterole noinherit login encrypted password 'SKY_PG_cluster_321'; 
381 | create database sky_pg_cluster with template template0 encoding 'UTF8' owner sky_pg_cluster; 382 | \c sky_pg_cluster sky_pg_cluster 383 | create schema sky_pg_cluster authorization sky_pg_cluster; 384 | create table cluster_status (id int unique default 1, last_alive timestamp(0) without time zone, rep_lag int8); 385 | 386 | -- 限制cluster_status表有且只有一行 : 387 | CREATE FUNCTION cannt_delete () 388 | RETURNS trigger 389 | LANGUAGE plpgsql AS $$ 390 | BEGIN 391 | RAISE EXCEPTION 'You can not delete!'; 392 | END; $$; 393 | 394 | CREATE TRIGGER cannt_delete 395 | BEFORE DELETE ON cluster_status 396 | FOR EACH ROW EXECUTE PROCEDURE cannt_delete(); 397 | 398 | CREATE TRIGGER cannt_truncate 399 | BEFORE TRUNCATE ON cluster_status 400 | FOR STATEMENT EXECUTE PROCEDURE cannt_delete(); 401 | 402 | -- 插入初始数据 403 | insert into cluster_status values (1, now(), 9999999999); 404 | 405 | -- 创建测试函数, 用于测试数据库是否正常, 包括所有表空间的测试 406 | -- (注意原来的函数使用alter table set tablespace来做测试, 产生了较多的xlog, 同时需要排他锁, 现在改成update). 407 | -- 使用update不同的表空间中的数据, 并不能立刻反应表空间的问题. 因为大多数数据在shared_buffer中. 408 | -- 如果表空间对应的文件系统io有问题, 那么在checkpoint时会产生58类的错误. 409 | -- 使用pg_stat_file函数可以立刻暴露io的问题. 
-- cluster_keepalive_test(i_peer_ip): heartbeat function called by the HA script.
--   i_peer_ip : address of the peer node; matched against
--               pg_stat_replication.client_addr to find its walsender row.
-- On a standby (pg_is_in_recovery() is true) it only raises a notice and returns.
-- On a primary it updates cluster_status.last_alive to now(), and also
-- cluster_status.rep_lag (bytes between the current xlog insert location and the
-- peer's sent_location) when a walsender row for the peer exists.
-- The per-tablespace probe further down is currently disabled by the early "return".
-- Declared STRICT: a NULL argument makes the call return NULL without running the body.
create or replace function cluster_keepalive_test(i_peer_ip inet) returns void as $$
declare
  v_spcname text;
  v_spcoid oid;
  v_nspname name := 'sky_pg_cluster';
  v_rep_lag int8;
  v_t timestamp without time zone;
  v_relname text;  -- schema-qualified, identifier-quoted name of the probe table
begin
  if ( pg_is_in_recovery() ) then
    raise notice 'this is standby node.';
    return;
  end if;

  select pg_xlog_location_diff(pg_current_xlog_insert_location(), sent_location)
    into v_rep_lag
    from pg_stat_replication
   where client_addr = i_peer_ip;
  if found then
    -- standby is running (a walsender row exists for the peer)
    update cluster_status set last_alive=now(), rep_lag=v_rep_lag;
  else
    -- standby is not running: refresh the heartbeat time only
    update cluster_status set last_alive=now();
  end if;

  -- tablespace checking is temporarily disabled: return here
  return;

  -- Tablespace heartbeat: touch each probe table at most once per minute to
  -- keep the update load low.
  FOR v_spcname, v_spcoid IN
    select spcname, oid from pg_tablespace where spcname <> 'pg_global'
  LOOP
    -- Quote the identifiers with %I so tablespace names that contain upper-case
    -- or special characters are not case-folded or mis-parsed in the dynamic
    -- SQL (the catalog lookup below compares the exact, unfolded name).
    v_relname := format('%I.%I', v_nspname, 't_'||v_spcname);

    perform 1 from pg_class where
      ( reltablespace=v_spcoid or reltablespace=0 )
      and relname='t_'||v_spcname
      and relkind='r'
      and relnamespace=(select oid from pg_namespace where nspname=v_nspname)
      limit 1;
    if not found then
      execute format('create table %s (crt_time timestamp) tablespace %I', v_relname, v_spcname);
      execute format('insert into %s values (now())', v_relname);
      -- pg_stat_file touches the relation file, so file system I/O problems
      -- surface immediately instead of at the next checkpoint
      perform pg_stat_file(pg_relation_filepath(v_relname::regclass));
    else
      execute format('update %s set crt_time=now() where now()-crt_time > interval ''1 min'' returning crt_time', v_relname) into v_t;
      if v_t is not null then
        perform pg_stat_file(pg_relation_filepath(v_relname::regclass));
      end if;
    end if;
  END LOOP;
end;
$$ language plpgsql strict;
-- After creating the function, test that it works: the system catalogs differ
-- between some versions and the function may need adjusting.
-- 9.2 and 9.3 are fine.
459 | 460 | 461 | 复制主库, 创建peer节点 standby : (HOST2) 462 | su - postgres 463 | pg_basebackup -D $PGDATA -F p -h 192.168.111.130 -p 1921 -U replica 464 | cd $PGDATA 465 | mv recovery.done recovery.conf 466 | 467 | 468 | 启动standby : (HOST2) 469 | su - postgres 470 | pg_ctl start 471 | 正常的话, node1上面会多一个sender进程, node2上面会多一个receiver进程 : 472 | postgres: wal sender process replica 192.168.111.130(45020) streaming 0/4047700 473 | postgres: wal receiver process streaming 0/4047700 474 | 475 | 476 | 477 | 478 | 479 | 配置HA脚本, nagios脚本 (HOST1, HOST2) : 480 | 1. /usr/local/bin/sky_pg_cluster.sh 481 | 2. /usr/local/nagios/libexec/check_sky_pg_cluster_alive.sh 482 | 3. /usr/local/nagios/libexec/check_standby_lag.sh 483 | chmod 555 /usr/local/bin/sky_pg_cluster.sh 484 | chmod 555 /usr/local/nagios/libexec/check_sky_pg_cluster_alive.sh 485 | chmod 555 /usr/local/nagios/libexec/check_standby_lag.sh 486 | 487 | 配置日志清理crontab (HOST1, HOST2) : 488 | # vi /etc/logrotate.d/sky_pg_cluster 489 | /tmp/sky_pg_clusterd.log 490 | { 491 | size 10M 492 | create 493 | start 10 494 | rotate 4 495 | compress 496 | copytruncate 497 | } 498 | 499 | # which logrotate 500 | /usr/sbin/logrotate 501 | 配置为每小时执行一次rotate, 当日志文件大于10M时压缩rotate 502 | # crontab -e 503 | 1 * * * * /usr/sbin/logrotate /etc/logrotate.d/sky_pg_cluster 504 | 505 | 配置HA脚本自启动 506 | chmod +x /etc/rc.d/rc.local 507 | vi /etc/rc.d/rc.local 508 | su - postgres -c "/usr/local/bin/sky_pg_cluster.sh start" 509 | 510 | 511 | 启动HA(按步骤) : (HOST1, HOST2) 512 | 1. HOST1 HOST2 同时执行 : 513 | su - postgres -c "/usr/local/bin/sky_pg_cluster.sh stop" 514 | 515 | 为了测试方便, 初次启动前, 可以手工先启动VIP和postgresql. 然后再启动以下脚本. 516 | HOST1, primary 517 | ifup eth0:1 518 | pg_ctl start 519 | 520 | HOST2, standby 521 | ifup eth0:2 522 | pg_ctl start 523 | 实际生产时, 可以不用以上步骤, 只是启动时间可能较长, 因为要执行keepalived 524 | 525 | 2. HOST1 HOST2 同时执行 : 526 | su - postgres -c "/usr/local/bin/sky_pg_cluster.sh start" 527 | 3. 
检查sky_pg_cluster.sh是否正常启动 528 | ps -ewf|grep sky_pg_cluster.sh 529 | 530 | 配置nagios : (HOST1, HOST2) 531 | 监控如下项 : 532 | 1. 端口的监控在nagios服务端配置 533 | HOST1 IP+PORT 534 | HOST2 IP+PORT 535 | VIPM IP+port 536 | VIPS IP+port 537 | 538 | 2. 以下脚本的调用配置在 /usr/local/nagios/etc/nrpe.cfg 539 | sky_pg_clusterd是否存活 (/usr/local/nagios/libexec/check_sky_pg_cluster_alive.sh) 540 | standby同步延时监控 (/usr/local/nagios/libexec/check_standby_lag.sh) 541 | vi /usr/local/nagios/etc/nrpe.cfg 542 | command[check_sky_pg_clusterd]=/usr/local/nagios/libexec/check_sky_pg_cluster_alive.sh 543 | command[check_standby_lag]=/usr/local/nagios/libexec/check_standby_lag.sh 544 | 545 | service xinetd restart 546 | 547 | 注意, 这些nagios监控脚本务必使用postgres数据库启动用户调用. (修改nrpe user, group) 548 | vi /etc/xinetd.d/nrpe 549 | # default: on 550 | # description: NRPE (Nagios Remote Plugin Executor) 551 | service nrpe 552 | { 553 | flags = REUSE 554 | socket_type = stream 555 | port = 5666 556 | wait = no 557 | user = postgres 558 | group = postgres 559 | server = /usr/local/nagios/bin/nrpe 560 | server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd 561 | log_on_failure += USERID 562 | disable = no 563 | only_from = 127.0.0.1 192.168.1.111 其他略 564 | log_type = FILE /dev/null 565 | } 566 | 567 | service xinetd restart 568 | 569 | 570 | 测试failover : 571 | 1. 模拟网络故障 572 | 1.1 单一主机网络故障 573 | 1.2 双主机同时发生网络故障 574 | 575 | 2. 模拟数据库故障 576 | 2.1 单主机数据库故障 577 | 2.2 双主机同时发生数据库故障 578 | 579 | 580 | # Author : Digoal zhou 581 | # Email : digoal@126.com 582 | # Blog : http://blog.163.com/digoal@126/ 583 | -------------------------------------------------------------------------------- /sky_pg_cluster.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # 需要调试时, 取消set -x注释 4 | # set -x 5 | 6 | . /etc/profile 7 | . /home/postgres/.bash_profile 8 | 9 | # 配置, node1,node2 可能不一致, psql, pg_ctl等命令必须包含在PATH中. 
# Configuration (may differ between node1 and node2); psql, pg_ctl and the other
# commands used below must be reachable through PATH.
export PGHOME=/opt/pgsql
export LANG=en_US.utf8
export LD_LIBRARY_PATH=$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH
export DATE=$(date +"%Y%m%d%H%M")
export PATH=$PGHOME/bin:/bin:/sbin:$PATH:.
export PGDATA=/opt/pg_root

# Configuration (may differ between node1 and node2):
# whether this node is allowed to act as master.
# Useful when the two hosts' hardware differs greatly: pin one host as the
# master (i.e. once the other host has become m_s and sees the peer healthy
# again, it yields).
# "true" means this node may be master; if both hosts may be master, set both
# to "true". If one host must not be master, set it to "false" on that host.
# At least one host must be "true".
CAN_MASTER="true"

# Whether degrade waits for the off-peak hour before it starts syncing.
RSYNC_WAIT="true"
# Hour to wait for, in `date +%H` format.
RSYNC_UNTIL="00"

# Number of checks used by checkmaster and checkstandby.
CHECK_TIMES=5

# Format: "<seconds> <bytes>".
# m_s: judge the standby's lag; release VIPS when it is acceptable.
PROMOTE_RELEASE_VIPS="400 80192000"
# Standby self-check; this lag time must be greater than the checkmaster
# timeout. Checks the primary/standby lag to decide whether promoting the
# database is appropriate.
PROMOTE_STANDBY_SELF="600 160192000"
# Check whether the standby may be promoted; if it may, release VIPM and wait
# for the peer to switch to m_s.
PROMOTE_CANNOT_MASTER="400 80192000"

# Configuration (may differ between node1 and node2).
# .pgpass must hold the heartbeat user's credentials for VIPM, VIPS and LOCAL,
# and the streaming-replication user's credentials for VIPM.
# The gateway IP is used by arping to check that the local network is healthy;
# without a gateway, a third-party IP in the same broadcast domain also works.
VIP_IF=eth0
VIPM_IF=eth0:1
VIPS_IF=eth0:2
VIPM_IP=192.168.111.130
VIPS_IP=192.168.111.131
GATEWAY_IP=192.168.111.1

# Configuration (differs between node1 and node2): the peer node's physical IP,
# plus the fence device address, user and password.
PEER_IP=192.168.111.42
FENCE_IP=192.168.112.51
FENCE_USER=digoal
FENCE_PWD="digoal_pwd"

# Database heartbeat user and database name.
PGUSER=sky_pg_cluster
PGDBNAME=sky_pg_cluster

# Local heartbeat connection settings.
LOCAL_IP=127.0.0.1
PGPORT=1921

# Local and peer archive directories. Note: archive_command in postgresql.conf
# is required to archive using a layout like ARCH/$DATA/.
LOCAL_ARCH_DIR="/opt/arch"
PEER_ARCH_DIR="/opt/peer_arch"

# Log output.
NAGIOS_LOG="/tmp/sky_pg_clusterd.log"

# Script name, used when stopping the script; must match the script file name.
SUB_NAME="$(basename "$BASH_SOURCE")"

# Remove aliases so they cannot interfere with the commands below.
unalias ifup ifdown arping mount umount timeout

# Absolute paths of the commands that are run through sudo.
S_IFUP="$(which ifup)"
S_IFDOWN="$(which ifdown)"
S_ARPING="$(which arping)"
S_MOUNT="$(which mount)"
S_UMOUNT="$(which umount)"

# Commands this script depends on.
DEP_CMD="sudo ifup ifdown arping mount umount port_probe pg_ctl psql ipmitool rsync fence_ilo timeout"

# 9.0 uses a trigger file
# TRIG_FILE='/data01/pgdata/pg_root/.1921.trigger'

# Check that every required command exists.
# DEP_CMD is deliberately unquoted: it is a space-separated list of names.
# shellcheck disable=SC2086
which $DEP_CMD
if [ $? -ne 0 ]; then
  echo -e "dep commands: $DEP_CMD not exist."
  exit 1
fi

# Detect the current role from the presence of recovery.conf and record it.
if [ -f "$PGDATA/recovery.conf" ]; then
  LOCAL_ROLE="standby"
else
  LOCAL_ROLE="master"
fi
echo "this is $LOCAL_ROLE"


# Functions

#######################################
# Check whether an IP answers ARP probes sent from a local interface, i.e.
# whether some host has already brought the IP up. Probing the gateway this
# way can also be used to judge whether the local network is healthy.
# Globals:   S_ARPING (read)
# Arguments: $1 - interface to send the probes from
#            $2 - IP address to probe
# Returns:   0 if arping reports exactly one response, 1 otherwise (including
#            when arping produces no parsable output).
#######################################
ipscan() {
  local eth=$1
  local ip=$2
  local responses
  echo "$(date +%F%T) detecting $eth $ip exists, ps: return 0 exist, other not exist."
  # Second field of the line that contains "response" is the response count.
  responses=$(sudo "$S_ARPING" -b -c 5 -w 1 -f -I "$eth" "$ip" | grep response | awk '{print $2}')
  # String comparison: an empty or multi-line result simply counts as "not up"
  # instead of raising a test syntax error.
  if [[ "$responses" == "1" ]]; then
    return 0
  fi
  return 1
}

#######################################
# Bring up a virtual IP interface, retrying once per second until it succeeds.
# Globals:   S_IFUP (read)
# Arguments: $1 - interface name (e.g. $VIPM_IF or $VIPS_IF)
# Returns:   0 (only returns after ifup has succeeded)
#######################################
ifup_vip() {
  local vip_if=$1
  local attempt=1
  # Retry forever until the interface is up.
  while true; do
    echo -e "$(date +%F%T) $vip_if if upping. $attempt."
    if sudo "$S_IFUP" "$vip_if"; then
      echo -e "$(date +%F%T) $vip_if upped success."
      break
    fi
    sleep 1
    attempt=$((attempt + 1))
  done
  return 0
}

#######################################
# Reboot a host through its BMC interface using fence_ilo.
# Arguments: $1 - fence device IP
#            $2 - fence device user
#            $3 - fence device password
#            $4 - "force" retries forever until fencing succeeds; any other
#                 value (or none) attempts the fence exactly once
# Returns:   0 on success; in non-force mode, fence_ilo's exit status.
#######################################
fence() {
  # Alternative fence commands:
  # ipmitool -I lanplus -L OPERATOR -H $IP -U $USER -P $PWD power reset
  # fence_rsa -a $IP -l $USER -p $PWD -o reboot
  # fence_ilo -a $IP -l $USER -p $PWD -o reboot
  #
  # Local names are used on purpose: assigning to USER or PWD here would
  # overwrite the shell's own variables for the rest of the script.
  local bmc_ip=$1
  local bmc_user=$2
  local bmc_pwd=$3
  local method=${4:-}
  # Keep the command as an array so each argument stays a single word.
  local -a fence_cmd=(fence_ilo -a "$bmc_ip" -l "$bmc_user" -p "$bmc_pwd" -o reboot)

  if [[ "$method" == "force" ]]; then
    echo "$(date +%F%T) force fenceing, waiting..."
    until "${fence_cmd[@]}"; do
      sleep 1
    done
    return 0
  fi

  echo "$(date +%F%T) normal fenceing, waiting..."
  "${fence_cmd[@]}"
  # Report whether the fence succeeded.
  return $?
}


#######################################
# Promote the local standby database to primary.
# Globals:   PGDATA, LOCAL_IP, PGPORT, PGUSER, PGDBNAME (read)
# Returns:   0 (only returns once the database reports it left recovery)
#######################################
promote() {
  local sql
  local primary_cnt
  local stmt

  # Edit recovery.conf: comment out restore_command.
  sed -i -e 's/^restore_command/#digoal_restore_command/' "$PGDATA/recovery.conf"
  # Stop the database.
  pg_ctl stop -m fast -w -t 60000
  # Start the database.
  pg_ctl start -w -t 60000

  # Begin the promotion.
  echo "$(date +%F%T) promoting database ..."
  pg_ctl promote
  # PostgreSQL 9.0 cannot use pg_ctl promote
  # touch $TRIG_FILE

  # Wait until the promotion has succeeded, then return.
  sql="set client_min_messages=warning; select 'this_is_primary' as res where not pg_is_in_recovery();"
  while true; do
    echo "$(date +%F%T) testing promote status"
    primary_cnt=$(echo "$sql" | psql -h "$LOCAL_IP" -p "$PGPORT" -U "$PGUSER" -d "$PGDBNAME" -f - | grep -c this_is_primary)
    if [[ "$primary_cnt" -eq 1 ]]; then
      echo "$(date +%F%T) promote success."
      # Restore recovery.done: uncomment restore_command again.
      sed -i -e 's/^#digoal_restore_command/restore_command/' "$PGDATA/recovery.done"

      # Create checkpoints and switch xlog segments.
      for stmt in "checkpoint" "select pg_switch_xlog()" "checkpoint" "select pg_switch_xlog()" "checkpoint"; do
        psql -h "$LOCAL_IP" -p "$PGPORT" -U "$PGUSER" -d "$PGDBNAME" -c "$stmt"
      done
      break
    fi
    echo "$(date +%F%T) promoting..."
    sleep 1
  done

  return 0
}

#######################################
# Degrade the local primary database to a standby: stop it, rsync the data
# directory and tablespaces from the peer, rename recovery.done to
# recovery.conf and start it again.
# Requires password-less ssh for the postgres user between the two hosts.
# Globals:   PGDATA, PEER_IP, VIPM_IP, PGPORT, PGUSER, PGDBNAME,
#            RSYNC_WAIT, RSYNC_UNTIL (read); DATE (written)
# Returns:   exit status of the final pg_ctl start.
#######################################
degrade() {
  local hour
  local tblspc
  local name

  # Kept from the original: overwrites the exported DATE; it is not read
  # inside this function.
  DATE=$(date +%F%T)

  echo "$(date +%F%T) degrading database ..."
  pg_ctl stop -m fast -w -t 60000

  # Wait for the configured hour before syncing; change RSYNC_UNTIL to the
  # low-traffic hour of your system.
  if [[ "$RSYNC_WAIT" == "true" ]]; then
    while true; do
      hour="$(date +%H)"
      if [[ "$hour" == "$RSYNC_UNTIL" ]]; then
        echo "$hour"
        echo "ok, then start rsync."
        break
      fi
      echo "waiting to HOUR: $RSYNC_UNTIL , then start rsync."
      sleep 10
    done
  fi

  # Sync pg_root from the primary node.
  # Required layout: inside $PGDATA only pg_xlog may be a symlink, and the
  # tablespace entries in the tablespace directory may be symlinks.
  # Nothing else may be a symlink.
  # Not very rigorous; to be revised later.

  # Start the backup on the primary, retrying every second until it succeeds.
  until psql -h "$VIPM_IP" -p "$PGPORT" -U "$PGUSER" -d "$PGDBNAME" -c "select pg_start_backup(now()::text)"; do
    sleep 1
  done

  # Start rsync.
  # If possible, consider excluding pg_xlog when syncing $PGDATA (assuming
  # pg_xlog is not a symlink).
  rsync -a --bwlimit=80000 --delete --delete-before "$PEER_IP:$PGDATA/" "$PGDATA/"
  rsync -a --bwlimit=80000 --delete --delete-before "$PEER_IP:$PGDATA/pg_xlog/" "$PGDATA/pg_xlog/"
  chown -R postgres:postgres "$PGDATA"
  chmod -R 700 "$PGDATA"
  # Glob instead of parsing ls output; skip the literal pattern when the
  # directory is empty.
  for tblspc in "$PGDATA"/pg_tblspc/*; do
    [[ -e "$tblspc" || -L "$tblspc" ]] || continue
    name=${tblspc##*/}
    rsync -a --bwlimit=80000 --delete --delete-before "$PEER_IP:$PGDATA/pg_tblspc/$name/" "$PGDATA/pg_tblspc/$name/"
    chown -R postgres:postgres "$PGDATA/pg_tblspc/$name"/*
    chmod -R 700 "$PGDATA/pg_tblspc/$name"/*
  done

  # rsync finished: end the backup on the primary.
  psql -h "$VIPM_IP" -p "$PGPORT" -U "$PGUSER" -d "$PGDBNAME" -c "select pg_stop_backup()"

  # Rename recovery.done so the database starts as a standby.
  mv "$PGDATA/recovery.done" "$PGDATA/recovery.conf"

  pg_ctl start -w -t 60000
  # Report whether the database started successfully.
  return $?
}


#######################################
# Judge the standby's lag from the local cluster_status table.
# Globals:   LOCAL_IP, PGPORT, PGUSER, PGDBNAME (read)
# Arguments: $1 - maximum age of last_alive, in seconds
#            $2 - maximum rep_lag, in bytes
# Returns:   0 if promotion is allowed, 1 otherwise.
#######################################
enable_promote() {
  local sec=$1
  local byte=$2
  local sql
  local in_lag
  echo "$(date +%F%T) detecting standby enable promote for lag testing..."
  sql="set client_min_messages=warning; select 'standby_in_allowed_lag' as cluster_lag from cluster_status where now()-last_alive < interval '$sec second' and rep_lag<=$byte;"
  # Query the lag from the local database.
  in_lag=$(echo "$sql" | psql -h "$LOCAL_IP" -p "$PGPORT" -U "$PGUSER" -d "$PGDBNAME" -f - | grep -c standby_in_allowed_lag)
  if [[ "$in_lag" -eq 1 ]]; then
    return 0
  fi
  return 1
}

# Heartbeat: returns 0 when healthy, non-zero otherwise.
#######################################
# Heartbeat: run cluster_keepalive_test() against one database endpoint.
# The first attempt is limited to 60s. If it fails, up to 30 more attempts
# follow (each limited to 2400s, 10s apart), because a heavily loaded
# database may fail a single probe without being down.
# Globals:   PEER_IP, PGPORT, PGUSER, PGDBNAME (read)
# Arguments: $1 - host to connect to (callers pass VIPM_IP, LOCAL_IP or
#                 PEER_IP)
# Outputs:   psql output and retry messages on stdout
# Returns:   0 as soon as one attempt succeeds, 1 if every attempt failed
#######################################
keepalive() {
  local dest_ip=$1
  local sql="select cluster_keepalive_test('$PEER_IP');"
  local attempt

  # Write the heartbeat row.
  if timeout 60 psql -h "$dest_ip" -p "$PGPORT" -U "$PGUSER" -d "$PGDBNAME" -c "$sql"; then
    return 0
  fi

  # Give it 30 more chances before declaring the heartbeat dead.
  sleep 2
  for ((attempt = 1; attempt < 31; attempt++)); do
    if timeout 2400 psql -h "$dest_ip" -p "$PGPORT" -U "$PGUSER" -d "$PGDBNAME" -c "$sql"; then
      return 0
    fi
    echo "keepalive falied times : $attempt"
    sleep 10
  done
  return 1
}


#######################################
# Shared probe loop behind checkmaster and checkstandby.
# Once per second: probe the VIP with ipscan, then the PostgreSQL listener
# on that VIP with port_probe. The first round in which both succeed ends
# the check. A failing round is retried until the round counter reaches the
# limit.
# ipscan and port_probe are defined outside this block; a non-zero status
# from either is treated as "not reachable".
# Globals:   VIP_IF, PGPORT (read)
# Arguments: $1 - label used in log lines (the public function's name)
#            $2 - virtual IP to probe
#            $3 - number of rounds after which a failure is final
# Outputs:   progress messages on stdout
# Returns:   0 when VIP and listener both answer, 1 on timeout
#######################################
_check_vip_service() {
  local label=$1
  local vip=$2
  local limit=$3
  local round

  echo "$(date +%F%T) $label."
  for ((round = 1; round > 0; round++)); do
    sleep 1
    echo "$label check times: $round"

    # Is the VIP reachable?
    if ! ipscan "$VIP_IF" "$vip"; then
      # `[` (not `((`) on purpose: a missing limit stays a failed test, so
      # the loop keeps probing instead of timing out on the first failure.
      if [ "$round" -ge "$limit" ]; then
        echo "$label ipscan timeout: $limit"
        return 1
      fi
      # Network problem but not timed out yet: probe again.
      continue
    fi

    # Is the database listening on the VIP?
    if ! port_probe "$vip" "$PGPORT"; then
      if [ "$round" -ge "$limit" ]; then
        echo "$label port_probe timeout: $limit"
        return 1
      fi
      # Listener problem but not timed out yet: probe again.
      continue
    fi

    # Everything answered.
    return 0
  done

  return 0
}

#######################################
# Check the primary through its virtual IP (VIPM_IP).
# Globals:   VIPM_IP, VIP_IF, PGPORT (read)
# Arguments: $1 - number of one-second rounds to tolerate failures
# Returns:   0 when healthy, 1 when still failing after $1 rounds
#######################################
checkmaster() {
  _check_vip_service "${FUNCNAME[0]}" "$VIPM_IP" "$1"
}

#######################################
# Check the standby through its virtual IP (VIPS_IP).
# Globals:   VIPS_IP, VIP_IF, PGPORT (read)
# Arguments: $1 - number of one-second rounds to tolerate failures
# Returns:   0 when healthy, 1 when still failing after $1 rounds
#######################################
checkstandby() {
  _check_vip_service "${FUNCNAME[0]}" "$VIPS_IP" "$1"
}

#######################################
# Check whether an address is configured on a local interface. Used to
# verify that a sub-interface (VIP) address is up on this host.
# The address is matched as a fixed string preceded by a space and followed
# by "/", so 10.0.0.1 does not match 110.0.0.1/24 and the dots are not
# treated as regex wildcards.
# Arguments: $1 - interface name passed to `ip addr show dev`
#            $2 - IP address to look for
# Outputs:   one log line on stdout
# Returns:   0 if the address is present, 1 otherwise
#######################################
ipaddrscan() {
  local s_if=$1
  local s_ip=$2

  echo "$(date +%F%T) detecting $s_ip address up on $s_if ...."

  # An empty address can never be "up".
  if [[ -z "$s_ip" ]]; then
    return 1
  fi

  if ip addr show dev "$s_if" | grep -Fq -- " ${s_ip}/"; then
    return 0
  fi
  return 1
}



#######################################
# Main entry: initialise this node according to LOCAL_ROLE, then loop
# forever monitoring the peer and switching roles.
#
# Roles: "master" (holds VIPM), "standby" (holds VIPS), "m_s" (holds both
# VIPs because the peer is gone).
#
# Helpers ipscan, port_probe, ifup_vip, fence, promote, degrade and
# enable_promote are defined outside this block.
#
# Globals:   LOCAL_ROLE (read and updated); VIP_IF, VIPM_IP, VIPS_IP,
#            VIPM_IF, VIPS_IF, LOCAL_IP, PEER_IP, GATEWAY_IP, PGPORT,
#            S_MOUNT, S_IFDOWN, LOCAL_ARCH_DIR, PEER_ARCH_DIR, CHECK_TIMES,
#            CAN_MASTER, FENCE_IP, FENCE_USER, FENCE_PWD,
#            PROMOTE_RELEASE_VIPS, PROMOTE_STANDBY_SELF,
#            PROMOTE_CANNOT_MASTER (read)
# Outputs:   log lines on stdout (the dispatcher redirects them to
#            NAGIOS_LOG)
# Returns:   never returns normally; exits 1 on unrecoverable local failure
#######################################
start() {
  # ---------------------------------------------------------------- init
  if ! keepalive "$VIPM_IP"; then
    # Initial delay so that a standby promotion on the other host cannot
    # take longer than this server's start-up.
    echo "$(date +%F%T) sleep, waiting other host promoting..."
    sleep 45
  fi

  # Mount the peer's archive directory.
  # This blocks here if the peer node is not up.
  # Not needed with shared storage.
  # shellcheck disable=SC2086 # S_MOUNT is a command setting; left unquoted as in the original
  sudo $S_MOUNT -t nfs -o tcp "$PEER_IP:$LOCAL_ARCH_DIR" "$PEER_ARCH_DIR"

  echo "$(date +%F%T) this is $LOCAL_ROLE"

  if [[ $LOCAL_ROLE == "standby" ]]; then
    # Is the database already running?
    if ! port_probe "$LOCAL_IP" "$PGPORT"; then
      if ! pg_ctl start -w -t 60000; then
        # Database did not start: give up.
        echo "startup standby db failed."
        exit 1
      fi
    else
      echo "database is already startup."
    fi
  fi

  if [[ $LOCAL_ROLE == "master" ]]; then
    # Is the database already running?
    if ! port_probe "$LOCAL_IP" "$PGPORT"; then
      # If VIPM is already held by the other node, this node has to degrade,
      # so the database must not be started here.
      if ! ipscan "$VIP_IF" "$VIPM_IP"; then
        if ! pg_ctl start -w -t 60000; then
          # Database did not start: give up.
          echo "startup master db failed."
          exit 1
        fi
      else
        echo "Other host already startup vipm, this node should degrade to standby."
      fi
    else
      echo "database is already startup."
    fi
  fi

  # Initial VIP bring-up, local_role=standby:
  #   vips up   -> wait until it is released, then ifup vips
  #   vips down -> ifup vips
  if [[ $LOCAL_ROLE == "standby" ]]; then
    # Retry forever until VIPS can be raised locally.
    while true; do
      # Is VIPS currently held by another host?
      if ! ipscan "$VIP_IF" "$VIPS_IP"; then
        echo "$(date +%F%T) if upping vips."
        ifup_vip "$VIPS_IF"
        break
      fi
      # Wait for the other host to release VIPS.
      echo "$(date +%F%T) waiting vips released by other host."
      sleep 1
    done
  fi

  # Initial VIP bring-up, local_role=master:
  #   vipm down -> ifup vipm; if vips is down too -> ifup vips, role m_s
  #   vipm up   -> degrade to standby, ifup vips, role standby
  if [[ $LOCAL_ROLE == "master" ]]; then
    if ! ipscan "$VIP_IF" "$VIPM_IP"; then
      echo "$(date +%F%T) if upping vipm."
      ifup_vip "$VIPM_IF"
      # Nobody holds VIPS either: take it and become m_s.
      if ! ipscan "$VIP_IF" "$VIPS_IP"; then
        echo "$(date +%F%T) if upping vips. change to m_s role."
        ifup_vip "$VIPS_IF"
        LOCAL_ROLE="m_s"
      fi
    else
      # VIPM is held elsewhere: become a standby.
      echo "$(date +%F%T) degrading to standby, ifup vips, change to standby role."
      if ! degrade; then
        # degrade reports whether the rebuilt database started. Without a
        # running standby, raising VIPS would advertise a dead database,
        # so stop here like the other start-up failures do.
        echo "$(date +%F%T) degrade to standby failed, exit this script."
        exit 1
      fi
      ifup_vip "$VIPS_IF"
      LOCAL_ROLE="standby"
    fi
  fi

  # Fire one heartbeat so enable_promote has fresh data; result not needed.
  keepalive "$VIPM_IP"

  # ----------------------------------------------------------- main loop
  # local_role=master  : watch vips -> fence -> ifup vips -> role m_s
  # local_role=standby : watch vipm -> fence -> promote, ifup vipm -> role m_s
  # local_role=m_s     : watch peer -> release vips -> role master
  #
  # Every round also checks local state (gateway, local heartbeat, the IP
  # belonging to the local role). Problems are written to the log, which
  # nagios monitors.
  while true; do
    echo "$(date +%F%T) this is $LOCAL_ROLE"
    sleep 1

    # Unlike standby/master, m_s does not depend on local health: VIPS must
    # be released whenever that becomes possible.
    if [[ $LOCAL_ROLE == "m_s" ]]; then
      # Gateway check reflects the local network; log only.
      if ! ipscan "$VIP_IF" "$GATEWAY_IP"; then
        echo "$(date +%F%T) can not connect to gateway."
      fi

      # Local heartbeat reflects local database health. If it fails while
      # the peer's heartbeat works, leave the script.
      if ! keepalive "$LOCAL_IP"; then
        if keepalive "$PEER_IP"; then
          echo "$(date +%F%T) local database not health, release vipm and vips. exit this script."
          # The interfaces are deliberately NOT taken down: heartbeats fail
          # for many reasons (connection limit reached, slow response, ...)
          # that do not mean the database is broken. Stop the script and let
          # nagios flag it for investigation.
          # sudo $S_IFDOWN $VIPM_IF
          # sudo $S_IFDOWN $VIPS_IF
          exit 1
        fi
      fi

      # IPs belonging to the local role; log only.
      if ! ipaddrscan "$VIP_IF" "$VIPM_IP"; then
        echo "$(date +%F%T) vipm not up."
      fi
      if ! ipaddrscan "$VIP_IF" "$VIPS_IP"; then
        echo "$(date +%F%T) vips not up."
      fi

      # If the peer database is healthy again, hand VIPS back.
      echo "$(date +%F%T) detecting peer postgresql keepalive."
      if keepalive "$PEER_IP"; then
        # Release VIPS only when the replication lag is acceptable.
        # enable_promote takes two arguments (seconds, bytes);
        # NOTE(review): the PROMOTE_* settings presumably hold both values
        # separated by whitespace, so they must stay unquoted - confirm.
        # shellcheck disable=SC2086
        if enable_promote $PROMOTE_RELEASE_VIPS; then
          echo "$(date +%F%T) release vips."
          # shellcheck disable=SC2086 # S_IFDOWN is a command setting
          sudo $S_IFDOWN "$VIPS_IF"

          # Switch role.
          LOCAL_ROLE="master"
        fi
      fi
    fi

    # ============ standby / master: local state checks ============
    # When the local side is unhealthy, log it and skip the peer checks;
    # local problems normally need manual handling.

    # Gateway check reflects the local network.
    if ! ipscan "$VIP_IF" "$GATEWAY_IP"; then
      echo "$(date +%F%T) can not connect to gateway."
      continue
    fi

    # Local heartbeat; if it fails while the peer's works, leave the script.
    if ! keepalive "$LOCAL_IP"; then
      if keepalive "$PEER_IP"; then
        echo "$(date +%F%T) local database not health."
        # Interfaces are not taken down, for the same reason as in the m_s
        # section; the peer is expected to react (promote / become m_s).
        # sudo $S_IFDOWN $VIPM_IF
        # sudo $S_IFDOWN $VIPS_IF
        exit 1
      fi
    fi

    if [[ $LOCAL_ROLE == "standby" ]]; then
      # IP belonging to the local role.
      if ! ipaddrscan "$VIP_IF" "$VIPS_IP"; then
        echo "$(date +%F%T) vips not up."
        continue
      fi

      # Is the replication lag small enough to promote this standby?
      # The allowed lag time must exceed the checkmaster timeout.
      # shellcheck disable=SC2086 # two arguments packed in one setting, see above
      if ! enable_promote $PROMOTE_STANDBY_SELF; then
        echo "$(date +%F%T) can not promote."
        continue
      fi

      # Primary failing for CHECK_TIMES rounds triggers the switch-over.
      if ! checkmaster "$CHECK_TIMES"; then
        if fence "$FENCE_IP" "$FENCE_USER" "$FENCE_PWD" force; then
          # Fenced: promote, raise VIPM, switch role.
          # promote MUST happen before VIPM is raised.
          promote
          ifup_vip "$VIPM_IF"
          LOCAL_ROLE="m_s"
        else
          # Fence failed (fence device unreachable or misconfigured):
          # keep probing, do not switch role.
          echo "fence master failed, continue checkmaster."
          continue
        fi
      else
        echo "check master normal."
        continue
      fi
    fi

    if [[ $LOCAL_ROLE == "master" ]]; then
      # IP belonging to the local role.
      if ! ipaddrscan "$VIP_IF" "$VIPM_IP"; then
        echo "$(date +%F%T) vipm not up."
        continue
      fi

      # Standby failing for CHECK_TIMES rounds triggers the take-over.
      if ! checkstandby "$CHECK_TIMES"; then
        # No forced fence here.
        if fence "$FENCE_IP" "$FENCE_USER" "$FENCE_PWD" normal; then
          ifup_vip "$VIPS_IF"
          LOCAL_ROLE="m_s"
        else
          # Fence failed: keep probing, do not switch role.
          echo "$(date +%F%T) fence standby failed."
          continue
        fi
      else
        # CAN_MASTER may move into the database configuration later.
        # When this node is not allowed to stay primary and the standby's
        # lag permits promotion: stop the database, release VIPM, leave the
        # script and let the peer become m_s.
        if [[ $CAN_MASTER == "true" ]]; then
          continue
        else
          echo "$(date +%F%T) this node can not master, will shutdown and wait peer fence it and restart change to slave."
          # Refresh the heartbeat before judging the lag.
          keepalive "$LOCAL_IP"
          # shellcheck disable=SC2086 # two arguments packed in one setting, see above
          if enable_promote $PROMOTE_CANNOT_MASTER; then
            pg_ctl stop -m fast -w -t 6000000
            # shellcheck disable=SC2086 # S_IFDOWN is a command setting
            sudo $S_IFDOWN "$VIPM_IF"
            exit 1
          else
            echo "$(date +%F%T) this node can not master, but peer too lag, so continue..."
            continue
          fi
        fi
      fi
    fi

  done
}

#######################################
# Stop everything: the database, both virtual IPs, the peer archive mount,
# and finally every process named SUB_NAME.
# NOTE(review): SUB_NAME is defined outside this block; if it matches the
# invoking script, the final killall also ends the caller - confirm.
# Globals: S_IFDOWN, S_UMOUNT, VIPS_IF, VIPM_IF, PEER_ARCH_DIR, SUB_NAME (read)
# Returns: status of killall
#######################################
stopall() {
  pg_ctl stop -m fast
  # shellcheck disable=SC2086 # S_IFDOWN / S_UMOUNT are command settings
  sudo $S_IFDOWN "$VIPS_IF"
  # shellcheck disable=SC2086
  sudo $S_IFDOWN "$VIPM_IF"
  # shellcheck disable=SC2086
  sudo $S_UMOUNT -l "$PEER_ARCH_DIR"
  # Kill the running script instances.
  killall "$SUB_NAME"
}

#######################################
# Stop only the running script instances.
# Globals: SUB_NAME (read)
#######################################
stopscript() {
  killall "$SUB_NAME"
}

#######################################
# Stop only the database (fast shutdown).
#######################################
stopdb() {
  pg_ctl stop -m fast
}

#######################################
# Show the last 30 log lines and the processes matching SUB_NAME.
# Globals: NAGIOS_LOG, SUB_NAME (read)
#######################################
status() {
  tail -n 30 "$NAGIOS_LOG"
  ps -ewf | grep "$SUB_NAME"
}

# See how we are called.
# NOTE(review): "restart" calls stopall, which ends with killall SUB_NAME;
# if that kills this process too, the following start never runs - confirm.
case "$1" in
  start)
    start >>"$NAGIOS_LOG" 2>&1 &
    RETVAL=$?
    ;;
  stop)
    stopall
    RETVAL=$?
    ;;
  status)
    status
    RETVAL=$?
    ;;
  restart)
    stopall
    start >>"$NAGIOS_LOG" 2>&1 &
    RETVAL=$?
    ;;
  *)
    echo $"Usage: $0 {start|stop|status|restart}"
    RETVAL=3
    ;;
esac

exit "$RETVAL"


# Author : Digoal zhou
# Email : digoal@126.com
# Blog : http://blog.163.com/digoal@126/