├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── defaults
│   └── main.yml
├── files
│   ├── pgsqlms-2.2.0-fix-pg10
│   ├── pgsqlms-2.2.0-geo-patched
│   ├── pgsqlms-2.2.1-geo-patched
│   ├── resource-agents-paf-1.1.0-1.noarch.rpm
│   ├── resource-agents-paf-2.2.0-1.noarch.rpm
│   └── resource-agents-paf-2.2.1-1.noarch.rpm
├── handlers
│   └── main.yml
├── library
│   ├── pcs_property
│   └── pcs_resource
├── meta
│   └── main.yml
├── tasks
│   ├── constraints.yml
│   ├── finalize.yml
│   ├── main.yml
│   ├── maintenance.yml
│   ├── paf.yml
│   ├── pcs.yml
│   ├── postgresql_sync.yml
│   ├── pre-tasks.yml
│   └── vip.yml
├── templates
│   ├── maint
│   │   └── clone_clusterdb_from_master.sh.j2
│   ├── pg_hba.conf.j2
│   └── recovery.conf.pcmk.j2
├── tests
│   ├── inventory
│   └── test.yml
└── vars
    └── main.yml

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | 
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | ---
2 | language: python
3 | python: "2.7"
4 | 
5 | # Use the new container infrastructure
6 | sudo: false
7 | 
8 | # Install ansible
9 | addons:
10 |   apt:
11 |     packages:
12 |     - python-pip
13 | 
14 | install:
15 |   # Install ansible
16 |   - pip install ansible
17 | 
18 |   # Check ansible version
19 |   - ansible --version
20 | 
21 |   # Create ansible.cfg with correct roles_path
22 |   - printf '[defaults]\nroles_path=../' >ansible.cfg
23 | 
24 | script:
25 |   # Basic role syntax check
26 |   - ansible-playbook tests/test.yml -i tests/inventory --syntax-check
27 | 
28 | notifications:
29 |   webhooks: https://galaxy.ansible.com/api/v1/notifications/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 | 
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 | 
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 | 
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 | 
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 | 
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 | 
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 | 
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 | 
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 | 
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright {yyyy} {name of copyright owner}
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | postgres-ha
2 | ===========
3 | 
4 | With this role, you will transform your standalone postgresql server into an N-node postgres cluster with automated failover. You only need one working postgresql server; the other hosts can be clean CentOS 7 or CentOS 6 minimal installs.
5 | 
6 | Alternatively, this role can create a database cluster for you from scratch. If no postgres database is detected, it will be created.
7 | 
8 | What it will do:
9 | - install the cluster software stack (pcs, corosync, pacemaker)
10 | - add IPs of cluster hosts to /etc/hosts files
11 | - create a pcs cluster from all play hosts
12 | - install database binaries if needed
13 | - init master database if needed
14 | - alter postgresql configuration if needed
15 | - sync slave databases from master host
16 | - make sure the DB replication is working
17 | - create cluster resources for database, floating IP and constraints
18 | - check again that everything is working as expected
19 | 
20 | Automated failover is set up using the PAF pacemaker module: https://github.com/dalibo/PAF
21 | 
22 | What you should know
23 | --------------------
24 | 
25 | - The role is idempotent. I've made many checks to allow running it multiple times without breaking things. You can run it again safely even if the role fails. The only thing you need to check before the run is the `postgres_ha_cluster_master_host` variable. But don't worry, if the specified host is not the master database, the role will fail gracefully without disrupting things.
26 | 
27 | - During the run, the role will alter your postgresql.conf and pg_hba.conf to enable replication. You can review the changes to postgresql.conf in [defaults/main.yml](defaults/main.yml) (`postgres_ha_postgresql_conf_vars` variable). In pg_hba.conf, host ACL statements will be added for every cluster node. They will be added before all previously existing host ACL statements.
28 | 
29 | - The postgres replication is asynchronous by default. If you want synchronous replication, alter the `postgres_ha_postgresql_conf_vars` variable by adding the `synchronous_standby_names` parameter. Please see the postgresql manual for more info. Also note that if the last synchronous replica disconnects from the master, the master database will stop serving requests.
30 | 
31 | - You should have at least a basic understanding of clustering and of how to work with the `pcs` command. If the role fails for some reason, it is relatively easy to recover from it, provided you understand what the logs are trying to say and/or how to run the appropriate recovery actions. See the cleanup section for more info.
32 | 
33 | - You need to alter firewall settings before running this role. The cluster members need to communicate with each other to form a cluster and to replicate the postgres DB. I recommend adding a firewall-configuring role before the postgres-ha role in your play.
34 | 
35 | - If the master datadir is empty on the first run, the role will init an empty datadir. Slave nodes will then download this empty database. If the datadir is not empty, the initdb will be skipped. This means that you can run this role on clean CentOS installs that don't have any postgresql database installed. The result will be a fully working empty database cluster.
36 | 
37 | - On the first run, the datadirs on slave nodes will be deleted without prompt. Please make sure you specify the correct `postgres_ha_cluster_master_host` at least for this first run (slave datadirs will NEVER be deleted after the first initial sync is done).
38 | 
39 | - If you plan to apply the role to a higher number of servers (7+), please be aware that the servers download RPM packages simultaneously. This can be identified as a DDoS attack, and some repository providers may refuse your downloads. As a result, the role will fail. I recommend setting up your own repository mirror in such cases and pointing the role at it, as sketched below.
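
  A minimal sketch of pointing the role at such a mirror (hypothetical values: `repo.example.internal` is a placeholder, and the path must lead to your mirrored copy of the PGDG repo package referenced by `pg_pkg_name`):

  ```
  postgres_ha_repo_url: 'https://repo.example.internal/pgdg/{{ pg_pkg_name }}'
  # or pre-configure the repository on the hosts yourself and skip the download:
  #postgres_ha_import_repo: false
  ```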
40 | 
41 | - Please don't change the cluster resource name parameters after the role has been applied. On the next run, this would result in trying to create new, colliding resources.
42 | 
43 | - Fencing is not configured by this role. If you need it, you have to configure it manually after running the role.
44 | 
45 | Requirements
46 | ------------
47 | 
48 | This role works on CentOS 6 and 7. RHEL was not tested but should work without problems. If you need support for another distribution, I can help. Post an issue.
49 | 
50 | The postgresql binaries on your primary server should be installed from the official repository:
51 | 
52 | https://yum.postgresql.org/repopackages.php
53 | 
54 | Note: If you have binaries from another repo, you need to modify the `postgres_ha_repo_url` variable to change the postgres repository source, and possibly also the bindir and datadir paths in other role variables. If you need to change the installed package name(s), you need to directly modify the `install pg*` task in the `tasks/postgresql_sync.yml` file.
55 | 
56 | Role Variables
57 | --------------
58 | 
59 | For all variables with descriptions, see [defaults/main.yml](defaults/main.yml)
60 | 
61 | Variables that must be changed:
62 | - `postgres_ha_cluster_master_host` - the master database host (WARNING: please make sure you fill this correctly, otherwise you may lose data!)
63 | - `postgres_ha_cluster_vip` - a floating IP address that travels with the master database
64 | - `postgres_ha_pg_repl_pass` - password for replicating postgresql data
65 | - `postgres_ha_cluster_ha_password` - password for cluster config replication
66 | - `postgres_ha_cluster_ha_password_hash` - password hash of postgres_ha_cluster_ha_password
67 | 
68 | The password hash can be generated, for example, by this command:
69 | 
70 | `python -c 'import crypt; print(crypt.crypt("my_cluster_ha_password", crypt.mksalt(crypt.METHOD_SHA512)))'`
71 | 
72 | Dependencies
73 | ------------
74 | 
75 | No other roles are required as a dependency. However, you can combine this role with some other role that installs a postgresql database.
76 | 
77 | Example Playbook
78 | ----------------
79 | 
80 | The usage is relatively simple - install minimal CentOS systems, set the variables, and run the role.
81 | 
82 | Two settings are required:
83 | - `gather_facts=True` - we need to know the IP addresses of the cluster nodes
84 | - `any_errors_fatal=True` - it ensures that an error on any node stops the whole ansible run, because it doesn't make sense to continue when you lose some of your cluster nodes along the way.
85 | 
86 | ```
87 | - name: install PG HA
88 |   hosts: db?
89 |   gather_facts: True
90 |   any_errors_fatal: True
91 |   vars:
92 |     postgres_ha_cluster_master_host: db1
93 |     postgres_ha_cluster_vip: 10.10.10.10
94 |     postgres_ha_pg_repl_pass: MySuperSecretDBPass
95 |     postgres_ha_cluster_ha_password: AnotherSuperSecretPass1234
96 |     postgres_ha_cluster_ha_password_hash: '$6$mHeZ7/LD1y.........7VJYu.'
97 |   pre_tasks:
98 |     - name: disable firewall
99 |       service: name=firewalld state=stopped enabled=no
100 |   roles:
101 |     - postgres-ha
102 | ```
103 | 
104 | Cleanup after failure
105 | ---------------------
106 | 
107 | If the role fails repeatedly and you want to run it fresh as if it were the first time, you need to clean up some things.
108 | Please note that the default resource names are used here. If you changed them using variables, adjust these commands accordingly, as in the example below.
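For example, if you set `postgres_ha_cluster_pg_res_name: pgsql`, you would run `pcs resource delete pgsql` instead of `pcs resource delete postgres`, and the name of the `.*_constraints_processed` marker file changes accordingly, because it is generated from this variable.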
109 | 
110 | - RUN ON ANY NODE:
111 | ```
112 | pcs resource delete pg-vip
113 | pcs resource delete postgres
114 | #pcs resource delete postgres-ha   # probably not needed
115 | #pcs resource cleanup postgres     # probably not needed
116 | 
117 | # Make sure no (related) cluster resources are defined.
118 | ```
119 | - RUN ON ALL SLAVE NODES:
120 | ```
121 | systemctl stop postgresql-9.6
122 | # Make sure no postgres db is running.
123 | systemctl status postgresql-9.6
124 | ps aux | grep postgres
125 | rm -rf /var/lib/pgsql/9.6/data
126 | rm -f /var/lib/pgsql/9.6/recovery.conf.pgcluster.pcmk
127 | rm -f /var/lib/pgsql/9.6/.*_constraints_processed   # name generated from postgres_ha_cluster_pg_res_name
128 | ```
129 | - RUN ONLY ON MASTER NODE:
130 | ```
131 | systemctl stop postgresql-9.6
132 | rm -f /var/lib/pgsql/9.6/recovery.conf.pgcluster.pcmk
133 | rm -f /var/lib/pgsql/9.6/.*_constraints_processed
134 | rm -f /var/lib/pgsql/9.6/data/recovery.conf
135 | rm -f /var/lib/pgsql/9.6/data/.synchronized
136 | # Make sure no postgres db is running.
137 | ps aux | grep postgres
138 | systemctl start postgresql-9.6
139 | systemctl status postgresql-9.6
140 | # Check postgres db functionality.
141 | ```
142 | - START AGAIN
143 | ```
144 | # Check variables & defaults and run the ansible role again.
145 | ```
146 | 
147 | 
148 | License
149 | -------
150 | 
151 | BSD
152 | 
153 | Author Information
154 | ------------------
155 | 
156 | Created by YanChi.
157 | 
158 | Originally part of the Danube Cloud project (https://github.com/erigones/esdc-ce).
159 | 
160 | 
--------------------------------------------------------------------------------
/defaults/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | # cluster config
3 | postgres_ha_cluster_master_host: "{{ ansible_play_batch[0] }}"  # sync all DB slaves from this master node (set to the first node if not defined)
4 | postgres_ha_cluster_name: 'pgcluster'         # name of the pcs cluster
5 | postgres_ha_cluster_vip: 10.20.30.40          # floating IP that will be used to connect to the clustered DB (always follows the master)
6 | postgres_ha_cluster_vip_mask: 24              # floating IP netmask
7 | postgres_ha_cluster_vip_res_name: 'pg-vip'    # resource name of the floating IP
8 | postgres_ha_cluster_pg_res_name: 'postgres'   # slave DB cluster resource name
9 | postgres_ha_cluster_pg_HA_res_name: 'postgres-ha'   # master DB cluster resource name (this name is used to manage the postgres resource)
10 | postgres_ha_pg_port: 5432
11 | 
12 | postgres_ha_monitor_interval_pgmaster: '15s'  # frequency of checking if the master PG instance is alive
13 | postgres_ha_monitor_interval_pgslave: '16s'   # frequency of checking if a slave PG instance is alive
14 | 
15 | # auth config
16 | postgres_ha_pg_repl_user: replicator          # database user used for replication
17 | postgres_ha_pg_repl_pass: rybKath3KeckGov1
18 | postgres_ha_cluster_ha_password: 'fropFav7epAbOch2'   # password for joining the pcs cluster
19 | postgres_ha_cluster_ha_password_hash: '$6$MHAki4YS$Nk7O3FEC2G.INznoSUj4ByFgdwFJ8mcI9.Ks3XAoLLe9f9GB36G8hZe9o8ygDySJwvnLVCn0LGPzcOapK42/A/' # fropFav7epAbOch2
20 | 
21 | # postgres config
22 | postgres_ha_pg_version: 9.6
23 | postgres_ha_import_repo: true   # Enable download of the postgresql repo before install
24 | postgres_ha_repo_url: 'https://download.postgresql.org/pub/repos/yum/{{ postgres_ha_pg_version }}/redhat/rhel-7-x86_64/{{ pg_pkg_name }}'
25 | postgres_ha_pg_systemd_svcname: "postgresql-{{ postgres_ha_pg_version }}"   # the name of the original postgres DB resource in systemd
26 | postgres_ha_pg_data: "/var/lib/pgsql/{{ postgres_ha_pg_version }}/data"   # where can I find the PG datadir?
27 | postgres_ha_pg_bindir: "/usr/pgsql-{{ postgres_ha_pg_version }}/bin"      # where are the PG binaries?
28 | 
29 | postgres_ha_postgresql_conf_vars:                           # When altering this variable, please copy all postgresql.conf items specified here \
30 |   listen_addresses: "'*'"                                   # because this whole variable will be overridden by your new definition. \
31 |   max_wal_senders: "{{ ansible_play_batch|length * 2 }}"    # You can also change other postgresql.conf variables here; it will work.
32 |   max_replication_slots: "{{ ansible_play_batch|length * 2 }}"
33 |   wal_level: "hot_standby"
34 |   hot_standby: "on"
35 |   wal_log_hints: "on"
36 | 
37 | postgres_ha_maint_scripts_path: /var/lib/pgsql/pg-maint     # location where user scripts for database administration are created
38 | 
39 | # PAF vars
40 | postgres_ha_paf_version: 2.2.1
41 | postgres_ha_paf_geo_patch: False    # Apply a patch to PAF to better handle network splits in geographically split clusters.
42 |                                     # This patch is from the creator of this ansible role and is not official.
43 |                                     # It allows having a postgresql master-slave cluster without the need for configured fencing
44 |                                     # (which is hard to have in geographically split clusters).
45 |                                     # It adds an additional safety mechanism that tracks the highest seen database timeline.
46 |                                     # See diff here: https://github.com/YanChii/PAF/compare/master...YanChii:master-geo-ha?expand=1
47 | 
48 | postgres_ha_network_rings:          # Use multiple networks for cluster interconnection. Defaults to the primary NIC.
49 |   # ring0: "eth0"                   # Primary network. The "lowest" string (ASCII compare) will be selected as the primary network ring.
50 |   # ring1: "eth1"                   # Additional network. You can use any names instead of "ring*". These will be appended to a hostname.
51 | 
52 | postgres_ha_mcast_enable: False     # Enable cluster communication using multicast (sets transport=udp, rrpmode=passive). \
53 |                                     # For mcast to work, you need to set postgres_ha_network_rings. \
54 |                                     # CentOS 6: it is recommended to enable multicast. You need at least 2 postgres_ha_network_rings.
55 | 
56 | postgres_ha_pcs_advanced_params:    # Additional parameters for the "pcs cluster setup" command.
57 |   # token: 1000
58 |   # mcast0: '239.255.1.1'           # these are the default mcast addresses for the respective cluster network rings
59 |   # mcast1: '239.255.2.1'
60 |   # mcastport0: 5405
61 |   # mcastport1: 5405
62 |   #Ignored params: 'addr0', 'addr1', 'addr2', 'addr3', 'transport' (they are set by postgres_ha_mcast_enable)
63 | 
64 | postgres_ha_gui_enable: False       # enable the GUI on all hosts (one host is sufficient for the GUI to work, \
65 |                                     # but when it goes down, you lose the GUI) on https port 2224
--------------------------------------------------------------------------------
/files/pgsqlms-2.2.0-fix-pg10:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # This program is open source, licensed under the PostgreSQL License.
3 | # For license terms, see the LICENSE file.
4 | #
5 | # Copyright (C) 2016-2017: Jehan-Guillaume de Rorthais and Mael Rimbault
6 | 
7 | =head1 NAME
8 | 
9 | ocf_heartbeat_pgsqlms - A PostgreSQL multi-state resource agent for Pacemaker
10 | 
11 | =head1 SYNOPSIS
12 | 
13 | B<pgsqlms> [start | stop | monitor | promote | demote | notify | reload | methods | meta-data | validate-all]
14 | 
15 | =head1 DESCRIPTION
16 | 
17 | Resource script for PostgreSQL in replication. It manages PostgreSQL servers using streaming replication as an HA resource.
18 | 
19 | =cut
20 | 
21 | use strict;
22 | use warnings;
23 | use 5.008;
24 | 
25 | use POSIX qw(locale_h);
26 | use Scalar::Util qw(looks_like_number);
27 | use File::Spec;
28 | use File::Temp;
29 | use Data::Dumper;
30 | 
31 | use FindBin;
32 | use lib "$FindBin::RealBin/../lib/";
33 | use lib "$FindBin::RealBin/../../lib/heartbeat/";
34 | 
35 | use OCF_ReturnCodes;
36 | use OCF_Directories;
37 | use OCF_Functions;
38 | 
39 | our $VERSION = 'v2.2.0';
40 | our $PROGRAM = 'pgsqlms';
41 | 
42 | # OCF environment
43 | my $OCF_RESOURCE_INSTANCE = $ENV{'OCF_RESOURCE_INSTANCE'};
44 | my $OCF_ACTION = $ARGV[0];
45 | my $OCF_RUNNING_SLAVE = $OCF_SUCCESS;
46 | my %OCF_NOTIFY_ENV = ocf_notify_env() if $OCF_ACTION eq 'notify';
47 | 
48 | # Default parameter values
49 | my $system_user_default = "postgres";
50 | my $bindir_default = "/usr/bin";
51 | my $pgdata_default = "/var/lib/pgsql/data";
52 | my $pghost_default = "/tmp";
53 | my $pgport_default = 5432;
54 | my $start_opts_default = "";
55 | my $maxlag_default = "0";
56 | 
57 | # Set default values if not found in environment
58 | my $system_user = $ENV{'OCF_RESKEY_system_user'} || $system_user_default;
59 | my $bindir = $ENV{'OCF_RESKEY_bindir'} || $bindir_default;
60 | my $pgdata = $ENV{'OCF_RESKEY_pgdata'} || $pgdata_default;
61 | my $datadir = $ENV{'OCF_RESKEY_datadir'} || $pgdata;
62 | my $pghost = $ENV{'OCF_RESKEY_pghost'} || $pghost_default;
63 | my $pgport = $ENV{'OCF_RESKEY_pgport'} || $pgport_default;
64 | my $start_opts = $ENV{'OCF_RESKEY_start_opts'} || $start_opts_default;
65 | my $maxlag = $ENV{'OCF_RESKEY_maxlag'} || $maxlag_default;
66 | my $recovery_tpl = $ENV{'OCF_RESKEY_recovery_template'}
67 |     || "$pgdata/recovery.conf.pcmk";
68 | 
69 | 
70 | # PostgreSQL commands path
71 | my $PGCTL = "$bindir/pg_ctl";
72 | my $PGPSQL = "$bindir/psql";
73 | my $PGCTRLDATA = "$bindir/pg_controldata";
74 | my $PGISREADY = "$bindir/pg_isready";
75 | my $PGWALDUMP = "$bindir/pg_waldump";
76 | 
77 | # pacemaker commands path
78 | my $CRM_MASTER = "$HA_SBIN_DIR/crm_master --lifetime forever";
79 | my $CRM_NODE = "$HA_SBIN_DIR/crm_node";
80 | my $CRM_RESOURCE = "$HA_SBIN_DIR/crm_resource";
81 | my $ATTRD_PRIV = "$HA_SBIN_DIR/attrd_updater --private --lifetime reboot";
82 | 
83 | # Global vars
84 | my $nodename;
85 | my $exit_code = 0;
86 | # numeric pgsql versions
87 | my $PGVERNUM;
88 | my $PGVER_10 = 100000;
89 | 
90 | # Run a query using psql.
91 | #
92 | # This function fills the result set referenced by its second argument and
93 | # returns the psql return code.
94 | #
95 | sub _query {
96 |     my $query = shift;
97 |     my $res = shift;
98 |     my $connstr = "dbname=postgres";
99 |     my $RS = chr(30); # ASCII RS (record separator)
100 |     my $FS = chr(3); # ASCII ETX (end of text)
101 |     my $postgres_uid = getpwnam( $system_user );
102 |     my $oldeuid = $>;
103 |     my $tmpfile;
104 |     my @res;
105 |     my $ans;
106 |     my $pid;
107 |     my $rc;
108 | 
109 |     unless ( defined $res and defined $query and $query ne '' ) {
110 |         ocf_log( 'debug', '_query: wrong parameters!' );
111 |         return -1;
112 |     }
113 | 
114 |     unless ( $tmpfile = File::Temp->new(
115 |             TEMPLATE => 'pgsqlms-XXXXXXXX',
116 |             DIR => $HA_RSCTMP
117 |         ) )
118 |     {
119 |         ocf_exit_reason( 'Could not create or write in a temp file' );
120 |         exit $OCF_ERR_INSTALLED;
121 |     }
122 | 
123 |     print $tmpfile $query;
124 |     chmod 0644, $tmpfile;
125 | 
126 |     ocf_log( 'debug', '_query: %s', $query );
127 | 
128 |     # Change the effective user to the given system_user so that, after
129 |     # forking, the child process has a uid allowing psql to connect w/o password
130 |     $> = $postgres_uid;
131 | 
132 |     # Forking + piping
133 |     $pid = open(my $KID, "-|");
134 | 
135 |     if ( $pid == 0 ) { # child
136 |         exec $PGPSQL, '--set', 'ON_ERROR_STOP=1', '-qXAtf', $tmpfile,
137 |             '-R', $RS, '-F', $FS, '--port', $pgport, '--host', $pghost,
138 |             $connstr;
139 |     }
140 | 
141 |     # parent
142 |     $> = $oldeuid;
143 | 
144 |     {
145 |         local $/;
146 |         $ans = <$KID>;
147 |     }
148 | 
149 |     close $KID;
150 |     $rc = $? >> 8;
151 | 
152 |     ocf_log( 'debug', '_query: psql return code: %d', $rc );
153 | 
154 |     if ( defined $ans ) {
155 |         chop $ans;
156 | 
157 |         push @{ $res }, [ split(chr(3) => $_, -1) ]
158 |             foreach split (chr(30) => $ans, -1);
159 | 
160 |         ocf_log( 'debug', '_query: @res: %s',
161 |             Data::Dumper->new( [ $res ] )->Terse(1)->Dump );
162 |     }
163 | 
164 |     # Possible return codes:
165 |     #  -1: wrong parameters
166 |     #   0: OK
167 |     #   1: failed to get resources (memory, missing file, ...)
168 |     #   2: unable to connect
169 |     #   3: query failed
170 |     return $rc;
171 | }
172 | 
173 | # Get the last received location on a standby
174 | # if the first argument is true, returns the value as decimal
175 | # if the first argument is false, returns the value as LSN
176 | # Returns undef if the query failed
177 | sub _get_last_received_lsn {
178 |     my ( $dec ) = @_;
179 |     my $pg_last_wal_receive_lsn = 'pg_last_wal_receive_lsn()';
180 |     my $pg_wal_lsn_diff = 'pg_wal_lsn_diff';
181 |     my $query;
182 |     my $rc;
183 |     my @rs;
184 | 
185 |     if ( $PGVERNUM < $PGVER_10 ) {
186 |         $pg_last_wal_receive_lsn = 'pg_last_xlog_receive_location()';
187 |         $pg_wal_lsn_diff = 'pg_xlog_location_diff';
188 |     }
189 | 
190 |     if ( $dec ) {
191 |         $query = "SELECT $pg_wal_lsn_diff( $pg_last_wal_receive_lsn, '0/0' )";
192 |     }
193 |     else {
194 |         $query = "SELECT $pg_last_wal_receive_lsn";
195 |     }
196 | 
197 |     $rc = _query( $query, \@rs );
198 | 
199 |     return $rs[0][0] if $rc == 0 and $rs[0][0];
200 | 
201 |     ocf_log( 'err', 'Could not query last received LSN (%s)', $rc ) if $rc != 0;
202 |     ocf_log( 'err', 'No values for last received LSN' )
203 |         if $rc == 0 and not $rs[0][0];
204 | 
205 |     return undef;
206 | }
207 | 
208 | # Get the master score for each connected standby
209 | # Directly returns the result set of the query, or exits with an error.
210 | # Exits with OCF_ERR_GENERIC if the query failed
211 | sub _get_lag_scores {
212 |     my $pg_current_wal_lsn = 'pg_current_wal_lsn()';
213 |     my $pg_wal_lsn_diff = 'pg_wal_lsn_diff';
214 |     my $write_lsn = 'write_lsn';
215 |     my $query;
216 |     my $rc;
217 |     my @rs;
218 | 
219 |     if ( $PGVERNUM < $PGVER_10 ) {
220 |         $pg_current_wal_lsn = 'pg_current_xlog_location()';
221 |         $pg_wal_lsn_diff = 'pg_xlog_location_diff';
222 |         $write_lsn = 'write_location';
223 |     }
224 | 
225 |     # We check locations of connected standbys by querying the
226 |     # "pg_stat_replication" view.
227 |     # The row_number applies to the result set ordered by write_location ASC, so
228 |     # the highest row_number should be given to the closest node to the
229 |     # master, then the lowest node name (alphanumeric sort) in case of equality.
230 |     # The result set itself is ordered by priority DESC to process the best known
231 |     # candidate first.
232 |     $query = qq{
233 |       SELECT application_name, priority, location, state, current_lag
234 |       FROM (
235 |         SELECT application_name,
236 |             (1000 - (
237 |                 row_number() OVER (
238 |                     PARTITION BY state IN ('startup', 'backup')
239 |                     ORDER BY location ASC, application_name ASC
240 |                 ) - 1
241 |             ) * 10
242 |             ) * CASE WHEN ( $maxlag > 0
243 |                             AND current_lag > $maxlag)
244 |                 THEN -1
245 |                 ELSE 1
246 |               END AS priority,
247 |             location, state, current_lag
248 |         FROM (
249 |             SELECT application_name, $write_lsn AS location, state,
250 |                 $pg_wal_lsn_diff($pg_current_wal_lsn, $write_lsn) AS current_lag
251 |             FROM pg_stat_replication
252 |         ) AS s2
253 |       ) AS s1
254 |       ORDER BY priority DESC
255 |     };
256 | 
257 |     $rc = _query( $query, \@rs );
258 | 
259 |     if ( $rc != 0 ) {
260 |         ocf_exit_reason( 'Query to get standby locations failed (%d)', $rc );
261 |         exit $OCF_ERR_GENERIC;
262 |     }
263 | 
264 |     return \@rs;
265 | }
266 | 
267 | # get the timeout for the current action, given from the environment var
268 | # Returns the timeout as an integer
269 | # undef if unknown
270 | sub _get_action_timeout {
271 |     my $timeout = $ENV{'OCF_RESKEY_CRM_meta_timeout'} / 1000;
272 | 
273 |     ocf_log( 'debug', '_get_action_timeout: known timeout: %s',
274 |         defined $timeout ? $timeout : 'undef' );
275 | 
276 |     return $timeout if defined $timeout and $timeout =~ /^\d+$/;
277 | 
278 |     return undef;
279 | }
280 | 
281 | # Get, parse and return the value of the given private attribute name
282 | # Returns an empty string if not found.
283 | sub _get_priv_attr {
284 |     my ( $name, $node ) = @_;
285 |     my $val = '';
286 |     my $node_arg = '';
287 |     my $ans;
288 | 
289 |     $node = '' unless defined $node;
290 |     $name = "$name-$OCF_RESOURCE_INSTANCE";
291 | 
292 |     $node_arg= "--node $node" if $node ne '';
293 | 
294 |     $ans = qx{ $ATTRD_PRIV --name "$name" --query $node_arg };
295 | 
296 |     $ans =~ m/^name=".*" host=".*" value="(.*)"$/;
297 | 
298 |     $val = $1 if defined $1;
299 | 
300 |     ocf_log( 'debug', '_get_priv_attr: value of "%s"%s is "%s"', $name,
301 |         ( $node ? " on \"$node\"": ""),
302 |         $val );
303 | 
304 |     return $val;
305 | }
306 | 
307 | # Set the given private attribute name to the given value
308 | # As setting an attribute is asynchronous, this will return as soon as the
309 | # attribute is really set by attrd and available.
310 | sub _set_priv_attr {
311 |     my ( $name, $val ) = @_;
312 |     my $name_instance = "$name-$OCF_RESOURCE_INSTANCE";
313 | 
314 |     ocf_log( 'debug', '_set_priv_attr: set "%s=%s"...', $name_instance, $val );
315 | 
316 |     qx{ $ATTRD_PRIV --name "$name_instance" --update "$val" };
317 | 
318 |     # give the attr name without the resource instance name, as _get_priv_attr
319 |     # adds it as well
320 |     while ( _get_priv_attr( $name ) ne $val ) {
321 |         ocf_log( 'debug', '_set_priv_attr: waiting attrd ack for "%s"...', $name_instance );
322 |         select( undef, undef, undef, 0.1 );
323 |     }
324 | 
325 |     return;
326 | }
327 | 
328 | # Delete the given private attribute.
329 | # As deleting an attribute is asynchronous, this will return as soon as the
330 | # attribute is really deleted by attrd.
331 | sub _delete_priv_attr {
332 |     my ( $name ) = @_;
333 |     my $name_instance = "$name-$OCF_RESOURCE_INSTANCE";
334 | 
335 |     ocf_log( 'debug', '_delete_priv_attr: delete "%s"...', $name_instance );
336 | 
337 |     qx{ $ATTRD_PRIV --name "$name_instance" --delete };
338 | 
339 |     # give the attr name without the resource instance name, as _get_priv_attr
340 |     # adds it as well
341 |     while ( _get_priv_attr( $name ) ne '' ) {
342 |         ocf_log( 'debug', '_delete_priv_attr: waiting attrd ack for "%s"...',
343 |             $name_instance );
344 |         select( undef, undef, undef, 0.1 );
345 |     }
346 | 
347 |     return;
348 | }
349 | 
350 | # Get, parse and return the resource master score on the given node.
351 | # Returns an empty string if not found.
352 | # Returns undef on crm_master call error
353 | sub _get_master_score {
354 |     my ( $node ) = @_;
355 |     my $node_arg = '';
356 |     my $score;
357 | 
358 |     $node_arg = sprintf '--node "%s"', $node if defined $node and $node ne '';
359 | 
360 |     $score = qx{ $CRM_MASTER --quiet --get-value $node_arg 2> /dev/null };
361 | 
362 |     return '' unless $? == 0;
363 | 
364 |     chomp $score;
365 | 
366 |     $score = '' unless defined $score;
367 | 
368 |     return $score;
369 | }
370 | 
371 | # Set the master score of the local node or the optionally given node.
372 | # As setting an attribute is asynchronous, this will return as soon as the
373 | # attribute is really set by attrd and available everywhere.
374 | sub _set_master_score {
375 |     my ( $score, $node ) = @_;
376 |     my $node_arg = '';
377 |     my $tmp;
378 | 
379 |     $node_arg = sprintf '--node "%s"', $node if defined $node and $node ne '';
380 | 
381 |     qx{ $CRM_MASTER $node_arg --quiet --update "$score" };
382 | 
383 |     while ( ( $tmp = _get_master_score( $node ) ) ne $score ) {
384 |         ocf_log( 'debug',
385 |             '_set_master_score: waiting to set score to "%s" (currently "%s")...',
386 |             $score, $tmp );
387 |         select(undef, undef, undef, 0.1);
388 |     }
389 | 
390 |     return;
391 | }
392 | 
393 | # _master_score_exists
394 | # This subroutine checks if a master score is set for one of the relative clones
395 | # in the cluster and if the score is greater than or equal to 0.
396 | # Returns 1 if at least one master score >= 0 is found.
397 | # Returns 0 otherwise
398 | sub _master_score_exists {
399 |     my @partition_nodes = split /\s+/ => qx{ $CRM_NODE --partition };
400 | 
401 |     foreach my $node ( @partition_nodes ) {
402 |         my $score = _get_master_score( $node );
403 | 
404 |         return 1 if defined $score and $score ne '' and $score > -1;
405 |     }
406 | 
407 |     return 0;
408 | }
409 | 
410 | # Check if the current transition is a recovery of a master clone on the given node.
411 | sub _is_master_recover {
412 |     my ( $n ) = @_;
413 | 
414 |     return (
415 |         scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'master'} }
416 |         and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'promote'} }
417 |     );
418 | }
419 | 
420 | # Check if the current transition is a recovery of a slave clone on the given node.
421 | sub _is_slave_recover {
422 |     my ( $n ) = @_;
423 | 
424 |     return (
425 |         scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'slave'} }
426 |         and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'start'} }
427 |     );
428 | }
429 | 
430 | # Check if the current transition is a switchover to the given node.
431 | sub _is_switchover {
432 |     my ( $n ) = @_;
433 |     my $old = $OCF_NOTIFY_ENV{'master'}[0]{'uname'};
434 | 
435 |     return 0 if scalar @{ $OCF_NOTIFY_ENV{'master'} } != 1
436 |         or scalar @{ $OCF_NOTIFY_ENV{'demote'} } != 1
437 |         or scalar @{ $OCF_NOTIFY_ENV{'promote'} } != 1;
438 | 
439 |     return (
440 |         scalar grep { $_->{'uname'} eq $old } @{ $OCF_NOTIFY_ENV{'demote'} }
441 |         and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'slave'} }
442 |         and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'promote'} }
443 |         and not scalar grep { $_->{'uname'} eq $old } @{ $OCF_NOTIFY_ENV{'stop'} }
444 |     );
445 | }
446 | 
447 | # Run the given command as the "system_user" given as parameter.
448 | # It basically forks and seteuid/setuid away from root.
449 | #
450 | sub _runas {
451 |     my $rc;
452 |     my $pid;
453 |     my @cmd = @_;
454 |     my (undef, undef, $postgres_uid, $postgres_gid ) = getpwnam( $system_user );
455 | 
456 |     $pid = fork;
457 | 
458 |     if ( $pid == 0 ) { # in child
459 |         $) = "$postgres_gid $postgres_gid";
460 |         while ( my ( undef, undef, $gid, $members ) = getgrent ) {
461 |             $) .= " $gid" if grep { $system_user eq $_ } split /\s+/, $members
462 |         }
463 |         $( = $postgres_gid;
464 | 
465 |         $< = $> = $postgres_uid;
466 | 
467 |         exec @cmd;
468 |     }
469 | 
470 |     ocf_log( 'debug', '_runas: launching as "%s" command "%s"', $system_user,
471 |         join(' ', @cmd) );
472 | 
473 |     waitpid $pid, 0;
474 |     $rc = $? >> 8;
475 | 
476 |     return $rc;
477 | }
478 | 
479 | # Check if instance is listening on the given host/port.
480 | #
481 | sub _pg_isready {
482 |     my $rc = _runas( $PGISREADY, '-h', $pghost, '-p', $pgport );
483 | 
484 |     # Possible error codes:
485 |     #   1: ping rejected (usually when instance is in startup, in crash
486 |     #      recovery, in warm standby, or when a shutdown is in progress)
487 |     #   2: no response, usually means the instance is down
488 |     #   3: no attempt, probably a syntax error, should not happen
489 |     return $rc;
490 | }
491 | 
492 | # Check the postmaster.pid file and the postmaster process.
493 | # WARNING: we do not distinguish the scenario where postmaster.pid does not
494 | # exist from the scenario where the process is still alive. It should be ok
495 | # though, as this is considered a hard error from monitor.
496 | #
497 | sub _pg_ctl_status {
498 |     my $rc = _runas( $PGCTL, '--pgdata', $pgdata, 'status' );
499 | 
500 |     # Possible error codes:
501 |     #   3: postmaster.pid file does not exist OR it does but the process
502 |     #      with the PID found in the file is not alive
503 |     return $rc;
504 | }
505 | 
506 | # Start the local instance using pg_ctl
507 | #
508 | sub _pg_ctl_start {
509 |     # Add 60s to the timeout or use a 24h timeout fallback to make sure
510 |     # Pacemaker will give up before us and take decisions
511 |     my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60;
512 | 
513 |     my @cmd = ( $PGCTL, '--pgdata', $pgdata, '-w', '--timeout', $timeout, 'start' );
514 | 
515 |     push @cmd => ( '-o', $start_opts ) if $start_opts ne '';
516 | 
517 |     return _runas( @cmd );
518 | }
519 | 
520 | # Create the recovery file based on the given template.
521 | # Given template MUST at least contain:
522 | #   standby_mode=on
523 | #   primary_conninfo='...'
524 | #   recovery_target_timeline = 'latest'
525 | #
526 | sub _create_recovery_conf {
527 |     my $fh;
528 |     my (undef, undef, $uid, $gid) = getpwnam($system_user);
529 |     my $recovery_conf = '';
530 |     my $recovery_file = "$datadir/recovery.conf";
531 | 
532 |     ocf_log( 'debug',
533 |         '_create_recovery_conf: get replication configuration from the template file "%s"',
534 |         $recovery_tpl );
535 | 
536 |     # Create the recovery.conf file to start the instance as a secondary.
537 |     # NOTE: the recovery.conf is supposed to be set up so the secondary can
538 |     # connect to the primary instance, usually using a virtual IP address.
539 |     # As there is no primary instance available at startup, secondaries will
540 |     # complain about failing to connect.
541 |     # As we cannot reload a recovery.conf file on a standby without restarting
542 |     # it, we will live with this.
543 |     # FIXME how would the reload help us in this case ?
544 |     unless ( defined open( $fh, '<', $recovery_tpl ) ) {
545 |         ocf_exit_reason( 'Could not open file "%s": %s', $recovery_tpl, $! );
546 |         exit $OCF_ERR_CONFIGURED;
547 |     }
548 | 
549 |     # Copy all parameters from the template file
550 |     while (my $line = <$fh>) {
551 |         chomp $line;
552 |         $recovery_conf .= "$line\n";
553 |     }
554 |     close $fh;
555 | 
556 |     ocf_log( 'debug',
557 |         '_create_recovery_conf: write the replication configuration to "%s" file',
558 |         $recovery_file );
559 | 
560 |     unless ( open( $fh, '>', $recovery_file ) ) {
561 |         ocf_exit_reason( 'Could not open file "%s": %s', $recovery_file, $! );
562 |         exit $OCF_ERR_CONFIGURED;
563 |     }
564 | 
565 |     # Write the recovery.conf file using configuration from the template file
566 |     print $fh $recovery_conf;
567 | 
568 |     close $fh;
569 | 
570 |     unless ( chown $uid, $gid, $recovery_file ) {
571 |         ocf_exit_reason( 'Could not set owner of "%s"', $recovery_file );
572 |         exit $OCF_ERR_CONFIGURED;
573 |     };
574 | }
575 | 
576 | # Parse and return various information about the local PostgreSQL instance as
577 | # reported by its controldata file.
578 | #
579 | # WARNING: the status is NOT updated in case of crash.
580 | #
581 | sub _get_controldata {
582 |     my %controldata;
583 |     my $ans;
584 | 
585 |     $ans = qx{ $PGCTRLDATA "$datadir" 2>/dev/null };
586 | 
587 |     # Parse the output of pg_controldata.
588 |     # This output is quite stable between pg versions, but we might need to sort
589 |     # it at some point if things are moving in there...
590 |     $ans =~ m{
591 |         # get the current state
592 |         ^\QDatabase cluster state\E:\s+(.*?)\s*$
593 |         .*
594 |         # Get the latest known REDO location
595 |         ^\QLatest checkpoint's REDO location\E:\s+([/0-9A-F]+)\s*$
596 |         .*
597 |         # Get the latest known TL
598 |         ^\QLatest checkpoint's TimeLineID\E:\s+(\d+)\s*$
599 |         .*
600 |         # Get the wal level
601 |         # NOTE: pg_controldata output changed with PostgreSQL 9.5, so we need to
602 |         # account for both syntaxes
603 |         ^(?:\QCurrent \E)?\Qwal_level setting\E:\s+(.*?)\s*$
604 |     }smx;
605 | 
606 |     $controldata{'state'} = $1 if defined $1;
607 |     $controldata{'redo'} = $2 if defined $2;
608 |     $controldata{'tl'} = $3 if defined $3;
609 |     $controldata{'wal_level'} = $4 if defined $4;
610 | 
611 |     ocf_log( 'debug',
612 |         "_get_controldata: found: %s",
613 |         Data::Dumper->new( [ \%controldata ] )->Terse(1)->Dump );
614 | 
615 |     return %controldata if defined $controldata{'state'}
616 |         and defined $controldata{'tl'}
617 |         and defined $controldata{'redo'}
618 |         and defined $controldata{'wal_level'};
619 | 
620 |     ocf_exit_reason( 'Could not read all data from controldata file for "%s"',
621 |         $datadir );
622 | 
623 |     ocf_log( 'debug',
624 |         "_get_controldata: controldata file: %s",
625 |         Data::Dumper->new( [ \%controldata ] )->Terse(1)->Dump, $ans );
626 | 
627 |     exit $OCF_ERR_ARGS;
628 | }
629 | 
630 | # Use pg_controldata to check the state of the PostgreSQL server. This
631 | # function returns codes depending on this state, so we can find whether the
632 | # instance is a primary or a secondary, or use it to detect any inconsistency
633 | # that could indicate the instance has crashed.
634 | #
635 | sub _controldata_to_ocf {
636 |     my %cdata = _get_controldata();
637 | 
638 |     while ( 1 ) {
639 |         ocf_log( 'debug', '_controldata: instance "%s" state is "%s"',
640 |             $OCF_RESOURCE_INSTANCE, $cdata{'state'} );
641 | 
642 |         # Instance should be running as a primary.
643 |         return $OCF_RUNNING_MASTER if $cdata{'state'} eq "in production";
644 | 
645 |         # Instance should be running as a secondary.
646 |         # This state includes warm standby (rejects connections attempts,
647 |         # including pg_isready)
648 |         return $OCF_SUCCESS if $cdata{'state'} eq "in archive recovery";
649 | 
650 | 
651 |         # The instance should be stopped.
652 |         # We don't care if it was a primary or secondary before, because we
653 |         # always start instances as secondaries, and then promote if necessary.
654 |         return $OCF_NOT_RUNNING if $cdata{'state'} eq "shut down"
655 |             or $cdata{'state'} eq "shut down in recovery";
656 | 
657 |         # The state is "in crash recovery", "starting up" or "shutting down".
658 |         # This state should be transitional, so we wait and loop to check if
659 |         # it changes.
660 |         # If it does not, pacemaker will eventually abort with a timeout.
661 |         ocf_log( 'debug',
662 |             '_controldata: waiting for transitional state "%s" to finish',
663 |             $cdata{'state'} );
664 |         sleep 1;
665 |         %cdata = _get_controldata();
666 |     }
667 | 
668 |     # If we reach this point, something went really wrong with this code or
669 |     # pg_controldata.
670 |     ocf_exit_reason( 'Unable to get instance "%s" state using pg_controldata',
671 |         $OCF_RESOURCE_INSTANCE );
672 | 
673 |     return $OCF_ERR_INSTALLED ;
674 | }
675 | 
676 | # Check the write_location of all secondaries, and adapt their master score so
677 | # that the instance closest to the master will be the selected candidate should
678 | # a promotion be triggered.
679 | # NOTE: This is only a hint to pacemaker! The selected candidate for promotion
680 | # actually re-checks that it is the best candidate and forces a re-election by
681 | # failing if a better one exists. This avoids a race condition between the call
682 | # of the monitor action and the promotion, where another slave might have
683 | # caught up faster with the master.
684 | # NOTE: we cannot directly use the write_location, nor an lsn_diff value, as the
685 | # promotion score, as Pacemaker considers any value greater than 1,000,000 as
686 | # INFINITY.
687 | #
688 | # This sub is supposed to be executed from a master monitor action.
689 | #
690 | sub _check_locations {
691 |     my $node_score;
692 |     my $row_num;
693 |     my $row;
694 |     my @rs;
695 | 
696 |     # Call crm_node to exclude nodes that are not part of the cluster at this
697 |     # point.
698 |     my $partition_nodes = qx{ $CRM_NODE --partition };
699 | 
700 |     @rs = @{ _get_lag_scores() };
701 | 
702 |     $row_num = scalar @rs;
703 | 
704 |     # If there is no row left at this point, it means that there is no
705 |     # secondary instance connected.
706 |     ocf_log( 'warning', 'No secondary connected to the master' )
707 |         if $row_num == 0;
708 | 
709 |     # For each connected standby, set its master score based on the following
710 |     # rule: the first known node/application, with the highest priority and
711 |     # with an acceptable state.
712 |     while ( $row = shift @rs ) {
713 | 
714 |         if ( $partition_nodes !~ /$row->[0]/ ) {
715 |             ocf_log( 'info', 'Ignoring unknown application_name/node "%s"',
716 |                 $row->[0] );
717 |             next;
718 |         }
719 | 
720 |         if ( $row->[0] eq $nodename ) {
721 |             ocf_log( 'warning', 'Streaming replication with myself!' );
722 |             next;
723 |         }
724 | 
725 |         $node_score = _get_master_score( $row->[0] );
726 | 
727 |         if ( $row->[3] =~ /^\s*(?:startup|backup)\s*$/ ) {
728 |             # We exclude any standby being in state backup (pg_basebackup) or
729 |             # startup (new standby or failing standby)
730 |             ocf_log( 'info', 'Forbidding promotion on "%s" in state "%s"',
731 |                 $row->[0], $row->[3] );
732 | 
733 |             _set_master_score( '-1', $row->[0] ) unless $node_score eq '-1';
734 |         }
735 |         else {
736 |             ocf_log( 'debug',
737 |                 '_check_locations: checking "%s" promotion ability (current_score: %s, priority: %s, location: %s, lag: %s)',
738 |                 $row->[0], $node_score, $row->[1], $row->[2], $row->[4] );
739 | 
740 |             if ( $node_score ne $row->[1] ) {
741 |                 if ( $row->[1] < -1 ) {
742 |                     ocf_log( 'info', 'Update score of "%s" from %s to %s because replication lag (%s) is higher than given maxlag (%s).',
743 |                         $row->[0], $node_score, $row->[1], $row->[4], $maxlag );
744 |                 }
745 |                 else {
746 |                     ocf_log( 'info', 'Update score of "%s" from %s to %s because of a change in the replication lag (%s).',
747 |                         $row->[0], $node_score, $row->[1], $row->[4] );
748 |                 }
749 |                 _set_master_score( $row->[1], $row->[0] );
750 |             }
751 |             else {
752 |                 ocf_log( 'debug',
753 |                     '_check_locations: "%s" keeps its current score of %s',
754 |                     $row->[0], $row->[1] );
755 |             }
756 |         }
757 | 
758 |         # Remove this node from the known nodes list.
759 |         $partition_nodes =~ s/(?:^|\s)$row->[0](?:\s|$)/ /g;
760 |     }
761 | 
762 |     $partition_nodes =~ s/(?:^\s+)|(?:\s+$)//g;
763 | 
764 |     # If there are still nodes in "partition_nodes", it means there is no
765 |     # corresponding line in "pg_stat_replication".
766 |     foreach my $node (split /\s+/ => $partition_nodes) {
767 |         # Exclude the current node.
768 |         next if $node eq $nodename;
769 | 
770 |         # do not warn if the master score is already set to -1000.
771 |         # this avoids log flooding (gh #138)
772 |         $node_score = _get_master_score( $node );
773 |         next if $node_score eq '-1000';
774 | 
775 |         ocf_log( 'warning', '"%s" is not connected to the primary', $node );
776 |         _set_master_score( '-1000', $node );
777 |     }
778 | 
779 |     # Finally set the master score if not already done
780 |     $node_score = _get_master_score();
781 |     _set_master_score( '1001' ) unless $node_score eq '1001';
782 | 
783 |     return $OCF_SUCCESS;
784 | }
785 | 
786 | # _check_switchover
787 | # Check if the pgsql switchover to the local node is safe.
788 | # This is supposed to be called **after** the master has been stopped or demoted.
789 | # This sub checks if the local standby received the shutdown checkpoint from the
790 | # old master to make sure it can take over the master role and the old master
791 | # will be able to catch up as a standby afterwards.
792 | #
793 | # Returns 0 if the switchover is safe
794 | # Returns 1 if the switchover is not safe
795 | # Returns 2 on internal error
796 | sub _check_switchover {
797 |     my $has_sht_chk = 0;
798 |     my $last_redo;
799 |     my $last_lsn;
800 |     my $ans;
801 |     my $rc;
802 |     my $tl;
803 |     my %cdata;
804 | 
805 |     ocf_log( 'info', 'Switchover in progress from "%s" to "%s".'
806 |         .' Need to check the last record in WAL',
807 |         $OCF_NOTIFY_ENV{'demote'}[0]{'uname'}, $nodename );
808 | 
809 |     # check if we received the shutdown checkpoint of the master during its
810 |     # demote process.
811 |     # We need the last local checkpoint LSN and the last received LSN from the
812 |     # master to check in the WAL between these addresses if we have a
813 |     # "shutdown checkpoint", using pg_xlogdump/pg_waldump.
814 |     #
815 |     # Force a checkpoint to make sure the controldata shows the very last TL
816 |     # and the master's shutdown checkpoint
817 |     _query( q{ CHECKPOINT }, {} );
818 |     %cdata = _get_controldata();
819 |     $tl = $cdata{'tl'};
820 |     $last_redo = $cdata{'redo'};
821 | 
822 |     # Get the last received LSN from the master
823 |     $last_lsn = _get_last_received_lsn();
824 | 
825 |     unless ( defined $last_lsn ) {
826 |         ocf_exit_reason(
827 |             'Could not read last checkpoint and timeline from controldata file!'
828 |         );
829 | 
830 |         ocf_log( 'debug',
831 |             '_check_switchover: %s parameters: datadir:"%s", last_chk: "%s", tl: "%s", mast_lsn: "%s"',
832 |             $PGWALDUMP, $datadir, $last_redo, $tl, $last_lsn
833 |         );
834 | 
835 |         return 2;
836 |     }
837 | 
838 |     $ans = qx{ $PGWALDUMP --path "$datadir" --timeline "$tl" \\
839 |         --start "$last_redo" --end "$last_lsn" 2>&1 };
840 |     $rc = $?;
841 | 
842 |     ocf_log( 'debug',
843 |         '_check_switchover: %s rc: "%s", tl: "%s", last_chk: %s, last_lsn: %s, output: "%s"',
844 |         $PGWALDUMP, $rc, $tl, $last_redo, $last_lsn, $ans
845 |     );
846 | 
847 |     if ( $rc == 0 and
848 |         $ans =~ m{^rmgr: XLOG.*desc: (?i:checkpoint)(?::|_SHUTDOWN) redo [0-9A-F/]+; tli $tl;.*; shutdown$}m
849 |     ) {
850 |         ocf_log( 'info', 'Slave received the shutdown checkpoint' );
851 |         return 0;
852 |     }
853 | 
854 |     ocf_exit_reason(
855 |         'Did not receive the shutdown checkpoint from the old master!' );
856 | 
857 |     return 1;
858 | }
859 | 
860 | # Check to confirm that the instance is really started, as _pg_isready stated,
861 | # and check if the instance is a primary or a secondary.
862 | #
863 | sub _confirm_role {
864 |     my $is_in_recovery;
865 |     my $rc;
866 |     my @rs;
867 | 
868 |     $rc = _query( "SELECT pg_is_in_recovery()", \@rs );
869 | 
870 |     $is_in_recovery = $rs[0][0];
871 | 
872 |     if ( $rc == 0 ) {
873 |         # The query was executed, check the result.
874 | if ( $is_in_recovery eq 't' ) { 875 | # The instance is a secondary. 876 | ocf_log( 'debug', "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a secondary"); 877 | return $OCF_SUCCESS; 878 | } 879 | elsif ( $is_in_recovery eq 'f' ) { 880 | # The instance is a primary. 881 | ocf_log( 'debug', "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a primary"); 882 | # Check lsn diff with current slaves if any 883 | _check_locations() if $OCF_ACTION eq 'monitor'; 884 | return $OCF_RUNNING_MASTER; 885 | } 886 | 887 | # This should not happen, raise a hard configuration error. 888 | ocf_exit_reason( 889 | 'Unexpected result from query to check if "%s" is a primary or a secondary: "%s"', 890 | $OCF_RESOURCE_INSTANCE, $is_in_recovery ); 891 | 892 | return $OCF_ERR_CONFIGURED; 893 | } 894 | elsif ( $rc == 1 or $rc == 2 ) { 895 | # psql could not connect to the instance. 896 | # As pg_isready reported the instance was listening, this error 897 | # could be a max_connections saturation. Just report a soft error. 898 | ocf_exit_reason( 'psql could not connect to instance "%s"', 899 | $OCF_RESOURCE_INSTANCE ); 900 | return $OCF_ERR_GENERIC; 901 | } 902 | 903 | # The query failed (rc: 3) or bad parameters were given (rc: -1). 904 | # This should not happen, raise a hard configuration error. 905 | ocf_exit_reason( 906 | 'The query to check if instance "%s" is a primary or a secondary failed (rc: %d)', 907 | $OCF_RESOURCE_INSTANCE, $rc ); 908 | 909 | return $OCF_ERR_CONFIGURED; 910 | } 911 | 912 | 913 | # Check to confirm if the instance is really stopped as _pg_isready stated 914 | # and if it was properly shut down. 915 | # 916 | sub _confirm_stopped { 917 | my $pgctlstatus_rc; 918 | my $controldata_rc; 919 | 920 | # Check the postmaster process status. 921 | $pgctlstatus_rc = _pg_ctl_status(); 922 | 923 | if ( $pgctlstatus_rc == 0 ) { 924 | # The PID file exists and the process is available. 925 | # That should not be the case, return an error. 926 | ocf_exit_reason( 927 | 'Instance "%s" is not listening, but the process referenced in postmaster.pid exists', 928 | $OCF_RESOURCE_INSTANCE ); 929 | return $OCF_ERR_GENERIC; 930 | } 931 | 932 | # The PID file does not exist or the process is not available. 933 | ocf_log( 'debug', 934 | '_confirm_stopped: no postmaster process found for instance "%s"', 935 | $OCF_RESOURCE_INSTANCE ); 936 | 937 | if ( -f "$datadir/backup_label" ) { 938 | # We are probably on a freshly built secondary that was not started yet. 939 | ocf_log( 'debug', 940 | '_confirm_stopped: backup_label file exists: probably on a never started secondary', 941 | ); 942 | return $OCF_NOT_RUNNING; 943 | } 944 | 945 | # Continue the check with pg_controldata. 946 | $controldata_rc = _controldata_to_ocf(); 947 | if ( $controldata_rc == $OCF_RUNNING_MASTER ) { 948 | # The controldata has not been updated to "shutdown". 949 | # It should mean we had a crash on a primary instance. 950 | ocf_exit_reason( 951 | 'Instance "%s" controldata indicates a running primary instance, the instance has probably crashed', 952 | $OCF_RESOURCE_INSTANCE ); 953 | return $OCF_FAILED_MASTER; 954 | } 955 | elsif ( $controldata_rc == $OCF_SUCCESS ) { 956 | # The controldata has not been updated to "shutdown in recovery". 957 | # It should mean we had a crash on a secondary instance. 958 | # There is no "FAILED_SLAVE" return code, so we return a generic error.
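# (For reference, these symbols map to the standard OCF return codes:
# OCF_SUCCESS=0, OCF_ERR_GENERIC=1, OCF_ERR_ARGS=2, OCF_ERR_INSTALLED=5,
# OCF_ERR_CONFIGURED=6, OCF_NOT_RUNNING=7, OCF_RUNNING_MASTER=8 and
# OCF_FAILED_MASTER=9. The OCF spec simply defines no dedicated code for a
# failed slave, hence the generic error below.)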
959 | ocf_exit_reason( 960 | 'Instance "%s" controldata indicates a running secondary instance, the instance has probably crashed', 961 | $OCF_RESOURCE_INSTANCE ); 962 | return $OCF_ERR_GENERIC; 963 | } 964 | elsif ( $controldata_rc == $OCF_NOT_RUNNING ) { 965 | # The controldata state is consistent, the instance was probably 966 | # properly shut down. 967 | ocf_log( 'debug', 968 | '_confirm_stopped: instance "%s" controldata indicates that the instance was properly shut down', 969 | $OCF_RESOURCE_INSTANCE ); 970 | return $OCF_NOT_RUNNING; 971 | } 972 | 973 | # Something went wrong with the controldata check. 974 | ocf_exit_reason( 975 | 'Could not get instance "%s" status from controldata (returned: %d)', 976 | $OCF_RESOURCE_INSTANCE, $controldata_rc ); 977 | 978 | return $OCF_ERR_GENERIC; 979 | } 980 | 981 | ############################################################ 982 | #### OCF FUNCS 983 | 984 | 985 | 986 | =head1 SUPPORTED PARAMETERS 987 | 988 | =over 989 | 990 | =item B<pgdata> 991 | 992 | Location of the PGDATA of your instance 993 | 994 | (optional, string, default "/var/lib/pgsql/data") 995 | 996 | =item B<pghost> 997 | 998 | The socket directory or IP address to use to connect to the local instance 999 | 1000 | (optional, string, default "/tmp") 1001 | 1002 | =item B<pgport> 1003 | 1004 | The port to connect to the local instance 1005 | 1006 | (optional, integer, default "5432") 1007 | 1008 | =item B<bindir> 1009 | 1010 | Location of the PostgreSQL binaries. 1011 | 1012 | (optional, string, default "/usr/bin") 1013 | 1014 | =item B<system_user> 1015 | 1016 | The system owner of your instance's process 1017 | 1018 | (optional, string, default "postgres") 1019 | 1020 | =item B<recovery_template> 1021 | 1022 | The local template that will be copied as the C<recovery.conf> file. 1023 | This template file must exist on all nodes. 1024 | 1025 | (optional, string, default "$PGDATA/recovery.conf.pcmk") 1026 | 1027 | =item B<maxlag> 1028 | 1029 | Maximum lag allowed on a standby before we set a negative master score on it. 1030 | The calculation is based on the difference between the current xlog location on 1031 | the master and the write location on the standby. 1032 | 1033 | (optional, integer, default "0", which disables this feature) 1034 | 1035 | =item B<datadir> 1036 | 1037 | Path to the directory set in C<data_directory> from your postgresql.conf file. 1038 | This parameter has the same default as PostgreSQL itself: the C<pgdata> 1039 | parameter value. 1040 | 1041 | Unless you have a special PostgreSQL setup and you understand this parameter, 1042 | B<ignore it>. 1043 | 1044 | (optional, string, defaults to the value of C<pgdata>) 1045 | 1046 | =item B<start_opts> 1047 | 1048 | Additional arguments given to the postgres process on startup. See 1049 | "postgres --help" for available options. Useful when the postgresql.conf file 1050 | is not in the data directory (PGDATA), eg.: 1051 | 1052 | -c config_file=/etc/postgresql/9.3/main/postgresql.conf 1053 | 1054 | (optional, string, default "") 1055 | 1056 | =back 1057 | 1058 | =cut 1059 | 1060 | sub ocf_meta_data { 1061 | print qq{<?xml version="1.0"?> 1062 | <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> 1063 | <resource-agent name="pgsqlms"> 1064 | <version>1.0</version> 1065 | 1066 | <longdesc lang="en"> 1067 | Resource script for PostgreSQL in replication. It manages PostgreSQL servers using streaming replication as an HA resource. 1068 | </longdesc> 1069 | <shortdesc lang="en">Manages PostgreSQL servers in replication</shortdesc> 1070 | <parameters> 1071 | <parameter name="system_user" unique="0" required="0"> 1072 | <longdesc lang="en"> 1073 | System user account used to run the PostgreSQL server 1074 | </longdesc> 1075 | <shortdesc lang="en">PostgreSQL system User</shortdesc> 1076 | <content type="string" default="postgres" /> 1077 | </parameter> 1078 | 1079 | <parameter name="bindir" unique="0" required="0"> 1080 | <longdesc lang="en"> 1081 | Path to the directory storing the PostgreSQL binaries. The agent uses psql, pg_isready, pg_controldata and pg_ctl.
1082 | </longdesc> 1083 | <shortdesc lang="en">Path to the PostgreSQL binaries</shortdesc> 1084 | <content type="string" default="/usr/bin" /> 1085 | </parameter> 1086 | 1087 | <parameter name="pgdata" unique="1" required="0"> 1088 | <longdesc lang="en"> 1089 | Path to the data directory, e.g. PGDATA 1090 | </longdesc> 1091 | <shortdesc lang="en">Path to the data directory</shortdesc> 1092 | <content type="string" default="/var/lib/pgsql/data" /> 1093 | </parameter> 1094 | 1095 | <parameter name="datadir" unique="1" required="0"> 1096 | <longdesc lang="en"> 1097 | Path to the directory set in data_directory from your postgresql.conf file. This parameter 1098 | has the same default as PostgreSQL itself: the pgdata parameter value. Unless you have a 1099 | special PostgreSQL setup and you understand this parameter, ignore it. 1100 | </longdesc> 1101 | <shortdesc lang="en">Path to the directory set in data_directory from your postgresql.conf file</shortdesc> 1102 | <content type="string" default="PGDATA" /> 1103 | </parameter> 1104 | 1105 | <parameter name="pghost" unique="0" required="0"> 1106 | <longdesc lang="en"> 1107 | Host IP address or unix socket folder the instance is listening on. 1108 | </longdesc> 1109 | <shortdesc lang="en">Instance IP or unix socket folder</shortdesc> 1110 | <content type="string" default="/tmp" /> 1111 | </parameter> 1112 | 1113 | <parameter name="pgport" unique="0" required="0"> 1114 | <longdesc lang="en"> 1115 | Port the instance is listening on. 1116 | </longdesc> 1117 | <shortdesc lang="en">Instance port</shortdesc> 1118 | <content type="integer" default="5432" /> 1119 | </parameter> 1120 | 1121 | <parameter name="maxlag" unique="0" required="0"> 1122 | <longdesc lang="en"> 1123 | Maximum lag allowed on a standby before we set a negative master score on it. The calculation 1124 | is based on the difference between the current LSN on the master and the LSN 1125 | written on the standby. 1126 | This parameter must be a valid positive number as described in the PostgreSQL documentation. 1127 | See: https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC 1128 | </longdesc> 1129 | <shortdesc lang="en">Maximum write lag before we mark a standby as inappropriate to promote</shortdesc> 1130 | <content type="integer" default="0" /> 1131 | </parameter> 1132 | 1133 | <parameter name="recovery_template" unique="1" required="0"> 1134 | <longdesc lang="en"> 1135 | Path to the recovery.conf template. This file is simply copied to \$PGDATA 1136 | before starting the instance as slave 1137 | </longdesc> 1138 | <shortdesc lang="en">Path to the recovery.conf template.</shortdesc> 1139 | <content type="string" default="\$PGDATA/recovery.conf.pcmk" /> 1140 | </parameter> 1141 | 1142 | <parameter name="start_opts" unique="0" required="0"> 1143 | <longdesc lang="en"> 1144 | Additional arguments given to the postgres process on startup. 1145 | See "postgres --help" for available options. Useful when the 1146 | postgresql.conf file is not in the data directory (PGDATA), eg.: 1147 | "-c config_file=/etc/postgresql/9.3/main/postgresql.conf". 1148 | </longdesc> 1149 | <shortdesc lang="en">Additional arguments given to the postgres process on startup.</shortdesc> 1150 | <content type="string" default="" /> 1151 | </parameter> 1152 | </parameters> 1153 | <actions> 1154 | <action name="start" timeout="60" /> 1155 | <action name="stop" timeout="60" /> 1156 | <action name="reload" timeout="20" /> 1157 | <action name="promote" timeout="30" /> 1158 | <action name="demote" timeout="120" /> 1159 | <action name="monitor" depth="0" timeout="10" interval="15" role="Master" /> 1160 | <action name="monitor" depth="0" timeout="10" interval="16" role="Slave" /> 1161 | <action name="notify" timeout="60" /> 1162 | <action name="meta-data" timeout="5" /> 1163 | <action name="methods" timeout="5" /> 1164 | <action name="validate-all" timeout="5" /> 1165 | </actions> 1166 | </resource-agent> 1167 | 1168 | 1169 | }; 1170 | return; 1171 | } 1172 | 1173 | 1174 | =head1 SUPPORTED ACTIONS 1175 | 1176 | This resource agent supports the following actions (operations): 1177 | 1178 | =over 1179 | 1180 | =item B<start> 1181 | 1182 | Starts the resource. Suggested minimum timeout: 60. 1183 | 1184 | =item B<stop> 1185 | 1186 | Stops the resource. Suggested minimum timeout: 60. 1187 | 1188 | =item B<reload> 1189 | 1190 | Suggested minimum timeout: 20. 1191 | 1192 | =item B<promote> 1193 | 1194 | Promotes the resource to the Master role. Suggested minimum timeout: 30. 1195 | 1196 | =item B<demote> 1197 | 1198 | Demotes the resource to the Slave role. Suggested minimum timeout: 120. 1199 | 1200 | =item B<monitor (Master role)> 1201 | 1202 | Performs a detailed status check. Suggested minimum timeout: 10. 1203 | Suggested interval: 15. 1204 | 1205 | =item B<monitor (Slave role)> 1206 | 1207 | Performs a detailed status check. Suggested minimum timeout: 10. 1208 | Suggested interval: 16. 1209 | 1210 | =item B<notify> 1211 | 1212 | Suggested minimum timeout: 60 1213 | 1214 | =item B<meta-data> 1215 | 1216 | Retrieves resource agent metadata (internal use only). 1217 | Suggested minimum timeout: 5. 1218 | 1219 | =item B<methods> 1220 | 1221 | Suggested minimum timeout: 5. 1222 | 1223 | =item B<validate-all> 1224 | 1225 | Performs a validation of the resource configuration. 1226 | Suggested minimum timeout: 5.
1227 | 1228 | =back 1229 | 1230 | =cut 1231 | 1232 | sub ocf_methods { 1233 | print q{ 1234 | start 1235 | stop 1236 | reload 1237 | promote 1238 | demote 1239 | monitor 1240 | notify 1241 | methods 1242 | meta-data 1243 | validate-all 1244 | }; 1245 | return; 1246 | } 1247 | 1248 | ############################################################ 1249 | #### RA FUNCS 1250 | 1251 | sub pgsql_validate_all { 1252 | my $fh; 1253 | my $ans = ''; 1254 | my $PGVERSION; 1255 | my @content; 1256 | my %cdata; 1257 | 1258 | unless ( 1259 | ocf_version_cmp( $ENV{"OCF_RESKEY_crm_feature_set"}, '3.0.9' ) == 2 1260 | ) { 1261 | ocf_exit_reason( 1262 | 'PAF %s is only compatible with Pacemaker 1.1.13 or greater', 1263 | $VERSION 1264 | ); 1265 | exit $OCF_ERR_INSTALLED; 1266 | } 1267 | 1268 | # check notify=true 1269 | $ans = qx{ $CRM_RESOURCE --resource "$OCF_RESOURCE_INSTANCE" \\ 1270 | --meta --get-parameter notify 2>/dev/null }; 1271 | chomp $ans; 1272 | unless ( lc($ans) =~ /^true$|^on$|^yes$|^y$|^1$/ ) { 1273 | ocf_exit_reason( 1274 | 'You must set the meta parameter notify=true for your master resource' 1275 | ); 1276 | exit $OCF_ERR_INSTALLED; 1277 | } 1278 | 1279 | # check master-max=1 1280 | unless ( 1281 | defined $ENV{'OCF_RESKEY_CRM_meta_master_max'} 1282 | and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1' 1283 | ) { 1284 | ocf_exit_reason( 1285 | 'You must set the meta parameter master-max=1 for your master resource' 1286 | ); 1287 | exit $OCF_ERR_INSTALLED; 1288 | } 1289 | 1290 | # check pgdata 1291 | if ( ! -d $pgdata ) { 1292 | ocf_exit_reason( 'PGDATA "%s" does not exist', $pgdata ); 1293 | exit $OCF_ERR_ARGS; 1294 | } 1295 | 1296 | # check datadir 1297 | if ( ! -d $datadir ) { 1298 | ocf_exit_reason( 'data_directory "%s" does not exist', $datadir ); 1299 | exit $OCF_ERR_ARGS; 1300 | } 1301 | 1302 | # check PG_VERSION 1303 | if ( ! -s "$datadir/PG_VERSION" ) { 1304 | ocf_exit_reason( 'PG_VERSION does not exist in "%s"', $datadir ); 1305 | exit $OCF_ERR_ARGS; 1306 | } 1307 | 1308 | # check the recovery template 1309 | if ( ! -f $recovery_tpl ) { 1310 | ocf_exit_reason( 'Recovery template file "%s" does not exist', 1311 | $recovery_tpl ); 1312 | exit $OCF_ERR_ARGS; 1313 | } 1314 | 1315 | # check the content of the recovery template file 1316 | unless ( open( $fh, '<', $recovery_tpl ) ) { 1317 | ocf_exit_reason( 'Could not open file "%s": %s', $recovery_tpl, $!
); 1318 | exit $OCF_ERR_ARGS; 1319 | } 1320 | @content = <$fh>; 1321 | close $fh; 1322 | 1323 | unless ( looks_like_number($maxlag) ) { 1324 | ocf_exit_reason( 'maxlag is not a number: "%s"', $maxlag ); 1325 | exit $OCF_ERR_INSTALLED; 1326 | } 1327 | 1328 | unless ( grep /^\s*standby_mode\s*=\s*'?on'?\s*$/, @content ) { 1329 | ocf_exit_reason( 1330 | 'Recovery template file must contain "standby_mode = on"' ); 1331 | exit $OCF_ERR_ARGS; 1332 | } 1333 | 1334 | unless ( grep /^\s*recovery_target_timeline\s*=\s*'?latest'?\s*$/, @content ) { 1335 | ocf_exit_reason( 1336 | "Recovery template file must contain \"recovery_target_timeline = 'latest'\"" 1337 | ); 1338 | exit $OCF_ERR_ARGS; 1339 | } 1340 | 1341 | unless ( 1342 | grep /^\s*primary_conninfo\s*=.*['\s]application_name=$nodename['\s]/, 1343 | @content 1344 | ) { 1345 | ocf_exit_reason( 1346 | 'Recovery template file must contain "application_name=%s" in the primary_conninfo parameter', 1347 | $nodename ); 1348 | exit $OCF_ERR_ARGS; 1349 | } 1350 | 1351 | # check system user 1352 | unless ( defined getpwnam $system_user ) { 1353 | ocf_exit_reason( 'System user "%s" does not exist', $system_user ); 1354 | exit $OCF_ERR_ARGS; 1355 | } 1356 | 1357 | # require 9.3 minimum 1358 | unless ( open( $fh, '<', "$datadir/PG_VERSION" ) ) { 1359 | ocf_exit_reason( "Could not open file \"$datadir/PG_VERSION\": $!" ); 1360 | exit $OCF_ERR_ARGS; 1361 | } 1362 | read( $fh, $PGVERSION, 64 ); 1363 | close $fh; 1364 | 1365 | chomp $PGVERSION; 1366 | 1367 | $PGVERSION =~ /^(\d+)(?:\.(\d+))?$/; 1368 | 1369 | $PGVERNUM = $1 * 10000; 1370 | 1371 | # PostgreSQL >= 10 does not include the 2nd number in the major release 1372 | $PGVERNUM += $2 * 100 if $1 < 10; 1373 | 1374 | if ( $PGVERNUM < 90300 ) { 1375 | ocf_exit_reason( "PostgreSQL %s is not supported. Requires 9.3 or higher", 1376 | $PGVERSION ); 1377 | exit $OCF_ERR_INSTALLED; 1378 | } 1379 | 1380 | $PGWALDUMP = "$bindir/pg_xlogdump" if $PGVERNUM < $PGVER_10; 1381 | 1382 | # check binaries 1383 | unless ( -x $PGCTL and -x $PGPSQL and -x $PGCTRLDATA and -x $PGISREADY 1384 | and -x $PGWALDUMP 1385 | ) { 1386 | ocf_exit_reason( 1387 | "Missing one or more binaries. Check the following paths: %s, %s, %s, %s, %s", 1388 | $PGCTL, $PGPSQL, $PGCTRLDATA, $PGISREADY, $PGWALDUMP ); 1389 | exit $OCF_ERR_ARGS; 1390 | } 1391 | 1392 | # require wal_level >= hot_standby 1393 | %cdata = _get_controldata(); 1394 | unless ( $cdata{'wal_level'} =~ m{hot_standby|logical|replica} ) { 1395 | ocf_exit_reason( 1396 | 'wal_level must be one of "hot_standby", "logical" or "replica"' ); 1397 | exit $OCF_ERR_ARGS; 1398 | } 1399 | 1400 | return $OCF_SUCCESS; 1401 | } 1402 | 1403 | 1404 | # Start the PostgreSQL instance as a *secondary* 1405 | # 1406 | sub pgsql_start { 1407 | my $rc = pgsql_monitor(); 1408 | my %cdata = _get_controldata(); 1409 | my $prev_state = $cdata{'state'}; 1410 | 1411 | # The instance must be running as a secondary, or be stopped. 1412 | # Anything else is an error. 1413 | if ( $rc == $OCF_SUCCESS ) { 1414 | ocf_log( 'info', 'Instance "%s" already started', 1415 | $OCF_RESOURCE_INSTANCE ); 1416 | return $OCF_SUCCESS; 1417 | } 1418 | elsif ( $rc != $OCF_NOT_RUNNING ) { 1419 | ocf_exit_reason( 'Unexpected state for instance "%s" (returned %d)', 1420 | $OCF_RESOURCE_INSTANCE, $rc ); 1421 | return $OCF_ERR_GENERIC; 1422 | } 1423 | 1424 | # 1425 | # From here, the instance is NOT running for sure.
1426 | # 1427 | 1428 | ocf_log( 'debug', 1429 | 'pgsql_start: instance "%s" is not running, starting it as a secondary', 1430 | $OCF_RESOURCE_INSTANCE ); 1431 | 1432 | # Create recovery.conf from the template file. 1433 | _create_recovery_conf(); 1434 | 1435 | # Start the instance as a secondary. 1436 | $rc = _pg_ctl_start(); 1437 | 1438 | if ( $rc == 0 ) { 1439 | 1440 | # Wait for the start to finish. 1441 | sleep 1 while ( $rc = pgsql_monitor() ) == $OCF_NOT_RUNNING; 1442 | 1443 | if ( $rc == $OCF_SUCCESS ) { 1444 | ocf_log( 'info', 'Instance "%s" started', $OCF_RESOURCE_INSTANCE ); 1445 | 1446 | # Check if a master score exists in the cluster. 1447 | # During the very first start of the cluster, no master score will 1448 | # exist on any of the existing slaves, unless an admin designated 1449 | # one of them using crm_master. If no master score exists, the 1450 | # cluster will not promote a master among the slaves. 1451 | # To solve this situation, we check if there is at least one master 1452 | # score existing on one node in the cluster. Do nothing if at least 1453 | # one master score is found among the clones of the resource. If no 1454 | # master score exists, set a score of 1 only if the resource was a 1455 | # master that was shut down before the start. 1456 | if ( $prev_state eq "shut down" and not _master_score_exists() ) { 1457 | ocf_log( 'info', 'No master score around. Set mine to 1' ); 1458 | 1459 | _set_master_score( '1' ); 1460 | } 1461 | 1462 | return $OCF_SUCCESS; 1463 | } 1464 | 1465 | ocf_exit_reason( 1466 | 'Instance "%s" is not running as a slave (returned %d)', 1467 | $OCF_RESOURCE_INSTANCE, $rc ); 1468 | 1469 | return $OCF_ERR_GENERIC; 1470 | } 1471 | 1472 | ocf_exit_reason( 'Instance "%s" failed to start (rc: %d)', 1473 | $OCF_RESOURCE_INSTANCE, $rc ); 1474 | 1475 | return $OCF_ERR_GENERIC; 1476 | } 1477 | 1478 | # Stop the PostgreSQL instance 1479 | # 1480 | sub pgsql_stop { 1481 | my $rc; 1482 | my $state; 1483 | my $pidfile = "$datadir/postmaster.pid"; 1484 | # Add 60s to the timeout or use a 24h timeout fallback to make sure 1485 | # Pacemaker will give up before us and take its decisions 1486 | my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60; 1487 | 1488 | # The instance must be running as a secondary or a primary, or be stopped. 1489 | # Anything else is an error. 1490 | $rc = pgsql_monitor(); 1491 | if ( $rc == $OCF_NOT_RUNNING ) { 1492 | ocf_log( 'info', 'Instance "%s" already stopped', 1493 | $OCF_RESOURCE_INSTANCE ); 1494 | return $OCF_SUCCESS; 1495 | } 1496 | elsif ( $rc != $OCF_SUCCESS and $rc != $OCF_RUNNING_MASTER ) { 1497 | ocf_exit_reason( 'Unexpected state for instance "%s" (returned %d)', 1498 | $OCF_RESOURCE_INSTANCE, $rc ); 1499 | return $OCF_ERR_GENERIC; 1500 | } 1501 | 1502 | # 1503 | # From here, the instance is running for sure. 1504 | # 1505 | 1506 | ocf_log( 'debug', 'pgsql_stop: instance "%s" is running, stopping it', 1507 | $OCF_RESOURCE_INSTANCE ); 1508 | 1509 | # Try to quit with a proper shutdown. 1510 | 1511 | 1512 | $rc = _runas( $PGCTL, '--pgdata', $pgdata, '-w', '--timeout', $timeout, 1513 | '-m', 'fast', 'stop' ); 1514 | 1515 | if ( $rc == 0 ) { 1516 | # Wait for the stop to finish.
1517 | sleep 1 while ( $rc = pgsql_monitor() ) != $OCF_NOT_RUNNING ; 1518 | 1519 | ocf_log( 'info', 'Instance "%s" stopped', $OCF_RESOURCE_INSTANCE ); 1520 | 1521 | return $OCF_SUCCESS; 1522 | } 1523 | 1524 | ocf_exit_reason( 'Instance "%s" failed to stop', $OCF_RESOURCE_INSTANCE ); 1525 | return $OCF_ERR_GENERIC; 1526 | } 1527 | 1528 | # Monitor the PostgreSQL instance 1529 | # 1530 | sub pgsql_monitor { 1531 | my $pgisready_rc; 1532 | my $controldata_rc; 1533 | 1534 | ocf_log( 'debug', 'pgsql_monitor: monitor is a probe' ) if ocf_is_probe(); 1535 | 1536 | # First check: verify if the instance is listening. 1537 | $pgisready_rc = _pg_isready(); 1538 | 1539 | if ( $pgisready_rc == 0 ) { 1540 | # The instance is listening. 1541 | # We confirm that the instance is up and return whether it is a primary 1542 | # or a secondary. 1543 | ocf_log( 'debug', 'pgsql_monitor: instance "%s" is listening', 1544 | $OCF_RESOURCE_INSTANCE ); 1545 | return _confirm_role(); 1546 | } 1547 | 1548 | if ( $pgisready_rc == 1 ) { 1549 | # The attempt was rejected. 1550 | # This could happen in several cases: 1551 | # - at startup 1552 | # - during shutdown 1553 | # - during crash recovery 1554 | # - if the instance is a warm standby 1555 | # Except for the warm standby case, this should be a transitional state. 1556 | # We try to confirm using pg_controldata. 1557 | ocf_log( 'debug', 1558 | 'pgsql_monitor: instance "%s" rejects connections - checking again...', 1559 | $OCF_RESOURCE_INSTANCE ); 1560 | $controldata_rc = _controldata_to_ocf(); 1561 | 1562 | if ( $controldata_rc == $OCF_RUNNING_MASTER 1563 | or $controldata_rc == $OCF_SUCCESS 1564 | ) { 1565 | # This state indicates that the pg_isready check should succeed. 1566 | # We check again. 1567 | ocf_log( 'debug', 1568 | 'pgsql_monitor: instance "%s" controldata shows a running status', 1569 | $OCF_RESOURCE_INSTANCE ); 1570 | 1571 | $pgisready_rc = _pg_isready(); 1572 | if ( $pgisready_rc == 0 ) { 1573 | # Consistent with the pg_controldata output. 1574 | # We can check if the instance is primary or secondary 1575 | ocf_log( 'debug', 'pgsql_monitor: instance "%s" is listening', 1576 | $OCF_RESOURCE_INSTANCE ); 1577 | return _confirm_role(); 1578 | } 1579 | 1580 | # Still not consistent, raise an error. 1581 | # NOTE: if the instance is a warm standby, we end here. 1582 | # TODO raise a hard error here ? 1583 | ocf_exit_reason( 1584 | 'Instance "%s" controldata is not consistent with pg_isready (returned: %d)', 1585 | $OCF_RESOURCE_INSTANCE, $pgisready_rc ); 1586 | ocf_log( 'info', 1587 | 'If this instance is in warm standby, note that this resource agent only supports hot standby' 1588 | ); 1589 | 1590 | return $OCF_ERR_GENERIC; 1591 | } 1592 | 1593 | if ( $controldata_rc == $OCF_NOT_RUNNING ) { 1594 | # This state indicates that the pg_isready check should fail with rc 2. 1595 | # We check again. 1596 | $pgisready_rc = _pg_isready(); 1597 | if ( $pgisready_rc == 2 ) { 1598 | # Consistent with the pg_controldata output. 1599 | # We check the process status using pg_ctl status and check 1600 | # if it was properly shut down using pg_controldata. 1601 | ocf_log( 'debug', 1602 | 'pgsql_monitor: instance "%s" is not listening', 1603 | $OCF_RESOURCE_INSTANCE ); 1604 | return _confirm_stopped(); 1605 | } 1606 | # Still not consistent, raise an error. 1607 | # TODO raise a hard error here ?
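# (pg_isready exit codes, as documented by PostgreSQL: 0 = the server is
# accepting connections, 1 = the server is rejecting connections (e.g.
# during startup), 2 = no response, 3 = no attempt was made, e.g. because
# of bad parameters.)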
1608 | ocf_exit_reason( 1609 | 'Instance "%s" controldata is not consistent with pg_isready (returned: %d)', 1610 | $OCF_RESOURCE_INSTANCE, $pgisready_rc ); 1611 | 1612 | return $OCF_ERR_GENERIC; 1613 | } 1614 | 1615 | # Something went wrong with the controldata check, hard fail. 1616 | ocf_exit_reason( 1617 | 'Could not get instance "%s" status from controldata (returned: %d)', 1618 | $OCF_RESOURCE_INSTANCE, $controldata_rc ); 1619 | 1620 | return $OCF_ERR_INSTALLED; 1621 | } 1622 | 1623 | elsif ( $pgisready_rc == 2 ) { 1624 | # The instance is not listening. 1625 | # We check the process status using pg_ctl status and check 1626 | # if it was properly shut down using pg_controldata. 1627 | ocf_log( 'debug', 'pgsql_monitor: instance "%s" is not listening', 1628 | $OCF_RESOURCE_INSTANCE ); 1629 | return _confirm_stopped(); 1630 | } 1631 | 1632 | elsif ( $pgisready_rc == 3 ) { 1633 | # No attempt was made, probably a syntax error. 1634 | # Hard configuration error, we don't want to retry or failover here. 1635 | ocf_exit_reason( 1636 | 'Unknown error while checking if instance "%s" is listening (returned %d)', 1637 | $OCF_RESOURCE_INSTANCE, $pgisready_rc ); 1638 | 1639 | return $OCF_ERR_CONFIGURED; 1640 | } 1641 | 1642 | ocf_exit_reason( 'Unexpected result when checking instance "%s" status', 1643 | $OCF_RESOURCE_INSTANCE ); 1644 | 1645 | return $OCF_ERR_GENERIC; 1646 | } 1647 | 1648 | 1649 | # Demote the PostgreSQL instance from primary to secondary 1650 | # To demote a PostgreSQL instance, we must: 1651 | # * stop it gracefully 1652 | # * create recovery.conf with standby_mode = on 1653 | # * start it 1654 | # 1655 | sub pgsql_demote { 1656 | my $rc; 1657 | 1658 | $rc = pgsql_monitor(); 1659 | 1660 | # Running as primary. Normal, expected behavior. 1661 | if ( $rc == $OCF_RUNNING_MASTER ) { 1662 | ocf_log( 'debug', 'pgsql_demote: "%s" currently running as a primary', 1663 | $OCF_RESOURCE_INSTANCE ) ; 1664 | } 1665 | elsif ( $rc == $OCF_SUCCESS ) { 1666 | # Already running as a secondary. Nothing to do. 1667 | ocf_log( 'debug', 1668 | 'pgsql_demote: "%s" currently running as a secondary', 1669 | $OCF_RESOURCE_INSTANCE ); 1670 | return $OCF_SUCCESS; 1671 | } 1672 | elsif ( $rc == $OCF_NOT_RUNNING ) { 1673 | # Instance is stopped. Nothing to do. 1674 | ocf_log( 'debug', 'pgsql_demote: "%s" currently shut down', 1675 | $OCF_RESOURCE_INSTANCE ); 1676 | } 1677 | elsif ( $rc == $OCF_ERR_CONFIGURED ) { 1678 | # We actually prefer raising a hard or fatal error instead of leaving 1679 | # the CRM aborting its transition for a new one because of a soft error. 1680 | # The hard error will force the CRM to move the resource immediately. 1681 | return $OCF_ERR_CONFIGURED; 1682 | } 1683 | else { 1684 | return $OCF_ERR_GENERIC; 1685 | } 1686 | 1687 | # TODO we need to make sure at least one slave is connected!! 1688 | 1689 | # WARNING if the resource state is stopped instead of master, the ocf ra dev 1690 | # rsc advises to return OCF_ERR_GENERIC, misleading the CRM into a loop where 1691 | # it computes transitions of demote(failing)->stop->start->promote actions 1692 | # until failcount == migration-threshold. 1693 | # This is a really ugly trick to keep going with the demote action if the 1694 | # rsc is already stopped gracefully.
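# (Concretely: when monitor reported OCF_NOT_RUNNING above, the graceful
# stop below is skipped entirely and the demote returns success once the
# instance has been restarted as a standby by pgsql_start.)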
1695 | # See the discussion "CRM trying to demote a stopped resource" on 1696 | # developers@clusterlabs.org 1697 | unless ( $rc == $OCF_NOT_RUNNING ) { 1698 | # Add 60s to the timeout or use a 24h timeout fallback to make sure 1699 | # Pacemaker will give up before us and take its decisions 1700 | my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60; 1701 | 1702 | # WARNING the instance **MUST** be stopped gracefully. 1703 | # Do **not** use pg_stop() or service or systemctl here as these 1704 | # commands might force-stop the PostgreSQL instance using immediate 1705 | # mode after some timeout and return success, which is misleading. 1706 | 1707 | $rc = _runas( $PGCTL, '--pgdata', $pgdata, '--mode', 'fast', '-w', 1708 | '--timeout', $timeout , 'stop' ); 1709 | 1710 | # No need to wait for the stop to complete, this is handled in pg_ctl 1711 | # using the -w option. 1712 | unless ( $rc == 0 ) { 1713 | ocf_exit_reason( 'Failed to stop "%s" using pg_ctl (returned %d)', 1714 | $OCF_RESOURCE_INSTANCE, $rc ); 1715 | return $OCF_ERR_GENERIC; 1716 | } 1717 | 1718 | # Double check that the instance is stopped correctly. 1719 | $rc = pgsql_monitor(); 1720 | unless ( $rc == $OCF_NOT_RUNNING ) { 1721 | ocf_exit_reason( 1722 | 'Unexpected "%s" state: monitor status (%d) disagrees with the pg_ctl return code', 1723 | $OCF_RESOURCE_INSTANCE, $rc ); 1724 | return $OCF_ERR_GENERIC; 1725 | } 1726 | } 1727 | 1728 | # 1729 | # At this point, the instance **MUST** be stopped gracefully. 1730 | # 1731 | 1732 | # Note: We do not need to handle the recovery.conf file here as pgsql_start 1733 | # deals with that itself. Equally, there is no need to wait for the start to 1734 | # complete here, this is handled in pgsql_start. 1735 | $rc = pgsql_start(); 1736 | if ( $rc == $OCF_SUCCESS ) { 1737 | ocf_log( 'info', 'pgsql_demote: "%s" started as a secondary', 1738 | $OCF_RESOURCE_INSTANCE ); 1739 | return $OCF_SUCCESS; 1740 | } 1741 | 1742 | # NOTE: No need to double check the instance state as pgsql_start already uses 1743 | # pgsql_monitor to check the state before returning. 1744 | 1745 | ocf_exit_reason( 'Starting "%s" as a standby failed (returned %d)', 1746 | $OCF_RESOURCE_INSTANCE, $rc ); 1747 | return $OCF_ERR_GENERIC; 1748 | } 1749 | 1750 | 1751 | # Promote the secondary instance to primary 1752 | # 1753 | sub pgsql_promote { 1754 | my $rc; 1755 | my $cancel_switchover; 1756 | 1757 | $rc = pgsql_monitor(); 1758 | 1759 | if ( $rc == $OCF_SUCCESS ) { 1760 | # Running as a slave. Normal, expected behavior. 1761 | ocf_log( 'debug', 'pgsql_promote: "%s" currently running as a standby', 1762 | $OCF_RESOURCE_INSTANCE ); 1763 | } 1764 | elsif ( $rc == $OCF_RUNNING_MASTER ) { 1765 | # Already a master. Unexpected, but not a problem. 1766 | ocf_log( 'info', '"%s" already running as a primary', 1767 | $OCF_RESOURCE_INSTANCE ); 1768 | return $OCF_SUCCESS; 1769 | } 1770 | elsif ( $rc == $OCF_NOT_RUNNING ) { # INFO: this is not supposed to happen. 1771 | # Currently not running. Need to start before promoting. 1772 | ocf_log( 'info', '"%s" currently not running, starting it', 1773 | $OCF_RESOURCE_INSTANCE ); 1774 | 1775 | $rc = pgsql_start(); 1776 | if ( $rc != $OCF_SUCCESS ) { 1777 | ocf_exit_reason( 'Failed to start the instance "%s"', 1778 | $OCF_RESOURCE_INSTANCE ); 1779 | return $OCF_ERR_GENERIC; 1780 | } 1781 | } 1782 | else { 1783 | ocf_exit_reason( 'Unexpected error, cannot promote "%s"', 1784 | $OCF_RESOURCE_INSTANCE ); 1785 | return $OCF_ERR_GENERIC; 1786 | } 1787 | 1788 | # 1789 | # At this point, the instance **MUST** be started as a secondary.
1790 | # 1791 | 1792 | # Cancel the switchover if it has been considered unsafe during the 1793 | # pre-promote action 1794 | $cancel_switchover = _get_priv_attr('cancel_switchover'); 1795 | if ( $cancel_switchover ) { # if not empty and not 0 1796 | ocf_exit_reason( 1797 | 'Switchover has been canceled from the pre-promote action' ); 1798 | 1799 | _delete_priv_attr( 'cancel_switchover' ); 1800 | 1801 | return $OCF_ERR_GENERIC if $cancel_switchover eq '1'; 1802 | return $OCF_ERR_ARGS; # ban the resource from the node if we had an 1803 | # internal error during _check_switchover 1804 | } 1805 | 1806 | # Do not check for a better candidate if we try to recover the master. 1807 | # Recovery of a master is detected during the pre-promote action. It sets the 1808 | # private attribute 'recover_master' to '1' if this is a master recovery. 1809 | if ( _get_priv_attr( 'recover_master' ) eq '1' ) { 1810 | ocf_log( 'info', 'Recovering old master, no election needed'); 1811 | } 1812 | else { 1813 | 1814 | # The promotion is occurring on the best known candidate (highest 1815 | # master score), as chosen by pacemaker during the last working monitor 1816 | # on the previous master (see the pgsql_monitor/_check_locations subs). 1817 | # To avoid any race condition between the last monitor action on the 1818 | # previous master and the **real** most up-to-date standby, we 1819 | # set each standby location during the "pre-promote" action, and store 1820 | # them using the "lsn_location" resource attribute. 1821 | # 1822 | # The best standby to promote would have the highest known LSN. If the 1823 | # current resource is not the best one, we need to modify the master 1824 | # scores accordingly, and abort the current promotion. 1825 | ocf_log( 'debug', 1826 | 'pgsql_promote: checking if current node is the best candidate for promotion' ); 1827 | 1828 | # Exclude nodes that are known to be unavailable (not in the current 1829 | # partition) using the "crm_node" command 1830 | my @active_nodes = split /\s+/ => _get_priv_attr( 'nodes' ); 1831 | my $node_to_promote = ''; 1832 | my $ans; 1833 | my $max_tl; 1834 | my $max_lsn; 1835 | my $node_tl; 1836 | my $node_lsn; 1837 | my $wal_num; 1838 | my $wal_off; 1839 | 1840 | # Get the "lsn_location" attribute value for the current node, as set 1841 | # during the "pre-promote" action. 1842 | # It should be the greatest among the secondary instances. 1843 | $ans = _get_priv_attr( 'lsn_location' ); 1844 | 1845 | if ( $ans eq '' ) { 1846 | # This should not happen as the "lsn_location" attribute should have 1847 | # been updated during the "pre-promote" action. 1848 | ocf_exit_reason( 'Cannot get the current node LSN location' ); 1849 | return $OCF_ERR_GENERIC; 1850 | } 1851 | 1852 | chomp $ans; 1853 | ( $max_tl, $max_lsn ) = split /#/, $ans; 1854 | 1855 | ocf_log( 'debug', 'pgsql_promote: current node TL#LSN location: %s#%s', 1856 | $max_tl, $max_lsn ); 1857 | 1858 | # Now we compare with the other available nodes. 1859 | foreach my $node ( @active_nodes ) { 1860 | # We exclude the current node from the check. 1861 | next if $node eq $nodename; 1862 | 1863 | # Get the "lsn_location" attribute value for the node, as set during 1864 | # the "pre-promote" action. 1865 | $ans = _get_priv_attr( 'lsn_location', $node ); 1866 | 1867 | if ( $ans eq '' ) { 1868 | # This should not happen as the "lsn_location" attribute should 1869 | # have been updated during the "pre-promote" action.
1870 | ocf_exit_reason( 'Cannot get the LSN location for "%s"', $node ); 1871 | return $OCF_ERR_GENERIC; 1872 | } 1873 | 1874 | # Split the location into its timeline and LSN parts. 1875 | chomp $ans; 1876 | ( $node_tl, $node_lsn ) = split /#/, $ans; 1877 | 1878 | ocf_log( 'debug', 1879 | 'pgsql_promote: comparing with "%s": TL#LSN is %s#%s', 1880 | $node, $node_tl, $node_lsn ); 1881 | 1882 | # If the node has a higher LSN, select it as the best candidate for 1883 | # promotion and keep looping to check the TL/LSN of the other nodes. 1884 | if ( $node_tl > $max_tl 1885 | or ( $node_tl == $max_tl and $node_lsn > $max_lsn ) 1886 | ) { 1887 | ocf_log( 'debug', 1888 | 'pgsql_promote: "%s" is a better candidate to promote (%s#%s > %s#%s)', 1889 | $node, $node_tl, $node_lsn, $max_tl, $max_lsn ); 1890 | $node_to_promote = $node; 1891 | $max_tl = $node_tl; 1892 | $max_lsn = $node_lsn; 1893 | } 1894 | } 1895 | 1896 | # If any node has been selected, we adapt the master scores accordingly 1897 | # and break the current promotion. 1898 | if ( $node_to_promote ne '' ) { 1899 | ocf_exit_reason( 1900 | '%s is the best candidate to promote, aborting current promotion', 1901 | $node_to_promote ); 1902 | 1903 | # Reset the current node master score. 1904 | _set_master_score( '1' ); 1905 | 1906 | # Set the promotion candidate master score. 1907 | _set_master_score( '1000', $node_to_promote ); 1908 | 1909 | # We fail the promotion to trigger another promotion transition 1910 | # with the new scores. 1911 | return $OCF_ERR_GENERIC; 1912 | } 1913 | 1914 | # Else, we keep on promoting the current node. 1915 | } 1916 | 1917 | unless ( 1918 | # Promote the instance on the current node. 1919 | _runas( $PGCTL, '--pgdata', $pgdata, '-w', 'promote' ) == 0 ) 1920 | { 1921 | ocf_exit_reason( 'Error during promotion command' ); 1922 | return $OCF_ERR_GENERIC; 1923 | } 1924 | 1925 | # The instance promotion is asynchronous, so we need to wait for this 1926 | # process to complete. 1927 | while ( pgsql_monitor() != $OCF_RUNNING_MASTER ) { 1928 | ocf_log( 'info', 'Waiting for the promote to complete' ); 1929 | sleep 1; 1930 | } 1931 | 1932 | ocf_log( 'info', 'Promote complete' ); 1933 | 1934 | return $OCF_SUCCESS; 1935 | } 1936 | 1937 | # This action is called **before** the actual promotion when a failing master is 1938 | # considered unreclaimable or recoverable, or when a new master must be promoted 1939 | # (switchover or first start). 1940 | # Like every "notify" action, it is executed almost simultaneously on all 1941 | # available nodes. 1942 | sub pgsql_notify_pre_promote { 1943 | my $rc; 1944 | my $node_tl; 1945 | my $node_lsn; 1946 | my %cdata; 1947 | my %active_nodes; 1948 | my $attr_nodes; 1949 | 1950 | ocf_log( 'info', 'Promoting instance on node "%s"', 1951 | $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ); 1952 | 1953 | # No need to do an election between slaves if this is a recovery of the master 1954 | if ( _is_master_recover( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ) ) { 1955 | ocf_log( 'warning', 'This is a master recovery!' ); 1956 | 1957 | _set_priv_attr( 'recover_master', '1' ) 1958 | if $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename; 1959 | 1960 | return $OCF_SUCCESS; 1961 | } 1962 | 1963 | # Environment cleanup!
1964 | _delete_priv_attr( 'lsn_location' ); 1965 | _delete_priv_attr( 'recover_master' ); 1966 | _delete_priv_attr( 'nodes' ); 1967 | _delete_priv_attr( 'cancel_switchover' ); 1968 | 1969 | # Check for the last received WAL entry from the master if we are 1970 | # the designated slave to promote 1971 | if ( _is_switchover( $nodename ) and scalar 1972 | grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'promote'} } 1973 | ) { 1974 | $rc = _check_switchover(); 1975 | 1976 | unless ( $rc == 0 ) { 1977 | # Shortcut the election process as the switchover will be 1978 | # canceled 1979 | _set_priv_attr( 'cancel_switchover', $rc ); 1980 | return $OCF_SUCCESS; # return code is ignored during notify 1981 | } 1982 | 1983 | # If the sub keeps going, that means the switchover is safe. 1984 | # Keep going with the election process in case the switchover was 1985 | # instructed to the wrong node. 1986 | # FIXME: should we allow a switchover to a lagging slave? 1987 | } 1988 | 1989 | # We need to trigger an election between the existing slaves to promote the 1990 | # best one based on its current LSN location. Each node sets a private 1991 | # attribute "lsn_location" with its TL and LSN location. 1992 | # 1993 | # During the following promote action, the designated standby for 1994 | # promotion uses these attributes to check if the instance to be promoted 1995 | # is the best one, so we can avoid a race condition between the last 1996 | # successful monitor on the previous master and the current promotion. 1997 | 1998 | # As we cannot break the transition from a notification action, we check 1999 | # during the promotion if each node's TL and LSN are valid. 2000 | 2001 | # Force a checkpoint to make sure the controldata shows the very last TL 2002 | _query( q{ CHECKPOINT }, {} ); 2003 | %cdata = _get_controldata(); 2004 | $node_lsn = _get_last_received_lsn( 'in decimal' ); 2005 | 2006 | unless ( defined $node_lsn ) { 2007 | ocf_log( 'warning', 'Unknown current node LSN' ); 2008 | # Return codes are ignored during notifications... 2009 | return $OCF_SUCCESS; 2010 | } 2011 | 2012 | $node_lsn = "$cdata{'tl'}#$node_lsn"; 2013 | 2014 | ocf_log( 'info', 'Current node TL#LSN: %s', $node_lsn ); 2015 | 2016 | # Set the "lsn_location" attribute value for this node so we can use it 2017 | # during the following "promote" action. 2018 | _set_priv_attr( 'lsn_location', $node_lsn ); 2019 | 2020 | ocf_log( 'warning', 'Could not set the current node LSN' ) 2021 | if $? != 0 ; 2022 | 2023 | # If this node is the future master, keep track of the slaves that 2024 | # received the same notification to compare our LSN with them during 2025 | # the promotion 2026 | if ( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename ) { 2027 | # Build the list of active nodes: 2028 | # master + slave + start - stop 2029 | # FIXME: Deal with a rsc started during the same transition but **after** 2030 | # the promotion ? 2031 | $active_nodes{ $_->{'uname'} }++ foreach @{ $OCF_NOTIFY_ENV{'active'} }, 2032 | @{ $OCF_NOTIFY_ENV{'start'} }; 2033 | $active_nodes{ $_->{'uname'} }-- foreach @{ $OCF_NOTIFY_ENV{'stop'} }; 2034 | 2035 | $attr_nodes = join " " 2036 | => grep { $active_nodes{$_} > 0 } keys %active_nodes; 2037 | 2038 | _set_priv_attr( 'nodes', $attr_nodes ); 2039 | } 2040 | 2041 | return $OCF_SUCCESS; 2042 | } 2043 | 2044 | # This action is called after a promote action. 2045 | sub pgsql_notify_post_promote { 2046 | 2047 | # We have a new master (or the previous one recovered). 2048 | # Environment cleanup!
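# (The same four private attributes as in pre-promote are deleted here,
# so a leftover value can never leak into a later transition.)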
2049 | _delete_priv_attr( 'lsn_location' ); 2050 | _delete_priv_attr( 'recover_master' ); 2051 | _delete_priv_attr( 'nodes' ); 2052 | _delete_priv_attr( 'cancel_switchover' ); 2053 | 2054 | return $OCF_SUCCESS; 2055 | } 2056 | 2057 | # This is called before a demote occurs. 2058 | sub pgsql_notify_pre_demote { 2059 | my $rc; 2060 | my %cdata; 2061 | 2062 | # Do nothing if the local node will not be demoted 2063 | return $OCF_SUCCESS unless scalar 2064 | grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'demote'} }; 2065 | 2066 | $rc = pgsql_monitor(); 2067 | 2068 | # Do nothing if this is not a master recovery 2069 | return $OCF_SUCCESS unless _is_master_recover( $nodename ) 2070 | and $rc == $OCF_FAILED_MASTER; 2071 | 2072 | # In case of a master crash, we need to detect if the CRM tries to recover 2073 | # the master clone. The usual transition is to do: 2074 | # demote->stop->start->promote 2075 | # 2076 | # There are multiple flaws with this transition: 2077 | # * the 1st and 2nd actions will fail because the instance is in the 2078 | # OCF_FAILED_MASTER state 2079 | # * the usual start action is dangerous as the instance will start with 2080 | # a recovery.conf instead of entering a normal recovery process 2081 | # 2082 | # To avoid this, we try to start the instance in recovery from here. 2083 | # If it succeeds, at least it will be demoted correctly with a normal 2084 | # status. If it fails, it will be caught in the next steps. 2085 | 2086 | ocf_log( 'info', 'Trying to start failing master "%s"...', 2087 | $OCF_RESOURCE_INSTANCE ); 2088 | 2089 | # Either the instance managed to start or it couldn't. 2090 | # We rely on the pg_ctl '-w' switch to take care of this. If it couldn't 2091 | # start, this error will be caught later during the various checks 2092 | _pg_ctl_start(); 2093 | 2094 | %cdata = _get_controldata(); 2095 | 2096 | ocf_log( 'info', 'State is "%s" after recovery attempt', $cdata{'state'} ); 2097 | 2098 | return $OCF_SUCCESS; 2099 | } 2100 | 2101 | # This is called before a stop occurs. 2102 | sub pgsql_notify_pre_stop { 2103 | my $rc; 2104 | my %cdata; 2105 | 2106 | # Do nothing if the local node will not be stopped 2107 | return $OCF_SUCCESS unless scalar 2108 | grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'stop'} }; 2109 | 2110 | $rc = _controldata_to_ocf(); 2111 | 2112 | # Do nothing if this is not a slave recovery 2113 | return $OCF_SUCCESS unless _is_slave_recover( $nodename ) 2114 | and $rc == $OCF_RUNNING_SLAVE; 2115 | 2116 | # In case of a slave crash, we need to detect if the CRM tries to recover 2117 | # the slave clone. The usual transition is to do: stop->start 2118 | # 2119 | # This transition cannot work because the instance is in the 2120 | # OCF_ERR_GENERIC state. So the stop action will fail, most probably 2121 | # leading to a fencing action. 2122 | # 2123 | # To avoid this, we try to start the instance in recovery from here. 2124 | # If it succeeds, at least it will be stopped correctly with a normal 2125 | # status. If it fails, it will be caught in the next steps. 2126 | 2127 | ocf_log( 'info', 'Trying to start failing slave "%s"...', 2128 | $OCF_RESOURCE_INSTANCE ); 2129 | 2130 | # Either the instance managed to start or it couldn't. 2131 | # We rely on the pg_ctl '-w' switch to take care of this.
If it couldn't 2132 | # start, this error will be caught later during the various checks 2133 | _pg_ctl_start(); 2134 | 2135 | %cdata = _get_controldata(); 2136 | 2137 | ocf_log( 'info', 'State is "%s" after recovery attempt', $cdata{'state'} ); 2138 | 2139 | return $OCF_SUCCESS; 2140 | } 2141 | 2142 | # Notify type actions, called on all available nodes before (pre) and after 2143 | # (post) other actions, like promote, start, ... 2144 | # 2145 | sub pgsql_notify { 2146 | my $type_op; 2147 | 2148 | ocf_log( 'debug', "pgsql_notify: environment variables: %s", 2149 | Data::Dumper->new( [ \%OCF_NOTIFY_ENV ] )->Sortkeys(1)->Terse(1)->Dump ); 2150 | 2151 | return unless %OCF_NOTIFY_ENV; 2152 | 2153 | $type_op = "$OCF_NOTIFY_ENV{'type'}-$OCF_NOTIFY_ENV{'operation'}"; 2154 | 2155 | for ( $type_op ) { 2156 | if ( /^pre-promote$/ ) { return pgsql_notify_pre_promote() } 2157 | elsif ( /^post-promote$/ ) { return pgsql_notify_post_promote() } 2158 | elsif ( /^pre-demote$/ ) { return pgsql_notify_pre_demote() } 2159 | elsif ( /^pre-stop$/ ) { return pgsql_notify_pre_stop() } 2160 | } 2161 | 2162 | return $OCF_SUCCESS; 2163 | } 2164 | 2165 | # Action used to allow online modification of resource parameter values. 2166 | # 2167 | sub pgsql_reload { 2168 | 2169 | # No action necessary, the action declaration is enough to inform pacemaker 2170 | # that the modification of any non-unique parameter can be applied without 2171 | # having to restart the resource. 2172 | ocf_log( 'info', 'Instance "%s" reloaded', $OCF_RESOURCE_INSTANCE ); 2173 | return $OCF_SUCCESS; 2174 | 2175 | } 2176 | 2177 | ############################################################ 2178 | #### MAIN 2179 | 2180 | # Avoid "could not change directory" when executing commands as "system-user". 2181 | chdir File::Spec->tmpdir(); 2182 | 2183 | # Set current node name. 2184 | $nodename = ocf_local_nodename(); 2185 | 2186 | if ( $OCF_ACTION =~ /^(?:start|stop|reload|monitor|promote|demote|notify)$/ ) { 2187 | pgsql_validate_all(); 2188 | # No need to validate for meta-data, methods or validate-all.
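# (Side note, an illustration rather than anything the agent calls itself:
# the master scores maintained by this agent can be inspected or tweaked
# from a shell with the standard Pacemaker tooling, e.g.
#   crm_master -r <resource> -N <node> -G
# to query a node's score, or "crm_simulate -sL" to display all
# allocation scores.)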
2189 | } 2190 | 2191 | # Run action 2192 | for ( $OCF_ACTION ) { 2193 | if ( /^start$/ ) { $exit_code = pgsql_start() } 2194 | elsif ( /^stop$/ ) { $exit_code = pgsql_stop() } 2195 | elsif ( /^monitor$/ ) { $exit_code = pgsql_monitor() } 2196 | elsif ( /^promote$/ ) { $exit_code = pgsql_promote() } 2197 | elsif ( /^demote$/ ) { $exit_code = pgsql_demote() } 2198 | elsif ( /^notify$/ ) { $exit_code = pgsql_notify() } 2199 | elsif ( /^reload$/ ) { $exit_code = pgsql_reload() } 2200 | elsif ( /^validate-all$/ ) { $exit_code = pgsql_validate_all() } 2201 | elsif ( /^meta-data$/ ) { ocf_meta_data() } 2202 | elsif ( /^methods$/ ) { ocf_methods() } 2203 | else { $exit_code = $OCF_ERR_UNIMPLEMENTED } 2204 | } 2205 | 2206 | exit $exit_code; 2207 | 2208 | 2209 | =head1 EXAMPLE CRM SHELL 2210 | 2211 | The following is an example configuration for a pgsqlms resource using the 2212 | crm(8) shell: 2213 | 2214 | primitive pgsqld pgsqlms \ 2215 | params pgdata="/var/lib/postgresql/9.6/main" \ 2216 | bindir="/usr/lib/postgresql/9.6/bin" \ 2217 | pghost="/var/run/postgresql" \ 2218 | recovery_template="/etc/postgresql/9.6/main/recovery.conf.pcmk" \ 2219 | start_opts="-c config_file=/etc/postgresql/9.6/main/postgresql.conf" \ 2220 | op start timeout=60s \ 2221 | op stop timeout=60s \ 2222 | op promote timeout=30s \ 2223 | op demote timeout=120s \ 2224 | op monitor interval=15s timeout=10s role="Master" \ 2225 | op monitor interval=16s timeout=10s role="Slave" \ 2226 | op notify timeout=60s 2227 | 2228 | ms pgsql-ha pgsqld meta notify=true 2229 | 2230 | 2231 | =head1 EXAMPLE PCS 2232 | 2233 | The following is an example configuration for a pgsqlms resource using pcs(8): 2234 | 2235 | pcs resource create pgsqld ocf:heartbeat:pgsqlms \ 2236 | bindir=/usr/pgsql-9.6/bin pgdata=/var/lib/pgsql/9.6/data \ 2237 | op start timeout=60s \ 2238 | op stop timeout=60s \ 2239 | op promote timeout=30s \ 2240 | op demote timeout=120s \ 2241 | op monitor interval=15s timeout=10s role="Master" \ 2242 | op monitor interval=16s timeout=10s role="Slave" \ 2243 | op notify timeout=60s --master notify=true 2244 | 2245 | =head1 SEE ALSO 2246 | 2247 | http://clusterlabs.org/ 2248 | 2249 | =head1 AUTHOR 2250 | 2251 | Jehan-Guillaume de Rorthais and Mael Rimbault. 
2252 | 2253 | =cut 2254 | -------------------------------------------------------------------------------- /files/resource-agents-paf-1.1.0-1.noarch.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanChii/ansible-role-postgres-ha/20361e1d8183858a6a8656903250a465937c98db/files/resource-agents-paf-1.1.0-1.noarch.rpm -------------------------------------------------------------------------------- /files/resource-agents-paf-2.2.0-1.noarch.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanChii/ansible-role-postgres-ha/20361e1d8183858a6a8656903250a465937c98db/files/resource-agents-paf-2.2.0-1.noarch.rpm -------------------------------------------------------------------------------- /files/resource-agents-paf-2.2.1-1.noarch.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanChii/ansible-role-postgres-ha/20361e1d8183858a6a8656903250a465937c98db/files/resource-agents-paf-2.2.1-1.noarch.rpm -------------------------------------------------------------------------------- /handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 3 | # handlers file for postgresql-ha 4 | 5 | - name: restart postgresql 6 | service: name="{{ postgres_ha_pg_systemd_svcname }}" state=restarted 7 | 8 | - name: reload postgresql 9 | shell: "{{ postgres_ha_pg_bindir }}/pg_ctl -D {{ postgres_ha_pg_data }} reload" 10 | become: yes 11 | become_user: postgres 12 | vars: 13 | ansible_ssh_pipelining: no 14 | 15 | - name: reload corosync 16 | shell: pcs cluster reload corosync 17 | 18 | - name: restart corosync 19 | service: name=corosync state=restarted 20 | 21 | - name: reload systemd 22 | shell: /bin/systemctl daemon-reload 23 | 24 | -------------------------------------------------------------------------------- /library/pcs_property: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | DOCUMENTATION = ''' 5 | --- 6 | module: pcs_property 7 | short_description: Manages I(pacemaker) cluster properties with the pcs tool. 8 | options: 9 | state: 10 | required: false 11 | default: present 12 | choices: [ "absent", "present" ] 13 | name: 14 | required: true 15 | description: name of the property. 16 | value: 17 | required: true 18 | description: value of the property. 19 | ''' 20 | 21 | def main(): 22 | module = AnsibleModule( 23 | argument_spec = dict( 24 | state = dict(default='present', choices=['present', 'absent']), 25 | name = dict(required=True), 26 | value = dict(required=True), 27 | ), 28 | supports_check_mode=True, 29 | ) 30 | 31 | # TODO check the pcs command is available. 32 | # TODO check pacemaker/corosync is running. 33 | 34 | # Get the current property value. 35 | cmd = "pcs property list %(name)s | awk '/^ / { print $2}'" % module.params 36 | rc, out, err = module.run_command(cmd, use_unsafe_shell=True) 37 | value = out.strip() 38 | 39 | if module.params['state'] == 'absent': 40 | # state=absent: unset the property below if it currently has a value.
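# (An empty string from the awk pipeline above means the property is not
# currently set, which is what makes the check below idempotent.)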
41 | if value != '': 42 | changed = True 43 | if not module.check_mode: 44 | cmd = 'pcs property unset %(name)s' % module.params 45 | module.run_command(cmd) 46 | else: 47 | changed = False 48 | module.exit_json(changed=changed) 49 | else: 50 | # state=present: (re)set the property if the current value differs. 51 | if value != module.params['value']: 52 | changed = True 53 | if not module.check_mode: 54 | cmd = 'pcs property set %(name)s=%(value)s' % module.params 55 | module.run_command(cmd) 56 | else: 57 | changed = False 58 | module.exit_json(changed=changed, msg="%(name)s=%(value)s" % module.params) 59 | 60 | # import module snippets 61 | from ansible.module_utils.basic import * 62 | main() 63 | 64 | -------------------------------------------------------------------------------- /library/pcs_resource: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | DOCUMENTATION = ''' 5 | --- 6 | module: pcs_resource 7 | short_description: Manages I(pacemaker) cluster resources with the pcs tool. 8 | options: 9 | command: 10 | required: true 11 | description: Supported commands. 12 | choices: [ "create", "master"] 13 | resource_id: 14 | required: true 15 | description: Id of the resource. 16 | type: 17 | required: false 18 | description: type of the resource. Required by the «create» command. 19 | ms_name: 20 | required: false 21 | description: name of the resource. Required by the «master» command. 22 | group: 23 | required: false 24 | description: add the resource to the specified group. 25 | options: 26 | required: false 27 | description: hash of resource options. 28 | operations: 29 | required: false 30 | description: list of hashes of operations. Used by the «create» command. 31 | disabled: 32 | required: false 33 | type: bool 34 | description: don't start the resource after creation. 35 | ''' 36 | 37 | def main(): 38 | module = AnsibleModule( 39 | argument_spec = dict( 40 | command = dict(required=True, choices=['create', 'master']), 41 | name = dict(required=True, aliases=['resource_id']), 42 | ms_name = dict(required=False, type='str'), 43 | type = dict(required=False), 44 | group = dict(required=False, type='str'), 45 | options = dict(required=False, type='dict'), 46 | operations = dict(required=False, type='raw'), 47 | disabled = dict(required=False, type='bool'), 48 | ), 49 | supports_check_mode=True, 50 | ) 51 | 52 | # TODO check the pcs command is available. 53 | # TODO check pacemaker/corosync is running. 54 | 55 | # Check if the resource already exists. 56 | cmd = "pcs resource show %(name)s" % module.params 57 | rc, out, err = module.run_command(cmd) 58 | exists = (rc == 0) 59 | 60 | if exists: 61 | module.exit_json(changed=False, msg="Resource already exists.") 62 | elif module.check_mode: 63 | module.exit_json(changed=True) 64 | 65 | # Validate and process command specific params. 66 | if module.params['command'] == 'create': 67 | if not module.params['type']: 68 | module.fail_json(msg="missing required arguments: type.") 69 | if not module.params['options']: 70 | module.fail_json(msg="missing required arguments: options.") 71 | # Command template. 72 | cmd = 'pcs resource %(command)s %(name)s %(type)s %(options)s' 73 | # Process operations.
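# For illustration only (the values are hypothetical), an item such as
#   {'action': 'monitor', 'options': {'interval': '15s', 'role': 'Master'}}
# is rendered by the loop below as: op monitor interval="15s" role="Master"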
74 | if module.params['operations']: 75 | cmd += ' %(operations)s' 76 | operations = [] 77 | for op in module.params['operations']: 78 | op['options'] = ' '.join(['%s="%s"' % (key, value) for (key, value) in op['options'].items()]) 79 | operations.append('op %(action)s %(options)s' % op) 80 | module.params['operations'] = ' '.join(operations) 81 | 82 | elif module.params['command'] == 'master': 83 | if not module.params['options']: 84 | module.fail_json(msg="missing required arguments: options.") 85 | if not module.params['ms_name']: 86 | module.fail_json(msg="missing required arguments: ms_name.") 87 | # Command template. 88 | cmd = 'pcs resource %(command)s %(name)s %(ms_name)s %(options)s' 89 | 90 | # Process options. 91 | if module.params['options'] is not None: 92 | options = module.params['options'] 93 | if options: 94 | options = ' '.join(['%s="%s"' % (key, value) for (key, value) in options.items()]) 95 | module.params['options'] = options 96 | 97 | if module.params['group'] is not None: 98 | if module.params['group']: 99 | cmd += ' --group ' + module.params['group'] 100 | 101 | if module.params['disabled'] is not None: 102 | if module.params['disabled']: 103 | cmd += ' --disabled' 104 | 105 | # Run the command. 106 | cmd = cmd % module.params 107 | message = 'Running cmd: %s' % cmd 108 | rc, out, err = module.run_command(cmd) 109 | if rc != 0: 110 | module.fail_json(msg="Execution failed.\nCommand: `%s`\nError: %s" % (cmd, err)) 111 | 112 | module.exit_json(changed=True, msg=message) 113 | 114 | # import module snippets 115 | from ansible.module_utils.basic import * 116 | main() 117 | 118 | -------------------------------------------------------------------------------- /meta/main.yml: -------------------------------------------------------------------------------- 1 | galaxy_info: 2 | author: YanChii 3 | description: 4 | company: Danube Cloud 5 | 6 | # If the issue tracker for your role is not on github, uncomment the 7 | # next line and provide a value 8 | # issue_tracker_url: http://example.com/issue/tracker 9 | 10 | license: Apache 11 | 12 | min_ansible_version: 2.0 13 | 14 | # Optionally specify the branch Galaxy will use when accessing the GitHub 15 | # repo for this role. During role install, if no tags are available, 16 | # Galaxy will use this branch. During import Galaxy will access files on 17 | # this branch. If travis integration is configured, only notifications for this 18 | # branch will be accepted. Otherwise, in all cases, the repo's default branch 19 | # (usually master) will be used. 20 | #github_branch: 21 | 22 | # 23 | # Below are all platforms currently available. Just uncomment 24 | # the ones that apply to your role. If you don't see your 25 | # platform on this list, let us know and we'll get it added!
26 | # 27 | platforms: 28 | #- name: OpenBSD 29 | # versions: 30 | # - all 31 | # - 5.6 32 | # - 5.7 33 | # - 5.8 34 | # - 5.9 35 | # - 6.0 36 | #- name: Fedora 37 | # versions: 38 | # - all 39 | # - 16 40 | # - 17 41 | # - 18 42 | # - 19 43 | # - 20 44 | # - 21 45 | # - 22 46 | # - 23 47 | # - 24 48 | # - 25 49 | #- name: DellOS 50 | # versions: 51 | # - all 52 | # - 10 53 | # - 6 54 | # - 9 55 | #- name: MacOSX 56 | # versions: 57 | # - all 58 | # - 10.10 59 | # - 10.11 60 | # - 10.12 61 | # - 10.7 62 | # - 10.8 63 | # - 10.9 64 | #- name: Synology 65 | # versions: 66 | # - all 67 | # - any 68 | #- name: Junos 69 | # versions: 70 | # - all 71 | # - any 72 | #- name: GenericBSD 73 | # versions: 74 | # - all 75 | # - any 76 | #- name: Void Linux 77 | # versions: 78 | # - all 79 | # - any 80 | #- name: GenericLinux 81 | # versions: 82 | # - all 83 | # - any 84 | #- name: NXOS 85 | # versions: 86 | # - all 87 | # - any 88 | #- name: IOS 89 | # versions: 90 | # - all 91 | # - any 92 | #- name: Amazon 93 | # versions: 94 | # - all 95 | # - 2013.03 96 | # - 2013.09 97 | # - 2016.03 98 | # - 2016.09 99 | #- name: ArchLinux 100 | # versions: 101 | # - all 102 | # - any 103 | #- name: FreeBSD 104 | # versions: 105 | # - all 106 | # - 10.0 107 | # - 10.1 108 | # - 10.2 109 | # - 10.3 110 | # - 11.0 111 | # - 8.0 112 | # - 8.1 113 | # - 8.2 114 | # - 8.3 115 | # - 8.4 116 | # - 9.0 117 | # - 9.1 118 | # - 9.1 119 | # - 9.2 120 | # - 9.3 121 | #- name: Ubuntu 122 | # versions: 123 | # - all 124 | # - lucid 125 | # - maverick 126 | # - natty 127 | # - oneiric 128 | # - precise 129 | # - quantal 130 | # - raring 131 | # - saucy 132 | # - trusty 133 | # - utopic 134 | # - vivid 135 | # - wily 136 | # - xenial 137 | # - yakkety 138 | #- name: Debian 139 | # versions: 140 | # - all 141 | # - etch 142 | # - jessie 143 | # - lenny 144 | # - sid 145 | # - squeeze 146 | # - stretch 147 | # - wheezy 148 | #- name: Alpine 149 | # versions: 150 | # - all 151 | # - any 152 | - name: EL 153 | versions: 154 | # - all 155 | # - 5 156 | # - 6 157 | - 7 158 | #- name: Windows 159 | # versions: 160 | # - all 161 | # - 2012R2 162 | #- name: SmartOS 163 | # versions: 164 | # - all 165 | # - any 166 | #- name: opensuse 167 | # versions: 168 | # - all 169 | # - 12.1 170 | # - 12.2 171 | # - 12.3 172 | # - 13.1 173 | # - 13.2 174 | #- name: SLES 175 | # versions: 176 | # - all 177 | # - 10SP3 178 | # - 10SP4 179 | # - 11 180 | # - 11SP1 181 | # - 11SP2 182 | # - 11SP3 183 | # - 11SP4 184 | # - 12 185 | # - 12SP1 186 | #- name: GenericUNIX 187 | # versions: 188 | # - all 189 | # - any 190 | #- name: Solaris 191 | # versions: 192 | # - all 193 | # - 10 194 | # - 11.0 195 | # - 11.1 196 | # - 11.2 197 | # - 11.3 198 | #- name: eos 199 | # versions: 200 | # - all 201 | # - Any 202 | 203 | galaxy_tags: 204 | - database 205 | - clustering 206 | - postgresql 207 | 208 | dependencies: [] 209 | # List your role dependencies here, one per line. 210 | # Be sure to remove the '[]' above if you add dependencies 211 | # to this list. 
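A minimal sketch of how this role might be wired into a play, for orientation only — the host group and role name below are assumptions, not defined by the files shown here; the postgres_ha_* variable names are the ones used by the tasks that follow:

    - hosts: pgcluster                # hypothetical inventory group
      become: yes
      vars:
        # designate one host of the play as the initial master
        postgres_ha_cluster_master_host: "{{ groups['pgcluster'][0] }}"
      roles:
        - ansible-role-postgres-ha    # assumed checkout name of this role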
212 | -------------------------------------------------------------------------------- /tasks/constraints.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | # location constraints 4 | - name: setting VIP location constraints 5 | shell: pcs constraint location "{{ postgres_ha_cluster_vip_res_name }}" prefers {% for clhost in ansible_play_batch %}{{clhost}}=100 {% endfor %} 6 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 7 | 8 | - name: setting DB location constraints 9 | shell: pcs constraint location "{{ postgres_ha_cluster_pg_HA_res_name }}" prefers {% for clhost in ansible_play_batch %}{{clhost}}=100 {% endfor %} 10 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 11 | 12 | 13 | # colocation constraints 14 | - name: setting resources colocation group 1 15 | shell: pcs constraint colocation add "{{ postgres_ha_cluster_vip_res_name }}" with master "{{ postgres_ha_cluster_pg_HA_res_name }}" INFINITY 16 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 17 | 18 | # start order constraints 19 | - name: setting resources start order 20 | shell: pcs constraint order promote "{{ postgres_ha_cluster_pg_HA_res_name }}" then start "{{ postgres_ha_cluster_vip_res_name }}" symmetrical=false 21 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 22 | 23 | - name: setting resources stop order 24 | shell: pcs constraint order demote "{{ postgres_ha_cluster_pg_HA_res_name }}" then stop "{{ postgres_ha_cluster_vip_res_name }}" symmetrical=false 25 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 26 | 27 | - name: marking constraints as processed 28 | shell: echo "LOCKFILE. Do not delete!" 
> "/var/lib/pgsql/{{ postgres_ha_pg_version }}/.{{postgres_ha_cluster_name}}_constraints_processed" 29 | args: 30 | creates: "/var/lib/pgsql/{{ postgres_ha_pg_version }}/.{{postgres_ha_cluster_name}}_constraints_processed" 31 | -------------------------------------------------------------------------------- /tasks/finalize.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | - name: enable database cluster resource 4 | shell: pcs resource enable "{{ postgres_ha_cluster_pg_HA_res_name }}" 5 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 6 | 7 | - name: refresh database cluster resource 8 | shell: pcs resource manage "{{ postgres_ha_cluster_pg_HA_res_name }}" && pcs resource clear "{{ postgres_ha_cluster_pg_HA_res_name }}" && pcs resource refresh "{{ postgres_ha_cluster_pg_HA_res_name }}" 9 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 10 | 11 | - name: check if all slaves are connected 12 | shell: psql -Aqtc "SELECT count(*) FROM pg_stat_replication" 13 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 14 | become: yes 15 | become_user: postgres 16 | register: slavecount 17 | vars: 18 | ansible_ssh_pipelining: no 19 | until: (slavecount.stdout|string) == ((ansible_play_batch|length - 1)|string) 20 | retries: 16 21 | delay: 2 22 | 23 | -------------------------------------------------------------------------------- /tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 3 | 4 | - include: pre-tasks.yml 5 | - include: pcs.yml 6 | - include: vip.yml 7 | - include: postgresql_sync.yml 8 | - include: paf.yml 9 | 10 | - name: test constraints presence 11 | stat: path="/var/lib/pgsql/{{ postgres_ha_pg_version }}/.{{postgres_ha_cluster_name}}_constraints_processed" 12 | register: constraints_processed 13 | 14 | - include: constraints.yml 15 | when: not constraints_processed.stat.exists 16 | 17 | - include: finalize.yml 18 | 19 | - include: maintenance.yml 20 | -------------------------------------------------------------------------------- /tasks/maintenance.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | - name: create cluster maintenance directory 4 | file: 5 | path: "{{ postgres_ha_maint_scripts_path }}" 6 | state: directory 7 | owner: postgres 8 | group: postgres 9 | mode: 0755 10 | 11 | - name: create servers IP list 12 | set_fact: 13 | all_ips: "" 14 | 15 | - name: append servers IP list 16 | set_fact: 17 | all_ips: "{% if all_ips != '' %}{{ all_ips }} {% endif %}{{ hostvars[item].ansible_default_ipv4.address }}" 18 | with_items: "{{ ansible_play_batch }}" 19 | 20 | - name: generate cluster maintenance scripts 21 | template: src="{{ item }}" dest="{{ postgres_ha_maint_scripts_path }}/{{ item | basename | regex_replace('\.j2$', '') }}" 22 | args: 23 | owner: postgres 24 | group: postgres 25 | mode: 0750 26 | with_fileglob: "{{ role_path }}/templates/maint/*" 27 | -------------------------------------------------------------------------------- /tasks/paf.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml 
2 | 3 | - name: select proper PAF package (centos7) 4 | set_fact: 5 | paf_pkg: 'resource-agents-paf-{{ postgres_ha_paf_version }}-1.noarch.rpm' 6 | when: os_version == 'centos7' 7 | 8 | - name: select proper PAF package (centos6) 9 | set_fact: 10 | paf_pkg: 'resource-agents-paf-1.1.0-1.noarch.rpm' 11 | when: os_version == 'centos6' 12 | 13 | # this rpm is also available on GitHub, but when multiple servers start to download 14 | # the same file from GitHub simultaneously, GitHub will likely block the requests 15 | # and the role will fail; 16 | # that's why the rpm is embedded in the role 17 | 18 | - name: copy PAF rpm to hosts 19 | copy: src="{{ paf_pkg }}" dest="/tmp/{{ paf_pkg }}" 20 | 21 | - name: install PAF DB failover agent 22 | yum: 23 | name: "/tmp/{{ paf_pkg }}" 24 | state: present 25 | 26 | - name: apply PAF v2.2.0 fix for newest pacemaker 27 | copy: 28 | src: 'pgsqlms-2.2.0-fix-pg10' 29 | dest: '/usr/lib/ocf/resource.d/heartbeat/pgsqlms' 30 | args: 31 | owner: root 32 | group: root 33 | mode: 0555 34 | when: postgres_ha_paf_version == '2.2.0' and 35 | not postgres_ha_paf_geo_patch 36 | 37 | - name: apply geo-HA patches to DB failover agent 38 | copy: 39 | src: 'pgsqlms-{{ postgres_ha_paf_version }}-geo-patched' 40 | dest: '/usr/lib/ocf/resource.d/heartbeat/pgsqlms' 41 | args: 42 | owner: root 43 | group: root 44 | mode: 0555 45 | when: postgres_ha_paf_geo_patch 46 | 47 | - name: prepare DB recovery config 48 | template: src=recovery.conf.pcmk.j2 dest="{{ postgres_ha_pg_data }}/../recovery.conf.{{postgres_ha_cluster_name}}.pcmk" 49 | args: 50 | owner: postgres 51 | group: postgres 52 | mode: 0644 53 | 54 | - name: stop database for clustering 55 | service: name="{{ postgres_ha_pg_systemd_svcname }}" state=stopped enabled=false 56 | 57 | - name: create database cluster resource 58 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 59 | pcs_resource: command=create resource_id="{{ postgres_ha_cluster_pg_res_name }}" type=ocf:heartbeat:pgsqlms 60 | args: 61 | disabled: True 62 | options: 63 | bindir: "{{ postgres_ha_pg_bindir }}" 64 | pgdata: "{{ postgres_ha_pg_data }}" 65 | pgport: "{{ postgres_ha_pg_port }}" 66 | recovery_template: "{{ postgres_ha_pg_data }}/../recovery.conf.{{postgres_ha_cluster_name}}.pcmk" 67 | operations: 68 | - action: start 69 | options: 70 | timeout: 60s 71 | - action: stop 72 | options: 73 | timeout: 60s 74 | - action: promote 75 | options: 76 | timeout: 30s 77 | - action: demote 78 | options: 79 | timeout: 120s 80 | - action: notify 81 | options: 82 | timeout: 60s 83 | - action: monitor 84 | options: 85 | interval: "{{ postgres_ha_monitor_interval_pgmaster }}" 86 | timeout: 10s 87 | role: Master 88 | - action: monitor 89 | options: 90 | interval: "{{ postgres_ha_monitor_interval_pgslave }}" 91 | timeout: 10s 92 | role: Slave 93 | 94 | - name: create master DB resource 95 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 96 | pcs_resource: command=master resource_id="{{ postgres_ha_cluster_pg_HA_res_name }}" ms_name="{{ postgres_ha_cluster_pg_res_name }}" disabled=True 97 | args: 98 | options: 99 | master-max: 1 100 | master-node-max: 1 101 | clone-max: "{{ ansible_play_batch|length }}" 102 | clone-node-max: 1 103 | notify: true 104 | 105 | -------------------------------------------------------------------------------- /tasks/pcs.yml: -------------------------------------------------------------------------------- 1 | # vim: set
filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | - debug: msg='cluster_members={{ansible_play_batch}}' 4 | run_once: true 5 | 6 | - name: install cluster pkgs 7 | yum: 8 | name: pcs 9 | state: present 10 | 11 | - name: install additional cluster pkgs for centos 6 12 | yum: 13 | name: '{{ item }}' 14 | state: present 15 | when: os_version == 'centos6' 16 | with_items: 17 | - pacemaker 18 | - libselinux-python 19 | 20 | - name: "Build hosts file" 21 | lineinfile: dest=/etc/hosts regexp='.*{{ item }}$' line="{{ hostvars[item].ansible_default_ipv4.address }} {{ item }}" state=present 22 | when: hostvars[item].ansible_default_ipv4.address is defined 23 | with_items: "{{ ansible_play_batch }}" 24 | 25 | # For each host add hostnames for all postgres_ha_network_rings interfaces to /etc/hosts 26 | # example output: "10.10.90.200 myhost-ring1" 27 | - name: add additional network rings to hosts file 28 | lineinfile: dest=/etc/hosts regexp='.*{{ item[0] }}-{{ item[1] }}$' line="{{ hostvars[item[0]][['ansible_', postgres_ha_network_rings[item[1]]]|join]['ipv4']['address'] }} {{ item[0] }}-{{ item[1] }}" state=present 29 | with_nested: 30 | - "{{ ansible_play_batch }}" 31 | - "{{ postgres_ha_network_rings }}" 32 | when: postgres_ha_network_rings and 33 | hostvars[item[0]][['ansible_', postgres_ha_network_rings[item[1]]]|join] is defined 34 | 35 | # create hostname string for the "pcs cluster" command that also lists all ring hostnames (if they exist) 36 | # example: "node01,node01-ring1" 37 | # (the lowest element (e.g. "ring0") is omitted in the first name and the actual inventory_hostname is used as a default name) 38 | - name: generate pcs hostname string 39 | set_fact: 40 | pcs_hostname: "{{ inventory_hostname }}{% if postgres_ha_network_rings %}{% for ring in postgres_ha_network_rings|difference([postgres_ha_network_rings|min])|sort %},{{ inventory_hostname }}-{{ ring }}{% endfor %}{% endif %}" 41 | 42 | # output: "--addr0 net.work.ip.addr --addr1 other.net.ip.addr ..." 43 | - name: compute mcast addr settings 44 | set_fact: 45 | pcs_ring_addrs: "{% if postgres_ha_network_rings %}{% for ring in postgres_ha_network_rings|sort %}--addr{{ loop.index0 }} {{ hostvars[inventory_hostname][['ansible_', postgres_ha_network_rings[ring]]|join]['ipv4']['network'] }} {% endfor %}{% endif %}" 46 | when: postgres_ha_mcast_enable 47 | 48 | - name: enable GUI if required 49 | lineinfile: dest=/etc/sysconfig/pcsd regexp='^PCSD_DISABLE_GUI=' line="PCSD_DISABLE_GUI={% if postgres_ha_gui_enable %}false{% else %}true{% endif %}" state=present 50 | 51 | - name: service pcsd start 52 | service: name=pcsd state=started enabled=yes 53 | 54 | - name: setup hacluster password 55 | user: 56 | name: hacluster 57 | state: present 58 | update_password: always 59 | password: "{{ postgres_ha_cluster_ha_password_hash }}" 60 | 61 | - name: setup cluster auth 62 | shell: pcs cluster auth {{ ansible_play_batch | join(" ") }} -u hacluster -p "{{ postgres_ha_cluster_ha_password }}" 63 | 64 | # We create the cluster in two steps: 65 | # 1. create a one-node cluster 66 | # 2. join the other cluster nodes (the task below) 67 | # The reason is that we want to support adding new nodes by re-running the role.
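# For orientation, a hypothetical rendering of the command built by the next task
# (the cluster and host names are assumptions, for illustration only):
#   pcs cluster setup --name pgcluster "node01,node01-ring1"
# with postgres_ha_mcast_enable it additionally gets: --transport udp --addr0 <ring0 network> --addr1 <ring1 network>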
68 | - name: create cluster (centos7) 69 | shell: pcs cluster setup --name {{ postgres_ha_cluster_name }} "{{ pcs_hostname }}" {% if postgres_ha_pcs_advanced_params %}{% for param in postgres_ha_pcs_advanced_params|difference(['addr0', 'addr1', 'addr2', 'addr3', 'transport']) %}--{{ param }} {{ postgres_ha_pcs_advanced_params[param] }} {% endfor %}{% endif %} {% if postgres_ha_mcast_enable %}--transport udp {{ pcs_ring_addrs }}{% endif %} 70 | args: 71 | creates: /etc/corosync/corosync.conf 72 | when: os_version == 'centos7' and 73 | inventory_hostname == postgres_ha_cluster_master_host # run only on master node 74 | 75 | # the parameters 'addr0', 'addr1', 'addr2', 'addr3' and 'transport' from postgres_ha_pcs_advanced_params are ignored here, because the role generates them itself 76 | - name: create cluster (centos6) 77 | shell: pcs cluster setup --name {{ postgres_ha_cluster_name }} {{ pcs_hostname }} {% if postgres_ha_pcs_advanced_params %}{% for param in postgres_ha_pcs_advanced_params|difference(['addr0', 'addr1', 'addr2', 'addr3', 'transport']) %}--{{ param }} {{ postgres_ha_pcs_advanced_params[param] }} {% endfor %}{% endif %} {% if postgres_ha_mcast_enable %}--transport udp {{ pcs_ring_addrs }}{% else %}--transport udpu{% endif %} 78 | args: 79 | creates: /etc/cluster/cluster.conf 80 | when: os_version == 'centos6' and 81 | inventory_hostname == postgres_ha_cluster_master_host # run only on master node 82 | 83 | - name: join cluster nodes (centos7) 84 | shell: /bin/sh -c "if ! grep -q 'ring0_addr[:] *{{ item }}[\t ]*$' /etc/corosync/corosync.conf; then pcs cluster node add {{ hostvars[item]['pcs_hostname'] }}; fi" 85 | with_items: '{{ ansible_play_batch | difference([inventory_hostname]) }}' # all hosts except me 86 | when: os_version == 'centos7' and 87 | inventory_hostname == postgres_ha_cluster_master_host # run only on master node 88 | 89 | - name: join cluster nodes (centos6) 90 | shell: /bin/sh -c "if !
grep -q '= 10 13 | 14 | - name: determine the correct postgres package name (all systems) 15 | set_fact: 16 | pg_pkg_name: 'pgdg-centos{{ postgres_ha_pg_version | replace(".", "") }}-{{ postgres_ha_pg_version }}-{{ pg_pkg_vers_suffix }}.noarch.rpm' 17 | #when: ansible_distribution == 'CentOS' # this is default 18 | 19 | - name: determine the correct postgres package name (RHEL) 20 | set_fact: 21 | pg_pkg_name: 'pgdg-redhat{{ postgres_ha_pg_version | replace(".", "") }}-{{ postgres_ha_pg_version }}-{{ pg_pkg_vers_suffix }}.noarch.rpm' 22 | when: ansible_distribution == 'RedHat' # the ansible_distribution fact value for RHEL 23 | 24 | - name: 'import pg{{ postgres_ha_pg_version | replace(".", "") }} repo' 25 | yum: 26 | name: "{{ postgres_ha_repo_url }}" 27 | state: installed 28 | when: postgres_ha_import_repo 29 | 30 | - name: 'install pg{{ postgres_ha_pg_version | replace(".", "") }}' 31 | yum: 32 | name: 'postgresql{{ postgres_ha_pg_version | replace(".", "") }}-server, postgresql{{ postgres_ha_pg_version | replace(".", "") }}-contrib, python-psycopg2' 33 | state: installed 34 | 35 | - name: init DB dir on master if necessary (centos 7 and postgresql 9.6 or older) 36 | shell: '{{ postgres_ha_pg_bindir }}/postgresql{{ postgres_ha_pg_version | replace(".", "") }}-setup initdb' 37 | args: 38 | creates: "{{ postgres_ha_pg_data }}/PG_VERSION" 39 | # run only on one node 40 | when: inventory_hostname == postgres_ha_cluster_master_host 41 | and os_version == 'centos7' 42 | and postgres_ha_pg_version |int < 10 43 | 44 | - name: init DB dir on master if necessary (centos 7 and postgresql 10 or above) 45 | shell: '{{ postgres_ha_pg_bindir }}/postgresql-{{ postgres_ha_pg_version | replace(".", "") }}-setup initdb' 46 | args: 47 | creates: "{{ postgres_ha_pg_data }}/PG_VERSION" 48 | # run only on one node 49 | when: inventory_hostname == postgres_ha_cluster_master_host 50 | and os_version == 'centos7' 51 | and postgres_ha_pg_version |int >= 10 52 | 53 | - name: init DB dir on master if necessary (centos 6) 54 | shell: '/etc/init.d/postgresql-{{ postgres_ha_pg_version }} initdb' 55 | args: 56 | creates: "{{ postgres_ha_pg_data }}/PG_VERSION" 57 | # run only on one node 58 | when: inventory_hostname == postgres_ha_cluster_master_host 59 | and os_version == 'centos6' 60 | 61 | # if the file does not exist, DB sync is needed 62 | - name: check if DB was synchronized before 63 | stat: path="{{ postgres_ha_pg_data }}/.synchronized" 64 | register: db_prevsync_file 65 | 66 | #- name: push DB config (clustering) 67 | # template: src=postgresql.conf.j2 dest="{{ postgres_ha_pg_data }}/postgresql.conf" 68 | # when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 69 | # args: 70 | # owner: postgres 71 | # group: postgres 72 | # mode: 0600 73 | 74 | - name: alter clustering-related settings in postgresql.conf 75 | replace: 76 | dest="{{ postgres_ha_pg_data }}/postgresql.conf" 77 | regexp="^([# ]*{{ item.key }} *=.*$)" 78 | replace="{{ item.key }} = {{ item.value }}" 79 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on master node 80 | with_dict: "{{ postgres_ha_postgresql_conf_vars }}" 81 | notify: restart postgresql 82 | 83 | - meta: flush_handlers 84 | 85 | - name: alter DB ACL in pg_hba.conf 86 | lineinfile: dest="{{ postgres_ha_pg_data }}/pg_hba.conf" 87 | regexp='^host postgres .*{{ hostvars[item].ansible_default_ipv4.address }}/32' 88 | insertbefore='^host' 89 | line='host postgres {{ postgres_ha_pg_repl_user }} {{ hostvars[item].ansible_default_ipv4.address }}/32 md5' 90 | 
with_items: "{{ ansible_play_batch }}" 91 | when: inventory_hostname == postgres_ha_cluster_master_host or 92 | db_prevsync_file.stat.exists 93 | # run only on master node or on synchronized slave 94 | register: alter_pg_acl 95 | 96 | - name: alter DB replication ACL in pg_hba.conf on master 97 | lineinfile: dest="{{ postgres_ha_pg_data }}/pg_hba.conf" 98 | regexp='^host replication .*{{ hostvars[item].ansible_default_ipv4.address }}/32' 99 | insertbefore='^host' 100 | line='host replication {{ postgres_ha_pg_repl_user }} {{ hostvars[item].ansible_default_ipv4.address }}/32 {% if item == inventory_hostname %}reject{% else %}md5{% endif %}' 101 | with_items: "{{ ansible_play_batch }}" 102 | when: inventory_hostname == postgres_ha_cluster_master_host or 103 | db_prevsync_file.stat.exists 104 | # run only on master node or on synchronized slave 105 | register: alter_repl_acl 106 | 107 | #hostname:port:database:username:password 108 | - name: setup DB cluster auth (master IP) 109 | lineinfile: dest="~postgres/.pgpass" line="{{ postgres_ha_cluster_vip }}:{{ postgres_ha_pg_port }}:replication:postgres:{{ postgres_ha_pg_repl_pass }}" state=present 110 | args: 111 | owner: postgres 112 | group: postgres 113 | mode: 0600 114 | create: yes 115 | 116 | - name: setup .pgpass replication auth for master IP 117 | lineinfile: dest="~postgres/.pgpass" line="{{ postgres_ha_cluster_vip }}:{{ postgres_ha_pg_port }}:replication:{{ postgres_ha_pg_repl_user }}:{{ postgres_ha_pg_repl_pass }}" state=present 118 | args: 119 | owner: postgres 120 | group: postgres 121 | mode: 0600 122 | create: yes 123 | 124 | - name: setup .pgpass replication auth for other IPs 125 | lineinfile: dest="~postgres/.pgpass" line="{{ hostvars[item].ansible_default_ipv4.address }}:{{ postgres_ha_pg_port }}:replication:{{ postgres_ha_pg_repl_user }}:{{ postgres_ha_pg_repl_pass }}" state=present 126 | with_items: "{{ansible_play_batch}}" 127 | args: 128 | owner: postgres 129 | group: postgres 130 | mode: 0600 131 | create: yes 132 | 133 | - name: check if master host "{{ postgres_ha_cluster_master_host }}" is really a DB master 134 | shell: psql -Aqtc "SELECT pg_is_in_recovery()" 135 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 136 | become: yes 137 | become_user: postgres 138 | vars: 139 | ansible_ssh_pipelining: no 140 | register: is_slave 141 | failed_when: is_slave.stdout == 't' 142 | 143 | - name: mark master DB 144 | lineinfile: dest="{{ postgres_ha_pg_data }}/.synchronized" line="DO NOT REMOVE THIS FILE! Otherwise DB sync can go wrong." state=present 145 | args: 146 | owner: postgres 147 | group: postgres 148 | mode: 0600 149 | create: yes 150 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 151 | register: create_syncfile 152 | 153 | - name: check if DB is running (failure is OK) 154 | #shell: /bin/sh -c 'if {{ postgres_ha_pg_bindir }}/pg_ctl -D "{{ postgres_ha_pg_data }}" status &> /dev/null; then echo True; else echo False; fi' 155 | shell: "{{ postgres_ha_pg_bindir }}/pg_ctl -D {{ postgres_ha_pg_data }} status" 156 | register: db_running 157 | become: yes 158 | become_user: postgres 159 | ignore_errors: True 160 | 161 | # check if the DB is already clustered 162 | # If the resource constraint already exists, it means that DB clustering was fully applied before 163 | # and the database should run only from cluster resource. 
164 | 165 | - name: check if DB is running in cluster (failure is OK) 166 | #shell: /bin/sh -c 'if pcs resource show "{{ postgres_ha_cluster_pg_HA_res_name }}" &> /dev/null; then echo True; else echo False; fi' 167 | #shell: pcs resource show "{{ postgres_ha_cluster_pg_HA_res_name }}" 168 | shell: pcs constraint location show resources "{{ postgres_ha_cluster_pg_HA_res_name }}" | grep -q Enabled 169 | register: db_resource_exists 170 | ignore_errors: True 171 | 172 | - name: reload DB ACLs 173 | shell: "{{ postgres_ha_pg_bindir }}/pg_ctl -D {{ postgres_ha_pg_data }} reload" 174 | become: yes 175 | become_user: postgres 176 | vars: 177 | ansible_ssh_pipelining: no 178 | when: (alter_pg_acl.changed or alter_repl_acl.changed) and db_running is succeeded 179 | 180 | #- name: start master DB if necessary (without cluster) 181 | # service: name="{{ postgres_ha_pg_systemd_svcname }}" state=started enabled=yes 182 | # when: (inventory_hostname == postgres_ha_cluster_master_host) and 183 | # (db_resource_exists is failed) and 184 | # (db_running is failed) 185 | 186 | # if DB is clustered but not running, we have a problem; at least try cleaning the resource status 187 | - name: start master DB if necessary (in cluster) 188 | shell: pcs resource cleanup "{{ postgres_ha_cluster_pg_HA_res_name }}" && sleep 15 189 | when: (inventory_hostname == postgres_ha_cluster_master_host) and 190 | (db_resource_exists is succeeded) and 191 | (db_running is failed) 192 | 193 | - name: setup DB replication auth 194 | postgresql_user: 195 | name: "{{ postgres_ha_pg_repl_user }}" 196 | state: present 197 | port: "{{ postgres_ha_pg_port }}" 198 | password: "{{ postgres_ha_pg_repl_pass }}" 199 | role_attr_flags: SUPERUSER,LOGIN 200 | become: yes 201 | become_user: postgres 202 | vars: 203 | ansible_ssh_pipelining: no 204 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 205 | 206 | #- name: re-check DB datadir 207 | # find: paths="{{ postgres_ha_pg_data }}" 208 | # register: datadir_files 209 | 210 | # if the file does not exist, DB sync is needed 211 | - name: check if DB sync is required 212 | stat: path="{{ postgres_ha_pg_data }}/.synchronized" 213 | register: db_sync_file 214 | 215 | - name: stop slave DB 216 | service: name="{{ postgres_ha_pg_systemd_svcname }}" state=stopped enabled=false 217 | when: (not db_sync_file.stat.exists) and 218 | (db_resource_exists is failed) and 219 | (db_running is succeeded) 220 | 221 | - name: remove slave DB datadir before sync 222 | file: state=absent path="{{ postgres_ha_pg_data }}" 223 | #shell: rm -rf "{{ postgres_ha_pg_data }}" 224 | #args: 225 | # creates: "{{ postgres_ha_pg_data }}/.synchronized" # will not delete if the file exists (means that DB is already synced) 226 | when: not db_sync_file.stat.exists 227 | register: delete_datadir 228 | 229 | # replicate slave node (assumes the firewall is already set up) 230 | # run only on slave nodes and only when postgres datadir is empty 231 | - name: synchronize slave databases 232 | shell: '{{ postgres_ha_pg_bindir }}/pg_basebackup -h "{{ hostvars[postgres_ha_cluster_master_host].ansible_default_ipv4.address }}" -p {{ postgres_ha_pg_port }} -R -D "{{ postgres_ha_pg_data }}" -U "{{ postgres_ha_pg_repl_user }}" -v -P -X stream' 233 | become: yes 234 | become_user: postgres 235 | vars: 236 | ansible_ssh_pipelining: no 237 | when: delete_datadir.changed or not db_sync_file.stat.exists 238 | #when: (inventory_hostname != postgres_ha_cluster_master_host) and 239 | # (datadir_files.matched|int == 0) 240 | register: slave_resync
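# Descriptive note: pg_basebackup -R writes a recovery.conf on the slave with a
# primary_conninfo pointing at the master, so the fresh standby can start streaming
# immediately; once the PAF resource manages the node, recovery.conf is regenerated
# from the recovery_template prepared in paf.yml.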
241 | 242 | - name: forbid self-replication in pg_hba.conf on slaves 243 | lineinfile: dest="{{ postgres_ha_pg_data }}/pg_hba.conf" 244 | regexp='^host replication .*{{ hostvars[item].ansible_default_ipv4.address }}/32' 245 | insertbefore='^host' 246 | line='host replication {{ postgres_ha_pg_repl_user }} {{ hostvars[item].ansible_default_ipv4.address }}/32 {% if item == inventory_hostname %}reject{% else %}md5{% endif %}' 247 | with_items: "{{ ansible_play_batch }}" 248 | # run only on freshly synchronized slave 249 | when: inventory_hostname != postgres_ha_cluster_master_host and slave_resync.changed 250 | 251 | # Pause ansible execution to manually correct the postgres sync if necessary 252 | #- name: Exterminate mankind 253 | # pause: prompt='Please confirm you want to exterminate mankind! Press return to continue. Press Ctrl+c and then "a" to abort' 254 | 255 | - name: start slave DBs 256 | service: name="{{ postgres_ha_pg_systemd_svcname }}" state=started enabled=yes 257 | when: (inventory_hostname != postgres_ha_cluster_master_host) and 258 | (db_resource_exists is failed) #and delete_datadir.changed 259 | 260 | - name: check if slaves are connected 261 | shell: psql -Aqtc "SELECT count(*) FROM pg_stat_replication" 262 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 263 | become: yes 264 | become_user: postgres 265 | register: slavecount 266 | vars: 267 | ansible_ssh_pipelining: no 268 | until: (slavecount.stdout|string) == ((ansible_play_batch|length - 1)|string) 269 | retries: 15 270 | delay: 2 271 | -------------------------------------------------------------------------------- /tasks/pre-tasks.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - debug: msg='MASTER NODE SET TO {{ postgres_ha_cluster_master_host }}' 3 | run_once: true 4 | 5 | - name: verify postgres_ha_cluster_master_host 6 | fail: 7 | msg: "CRITICAL: defined master host ({{ postgres_ha_cluster_master_host }}) is not in host list ({{ ansible_play_batch }})" 8 | when: postgres_ha_cluster_master_host not in ansible_play_batch 9 | 10 | # Set default os_version. 11 | # Without os_version set, the role will fail. 12 | # This is a preparation for multi-OS support. 13 | - name: identify the OS (default) 14 | set_fact: 15 | os_version: 'centos7' 16 | 17 | - name: identify the OS (centos/rhel) 18 | set_fact: 19 | os_version: 'centos{{ ansible_distribution_major_version }}' 20 | when: ansible_distribution == 'CentOS' or ansible_distribution == 'RedHat' 21 | 22 | - debug: msg='The OS version is set to {{ os_version }}' 23 | run_once: true 24 | 25 | - name: verify PG cluster monitor intervals 26 | fail: 27 | msg: 'Parameters "postgres_ha_monitor_interval_pgmaster" and "postgres_ha_monitor_interval_pgslave" cannot have the same value!' 28 | when: postgres_ha_monitor_interval_pgmaster == postgres_ha_monitor_interval_pgslave 29 | 30 | - name: verify specific RRP setting for CentOS 6 31 | fail: 32 | msg: 'CentOS 6 requires at least 2 ring interfaces for RRP. Add another interface to the postgres_ha_network_rings config parameter or remove the current one.' 33 | when: os_version == 'centos6' and postgres_ha_mcast_enable and postgres_ha_network_rings and (postgres_ha_network_rings|length == 1)
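# A hypothetical example of the expected postgres_ha_network_rings format (a mapping
# of ring name to the interface whose ansible_<iface> fact is looked up; the values
# here are illustrative only):
# postgres_ha_network_rings:
#   ring1: eth1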
34 | 35 | # not necessary, cluster automatically selects the default interface 36 | #- name: set default network ring 37 | # set_fact: 38 | # network_rings: '{% if postgres_ha_network_rings %}{{ postgres_ha_network_rings }}{% else %}{{ { "ring0": ansible_default_ipv4.alias } }}{% endif %}' 39 | 40 | -------------------------------------------------------------------------------- /tasks/vip.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | - name: create virtual IP resource 4 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 5 | pcs_resource: command=create resource_id="{{ postgres_ha_cluster_vip_res_name }}" type=ocf:heartbeat:IPaddr2 6 | args: 7 | options: 8 | ip: "{{ postgres_ha_cluster_vip }}" 9 | cidr_netmask: "{{ postgres_ha_cluster_vip_mask }}" 10 | operations: 11 | - action: monitor 12 | options: 13 | interval: 10s 14 | 15 | 16 | -------------------------------------------------------------------------------- /templates/maint/clone_clusterdb_from_master.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$(whoami)" != "postgres" ]; then 4 | echo "Please run this script as user postgres!" 5 | exit 1 6 | fi 7 | 8 | CLUSTER_NAME="{{ postgres_ha_cluster_name }}" 9 | DBMASTER_IP="{{ postgres_ha_cluster_vip }}" 10 | DBPORT="{{ postgres_ha_pg_port }}" 11 | PGDATA="{{ postgres_ha_pg_data }}" 12 | PGBINDIR="{{ postgres_ha_pg_bindir }}" 13 | DBVERSION="{{ postgres_ha_pg_version }}" 14 | REPL_USER="{{ postgres_ha_pg_repl_user }}" 15 | CLUSTER_RES_NAME="{{ postgres_ha_cluster_pg_HA_res_name }}" 16 | RECOVERY_TEMPLATE="$(dirname "${PGDATA}")/recovery.conf.${CLUSTER_NAME}.pcmk" 17 | RECOVERY_DEST="${PGDATA}/recovery.conf" 18 | 19 | # usage: if check_reply_yes "Some question?"; then echo yes; else echo no; fi 20 | function check_reply_yes() 21 | { 22 | echo -n "*** $1 (Y/n)" 23 | read -r reply 24 | if [[ -z "$reply" ]] || [[ $reply == "y" ]] || [[ $reply == "Y" ]] 25 | then 26 | return 0 27 | else 28 | return 1 29 | fi 30 | } 31 | 32 | function check_retval() 33 | { 34 | retval=$? 35 | if [ $retval -ne 0 ] 36 | then 37 | echo 38 | echo "***** Error in module $1 *****" 39 | echo 40 | exit $retval 41 | fi 42 | } 43 | 44 | 45 | ####################################################################################### 46 | # PROGRAM START 47 | ####################################################################################### 48 | 49 | PGDATA="${PGDATA%/}" # remove trailing slash 50 | cat << EOF 51 | **************************************************************************************** 52 | This script will sync the postgresql datadir from the master. It can also be used to 53 | convert a failed master node to a slave and reconnect it to the cluster. 54 | Please make sure that the postgres database is not running on this node!
55 | Cluster command to shut down the database locally (for 60 minutes): 56 | pcs resource ban $CLUSTER_RES_NAME {{ inventory_hostname }} lifetime=PT60M 57 | And to bring it up before lifetime expiration: 58 | pcs resource clear $CLUSTER_RES_NAME {{ inventory_hostname }} 59 | 60 | Cluster parameters: 61 | CLUSTER NAME: $CLUSTER_NAME 62 | DB MASTER IP: $DBMASTER_IP 63 | DB PORT: $DBPORT 64 | PG DATADIR: $PGDATA 65 | 66 | EOF 67 | 68 | # check if the master IP is on this host (makes no sense to sync from myself) 69 | if /usr/sbin/ip addr show | grep -q "[ \t]$DBMASTER_IP/"; then 70 | echo "THIS IS THE MASTER NODE!!!" 71 | echo "Aborting action." 72 | exit 111 73 | fi 74 | 75 | # check if master DB is running 76 | if ! ${PGBINDIR}/pg_isready -qh "$DBMASTER_IP" -p "$DBPORT" -U "$REPL_USER"; then 77 | echo "The master database is not responding!" 78 | echo "Aborting action." 79 | exit 112 80 | fi 81 | 82 | # checks succeeded, we are ready to run the sync 83 | if ! check_reply_yes "Continue?"; then 84 | echo 85 | echo "Exiting without changes.." 86 | exit 0 87 | fi 88 | echo "****************************************************************************************" 89 | echo 90 | 91 | 92 | if ps auxww | grep -v grep | grep -Eq -- "-D +${PGDATA}( |$)"; then 93 | echo "**************************************************" 94 | echo "Postgresql server is still running. Stop it first!" 95 | echo "**************************************************" 96 | exit 1 97 | fi 98 | if ${PGBINDIR}/pg_ctl -D "$PGDATA" status > /dev/null; then 99 | echo "***********************************" 100 | echo "Postgresql server is still running!" 101 | echo "***********************************" 102 | 103 | echo ${PGBINDIR}/pg_ctl -D "$PGDATA" status 104 | ${PGBINDIR}/pg_ctl -D "$PGDATA" status 105 | echo 106 | echo Please stop the database and run this script again. 107 | exit 1 108 | fi 109 | 110 | OLD_PGDATA="${PGDATA}.backup-$(date +%Y%m%d%H%M)" 111 | if [ -d "${PGDATA}" ]; then 112 | if ! check_reply_yes "Remove old datadir? (n = rename old datadir)"; then 113 | echo "Moving old datadir.." 114 | echo mv "${PGDATA}" "${OLD_PGDATA}" 115 | mv "${PGDATA}" "${OLD_PGDATA}" 116 | check_retval rename_old_dir 117 | else 118 | echo "Removing old datadir.." 119 | echo rm -rf "${PGDATA}" 120 | rm -rf "${PGDATA}" 121 | check_retval delete_old_dir 122 | fi 123 | else 124 | echo "Old datadir ${PGDATA} does not exist. Continuing without backing it up." 125 | echo 126 | fi 127 | echo 128 | echo "Cloning the DB:" 129 | echo ${PGBINDIR}/pg_basebackup -h "$DBMASTER_IP" -D "$PGDATA" -X stream -P -p "$DBPORT" -U "$REPL_USER" 130 | ${PGBINDIR}/pg_basebackup -h "$DBMASTER_IP" -D "$PGDATA" -X stream -P -p "$DBPORT" -U "$REPL_USER" 131 | check_retval clone_DB 132 | echo 133 | echo Writing recovery.conf 134 | echo cp -f "${RECOVERY_TEMPLATE}" "${RECOVERY_DEST}" 135 | 136 | cp -f "${RECOVERY_TEMPLATE}" "${RECOVERY_DEST}" 137 | 138 | echo Altering pg_hba.conf to forbid self-replication 139 | # permit all hosts first 140 | sed -ri'' -e 's,^(host replication .*({{ all_ips.split(" ") | join("|") }})/32 *)(reject|md5),\1md5,g' "${PGDATA}/pg_hba.conf" 141 | # deny myself 142 | sed -ri'' -e 's,^(host replication .*({{ hostvars[inventory_hostname].ansible_default_ipv4.address }})/32 *)md5,\1reject,g' "${PGDATA}/pg_hba.conf" 143 | 144 | cat << EOF 145 | Slave database cloned successfully.
146 | 147 | You can start it now: 148 | pcs resource clear $CLUSTER_RES_NAME {{ inventory_hostname }} 149 | 150 | Or (in case of emergency) manually by command: 151 | ${PGBINDIR}/pg_ctl -D "$PGDATA" start 152 | 153 | Also check logs: 154 | tail -f ${PGDATA}/pg_log/\$(cd ${PGDATA}/pg_log; ls -1 | tail -1) 155 | or 156 | journalctl --follow 157 | 158 | EOF 159 | 160 | if [ -d "${OLD_PGDATA}" ]; then 161 | echo "Old datadir can be safely removed:" 162 | echo "rm -rf ${OLD_PGDATA}" 163 | echo 164 | fi 165 | echo May the Force be with you.. 166 | echo 167 | exit 0 168 | -------------------------------------------------------------------------------- /templates/pg_hba.conf.j2: -------------------------------------------------------------------------------- 1 | # TYPE DATABASE USER ADDRESS METHOD 2 | 3 | # Default: 4 | #####!!!!!######local all postgres ident map=superuser 5 | local all postgres trust 6 | # "local" is for Unix domain socket connections only 7 | local all all md5 8 | # replication ACLs 9 | {% for node in ansible_play_batch %} 10 | host replication {{ postgres_ha_pg_repl_user }} {{ hostvars[node].ansible_default_ipv4.address }}/32 md5 11 | host postgres {{ postgres_ha_pg_repl_user }} {{ hostvars[node].ansible_default_ipv4.address }}/32 md5 12 | {% endfor %} 13 | # IPv4 local connections 14 | host all all 127.0.0.1/32 md5 15 | # IPv6 local connections 16 | host all all ::1/128 md5 17 | -------------------------------------------------------------------------------- /templates/recovery.conf.pcmk.j2: -------------------------------------------------------------------------------- 1 | standby_mode = on 2 | primary_conninfo = 'port={{ postgres_ha_pg_port }} host={{ postgres_ha_cluster_vip }} user={{ postgres_ha_pg_repl_user }} application_name={{ inventory_hostname }}' 3 | recovery_target_timeline = 'latest' 4 | -------------------------------------------------------------------------------- /tests/inventory: -------------------------------------------------------------------------------- 1 | localhost -------------------------------------------------------------------------------- /tests/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: localhost 3 | remote_user: root 4 | roles: 5 | - postgresql-ha -------------------------------------------------------------------------------- /vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # vars file for postgresql-ha 3 | --------------------------------------------------------------------------------