├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── defaults
│   └── main.yml
├── files
│   ├── pgsqlms-2.2.0-fix-pg10
│   ├── pgsqlms-2.2.0-geo-patched
│   ├── pgsqlms-2.2.1-geo-patched
│   ├── resource-agents-paf-1.1.0-1.noarch.rpm
│   ├── resource-agents-paf-2.2.0-1.noarch.rpm
│   └── resource-agents-paf-2.2.1-1.noarch.rpm
├── handlers
│   └── main.yml
├── library
│   ├── pcs_property
│   └── pcs_resource
├── meta
│   └── main.yml
├── tasks
│   ├── constraints.yml
│   ├── finalize.yml
│   ├── main.yml
│   ├── maintenance.yml
│   ├── paf.yml
│   ├── pcs.yml
│   ├── postgresql_sync.yml
│   ├── pre-tasks.yml
│   └── vip.yml
├── templates
│   ├── maint
│   │   └── clone_clusterdb_from_master.sh.j2
│   ├── pg_hba.conf.j2
│   └── recovery.conf.pcmk.j2
├── tests
│   ├── inventory
│   └── test.yml
└── vars
    └── main.yml

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.swp
2 | 
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | ---
2 | language: python
3 | python: "2.7"
4 | 
5 | # Use the new container infrastructure
6 | sudo: false
7 | 
8 | # Install ansible
9 | addons:
10 |   apt:
11 |     packages:
12 |     - python-pip
13 | 
14 | install:
15 |   # Install ansible
16 |   - pip install ansible
17 | 
18 |   # Check ansible version
19 |   - ansible --version
20 | 
21 |   # Create ansible.cfg with correct roles_path
22 |   - printf '[defaults]\nroles_path=../' >ansible.cfg
23 | 
24 | script:
25 |   # Basic role syntax check
26 |   - ansible-playbook tests/test.yml -i tests/inventory --syntax-check
27 | 
28 | notifications:
29 |   webhooks: https://galaxy.ansible.com/api/v1/notifications/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 | 
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 | 
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 | 
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 | 
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 | 
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 | 
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 | 
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 | 
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 | 
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 | 
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 | 
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 | 
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 | 
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 | 
176 | END OF TERMS AND CONDITIONS
177 | 
178 | APPENDIX: How to apply the Apache License to your work.
179 | 
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 | 
189 | Copyright {yyyy} {name of copyright owner}
190 | 
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | postgres-ha
2 | ===========
3 | 
4 | With this role, you will transform your standalone postgresql server into an N-node postgres cluster with automated failover. You only need one working postgresql server; the other hosts can be clean CentOS 7 or CentOS 6 minimal installs.
5 | 
6 | Alternatively, this role can create a database cluster for you from scratch. If no postgres database is detected, it will be created.
7 | 
8 | What it will do:
9 | - install the cluster software stack (pcs, corosync, pacemaker)
10 | - add IPs of cluster hosts to /etc/hosts files
11 | - create a pcs cluster from all play hosts
12 | - install database binaries if needed
13 | - init master database if needed
14 | - alter postgresql configuration if needed
15 | - sync slave databases from master host
16 | - make sure the DB replication is working
17 | - create cluster resources for database, floating IP and constraints
18 | - check again that everything is working as expected
19 | 
20 | Automated failover is set up using the PAF pacemaker module: https://github.com/dalibo/PAF
21 | 
22 | What you should know
23 | --------------------
24 | 
25 | - The role is idempotent. I've made many checks to allow running it multiple times without breaking things. You can run it again safely even if the role fails. The only thing you need to check before the run is the `postgres_ha_cluster_master_host` variable. But don't worry, if the specified host is not the master database, the role will fail gracefully without disrupting things.
26 | 
27 | - During the run, the role will alter your postgresql.conf and pg_hba.conf to enable replication. You can review the changes to postgresql.conf in [defaults/main.yml](defaults/main.yml) (`postgres_ha_postgresql_conf_vars` variable). In pg_hba.conf, host ACL statements will be added for every cluster node. They will be added before all previously existing host ACL statements.
28 | 
29 | - The postgres replication is asynchronous by default. If you want synchronous replication, alter the `postgres_ha_postgresql_conf_vars` variable by adding the `synchronous_standby_names` parameter. Please see the postgresql manual for more info. Also note that if the last synchronous replica disconnects from the master, the master database will stop serving requests.
30 | 
31 | - You should have at least a basic understanding of clustering and of how to work with the `pcs` command. If the role fails for some reason, it is relatively easy to recover from it, provided you understand what the logs are trying to say and/or how to run the appropriate recovery actions. See the cleanup section for more info.
32 | 
33 | - You need to alter firewall settings before running this role. The cluster members need to communicate with each other to form a cluster and to replicate the postgres DB. I recommend adding a firewall-configuring role before the postgres-ha role in your play.
34 | 
35 | - If the master datadir is empty on the first run, the role will init an empty datadir. Slave nodes will then download this empty database. If the datadir is not empty, the initdb will be skipped. This means that you can run this role on clean CentOS installs that don't have any postgresql database installed. The result will be a fully working empty database cluster.
36 | 
37 | - On the first run, the datadirs on slave nodes will be deleted without prompt. Please make sure you specify the correct `postgres_ha_cluster_master_host` at least for this first run (slave datadirs will NEVER be deleted after the first initial sync is done).
38 | 
39 | - If you plan to apply the role to a higher number of servers (7+), please be aware that the servers download RPM packages simultaneously. This can be identified as a DDoS attack, and some repository providers may refuse your downloads. As a result, the role will fail. I recommend setting up your own repository mirror in such cases and pointing the role at it, as sketched below.
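
  A minimal sketch of pointing the role at such a mirror (hypothetical values: `repo.example.internal` is a placeholder, and the path must lead to your mirrored copy of the PGDG repo package referenced by `pg_pkg_name`):

  ```
  postgres_ha_repo_url: 'https://repo.example.internal/pgdg/{{ pg_pkg_name }}'
  # or pre-configure the repository on the hosts yourself and skip the download:
  #postgres_ha_import_repo: false
  ```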
40 | 
41 | - Please don't change the cluster resource name parameters after the role has been applied. On the next run, this would result in trying to create new, colliding resources.
42 | 
43 | - Fencing is not configured by this role. If you need it, you have to configure it manually after running the role.
44 | 
45 | Requirements
46 | ------------
47 | 
48 | This role works on CentOS 6 and 7. RHEL was not tested but should work without problems. If you need support for another distribution, I can help. Post an issue.
49 | 
50 | The postgresql binaries on your primary server should be installed from the official repository:
51 | 
52 | https://yum.postgresql.org/repopackages.php
53 | 
54 | Note: If you have binaries from another repo, you need to modify the `postgres_ha_repo_url` variable to change the postgres repository source, and possibly also the bindir and datadir paths in other role variables. If you need to change the installed package name(s), you need to directly modify the `install pg*` task in the `tasks/postgresql_sync.yml` file.
55 | 
56 | Role Variables
57 | --------------
58 | 
59 | For all variables with descriptions, see [defaults/main.yml](defaults/main.yml)
60 | 
61 | Variables that must be changed:
62 | - `postgres_ha_cluster_master_host` - the master database host (WARNING: please make sure you fill this correctly, otherwise you may lose data!)
63 | - `postgres_ha_cluster_vip` - a floating IP address that travels with the master database
64 | - `postgres_ha_pg_repl_pass` - password for replicating postgresql data
65 | - `postgres_ha_cluster_ha_password` - password for cluster config replication
66 | - `postgres_ha_cluster_ha_password_hash` - password hash of postgres_ha_cluster_ha_password
67 | 
68 | The password hash can be generated, for example, by this command:
69 | 
70 | `python -c 'import crypt; print(crypt.crypt("my_cluster_ha_password", crypt.mksalt(crypt.METHOD_SHA512)))'`
71 | 
72 | Dependencies
73 | ------------
74 | 
75 | No other roles are required as a dependency. However, you can combine this role with some other role that installs a postgresql database.
76 | 
77 | Example Playbook
78 | ----------------
79 | 
80 | The usage is relatively simple - install minimal CentOS systems, set the variables, and run the role.
81 | 
82 | Two settings are required:
83 | - `gather_facts=True` - we need to know the IP addresses of the cluster nodes
84 | - `any_errors_fatal=True` - it ensures that an error on any node stops the whole ansible run, because it doesn't make sense to continue when you lose some of your cluster nodes along the way.
85 | 
86 | ```
87 | - name: install PG HA
88 |   hosts: db?
89 |   gather_facts: True
90 |   any_errors_fatal: True
91 |   vars:
92 |     postgres_ha_cluster_master_host: db1
93 |     postgres_ha_cluster_vip: 10.10.10.10
94 |     postgres_ha_pg_repl_pass: MySuperSecretDBPass
95 |     postgres_ha_cluster_ha_password: AnotherSuperSecretPass1234
96 |     postgres_ha_cluster_ha_password_hash: '$6$mHeZ7/LD1y.........7VJYu.'
97 |   pre_tasks:
98 |     - name: disable firewall
99 |       service: name=firewalld state=stopped enabled=no
100 |   roles:
101 |     - postgres-ha
102 | ```
103 | 
104 | Cleanup after failure
105 | ---------------------
106 | 
107 | If the role fails repeatedly and you want to run it fresh as if it were the first time, you need to clean up some things.
108 | Please note that the default resource names are used here. If you changed them using variables, adjust these commands accordingly, as in the example below.
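For example, if you set `postgres_ha_cluster_pg_res_name: pgsql`, you would run `pcs resource delete pgsql` instead of `pcs resource delete postgres`, and the name of the `.*_constraints_processed` marker file changes accordingly, because it is generated from this variable.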
109 | 
110 | - RUN ON ANY NODE:
111 | ```
112 | pcs resource delete pg-vip
113 | pcs resource delete postgres
114 | #pcs resource delete postgres-ha   # probably not needed
115 | #pcs resource cleanup postgres     # probably not needed
116 | 
117 | # Make sure no (related) cluster resources are defined.
118 | ```
119 | - RUN ON ALL SLAVE NODES:
120 | ```
121 | systemctl stop postgresql-9.6
122 | # Make sure no postgres db is running.
123 | systemctl status postgresql-9.6
124 | ps aux | grep postgres
125 | rm -rf /var/lib/pgsql/9.6/data
126 | rm -f /var/lib/pgsql/9.6/recovery.conf.pgcluster.pcmk
127 | rm -f /var/lib/pgsql/9.6/.*_constraints_processed   # name generated from postgres_ha_cluster_pg_res_name
128 | ```
129 | - RUN ONLY ON MASTER NODE:
130 | ```
131 | systemctl stop postgresql-9.6
132 | rm -f /var/lib/pgsql/9.6/recovery.conf.pgcluster.pcmk
133 | rm -f /var/lib/pgsql/9.6/.*_constraints_processed
134 | rm -f /var/lib/pgsql/9.6/data/recovery.conf
135 | rm -f /var/lib/pgsql/9.6/data/.synchronized
136 | # Make sure no postgres db is running.
137 | ps aux | grep postgres
138 | systemctl start postgresql-9.6
139 | systemctl status postgresql-9.6
140 | # Check postgres db functionality.
141 | ```
142 | - START AGAIN
143 | ```
144 | # Check variables & defaults and run the ansible role again.
145 | ```
146 | 
147 | 
148 | License
149 | -------
150 | 
151 | BSD
152 | 
153 | Author Information
154 | ------------------
155 | 
156 | Created by YanChi.
157 | 
158 | Originally part of the Danube Cloud project (https://github.com/erigones/esdc-ce).
159 | 
160 | 
--------------------------------------------------------------------------------
/defaults/main.yml:
--------------------------------------------------------------------------------
1 | ---
2 | # cluster config
3 | postgres_ha_cluster_master_host: "{{ ansible_play_batch[0] }}"  # sync all DB slaves from this master node (set to the first node if not defined)
4 | postgres_ha_cluster_name: 'pgcluster'         # name of the pcs cluster
5 | postgres_ha_cluster_vip: 10.20.30.40          # floating IP that will be used to connect to the clustered DB (always follows the master)
6 | postgres_ha_cluster_vip_mask: 24              # floating IP netmask
7 | postgres_ha_cluster_vip_res_name: 'pg-vip'    # resource name of the floating IP
8 | postgres_ha_cluster_pg_res_name: 'postgres'   # slave DB cluster resource name
9 | postgres_ha_cluster_pg_HA_res_name: 'postgres-ha'   # master DB cluster resource name (this name is used to manage the postgres resource)
10 | postgres_ha_pg_port: 5432
11 | 
12 | postgres_ha_monitor_interval_pgmaster: '15s'  # frequency of checking if the master PG instance is alive
13 | postgres_ha_monitor_interval_pgslave: '16s'   # frequency of checking if a slave PG instance is alive
14 | 
15 | # auth config
16 | postgres_ha_pg_repl_user: replicator          # database user used for replication
17 | postgres_ha_pg_repl_pass: rybKath3KeckGov1
18 | postgres_ha_cluster_ha_password: 'fropFav7epAbOch2'   # password for joining the pcs cluster
19 | postgres_ha_cluster_ha_password_hash: '$6$MHAki4YS$Nk7O3FEC2G.INznoSUj4ByFgdwFJ8mcI9.Ks3XAoLLe9f9GB36G8hZe9o8ygDySJwvnLVCn0LGPzcOapK42/A/' # fropFav7epAbOch2
20 | 
21 | # postgres config
22 | postgres_ha_pg_version: 9.6
23 | postgres_ha_import_repo: true   # Enable download of the postgresql repo before install
24 | postgres_ha_repo_url: 'https://download.postgresql.org/pub/repos/yum/{{ postgres_ha_pg_version }}/redhat/rhel-7-x86_64/{{ pg_pkg_name }}'
25 | postgres_ha_pg_systemd_svcname: "postgresql-{{ postgres_ha_pg_version }}"   # the name of the original postgres DB resource in systemd
26 | postgres_ha_pg_data: "/var/lib/pgsql/{{ postgres_ha_pg_version }}/data"   # where can I find the PG datadir?
27 | postgres_ha_pg_bindir: "/usr/pgsql-{{ postgres_ha_pg_version }}/bin"      # where are the PG binaries?
28 | 
29 | postgres_ha_postgresql_conf_vars:                           # When altering this variable, please copy all postgresql.conf items specified here \
30 |   listen_addresses: "'*'"                                   # because this whole variable will be overridden by your new definition. \
31 |   max_wal_senders: "{{ ansible_play_batch|length * 2 }}"    # You can also change other postgresql.conf variables here; it will work.
32 |   max_replication_slots: "{{ ansible_play_batch|length * 2 }}"
33 |   wal_level: "hot_standby"
34 |   hot_standby: "on"
35 |   wal_log_hints: "on"
36 | 
37 | postgres_ha_maint_scripts_path: /var/lib/pgsql/pg-maint     # location where user scripts for database administration are created
38 | 
39 | # PAF vars
40 | postgres_ha_paf_version: 2.2.1
41 | postgres_ha_paf_geo_patch: False    # Apply a patch to PAF to better handle network splits in geographically split clusters.
42 |                                     # This patch is from the creator of this ansible role and is not official.
43 |                                     # It allows having a postgresql master-slave cluster without the need for configured fencing
44 |                                     # (which is hard to have in geographically split clusters).
45 |                                     # It adds an additional safety mechanism that tracks the highest seen database timeline.
46 |                                     # See diff here: https://github.com/YanChii/PAF/compare/master...YanChii:master-geo-ha?expand=1
47 | 
48 | postgres_ha_network_rings:          # Use multiple networks for cluster interconnection. Defaults to the primary NIC.
49 |   # ring0: "eth0"                   # Primary network. The "lowest" string (ASCII compare) will be selected as the primary network ring.
50 |   # ring1: "eth1"                   # Additional network. You can use any names instead of "ring*". These will be appended to a hostname.
51 | 
52 | postgres_ha_mcast_enable: False     # Enable cluster communication using multicast (sets transport=udp, rrpmode=passive). \
53 |                                     # For mcast to work, you need to set postgres_ha_network_rings. \
54 |                                     # CentOS 6: it is recommended to enable multicast. You need at least 2 postgres_ha_network_rings.
55 | 
56 | postgres_ha_pcs_advanced_params:    # Additional parameters for the "pcs cluster setup" command.
57 |   # token: 1000
58 |   # mcast0: '239.255.1.1'           # these are the default mcast addresses for the respective cluster network rings
59 |   # mcast1: '239.255.2.1'
60 |   # mcastport0: 5405
61 |   # mcastport1: 5405
62 |   #Ignored params: 'addr0', 'addr1', 'addr2', 'addr3', 'transport' (they are set by postgres_ha_mcast_enable)
63 | 
64 | postgres_ha_gui_enable: False       # enable the GUI on all hosts (one host is sufficient for the GUI to work, \
65 |                                     # but when it goes down, you lose the GUI) on https port 2224
--------------------------------------------------------------------------------
/files/pgsqlms-2.2.0-fix-pg10:
--------------------------------------------------------------------------------
1 | #!/usr/bin/perl
2 | # This program is open source, licensed under the PostgreSQL License.
3 | # For license terms, see the LICENSE file.
4 | #
5 | # Copyright (C) 2016-2017: Jehan-Guillaume de Rorthais and Mael Rimbault
6 | 
7 | =head1 NAME
8 | 
9 | ocf_heartbeat_pgsqlms - A PostgreSQL multi-state resource agent for Pacemaker
10 | 
11 | =head1 SYNOPSIS
12 | 
13 | B<pgsqlms> [start | stop | monitor | promote | demote | notify | reload | methods | meta-data | validate-all]
14 | 
15 | =head1 DESCRIPTION
16 | 
17 | Resource script for PostgreSQL in replication. It manages PostgreSQL servers using streaming replication as an HA resource.
18 | 
19 | =cut
20 | 
21 | use strict;
22 | use warnings;
23 | use 5.008;
24 | 
25 | use POSIX qw(locale_h);
26 | use Scalar::Util qw(looks_like_number);
27 | use File::Spec;
28 | use File::Temp;
29 | use Data::Dumper;
30 | 
31 | use FindBin;
32 | use lib "$FindBin::RealBin/../lib/";
33 | use lib "$FindBin::RealBin/../../lib/heartbeat/";
34 | 
35 | use OCF_ReturnCodes;
36 | use OCF_Directories;
37 | use OCF_Functions;
38 | 
39 | our $VERSION = 'v2.2.0';
40 | our $PROGRAM = 'pgsqlms';
41 | 
42 | # OCF environment
43 | my $OCF_RESOURCE_INSTANCE = $ENV{'OCF_RESOURCE_INSTANCE'};
44 | my $OCF_ACTION = $ARGV[0];
45 | my $OCF_RUNNING_SLAVE = $OCF_SUCCESS;
46 | my %OCF_NOTIFY_ENV = ocf_notify_env() if $OCF_ACTION eq 'notify';
47 | 
48 | # Default parameter values
49 | my $system_user_default = "postgres";
50 | my $bindir_default = "/usr/bin";
51 | my $pgdata_default = "/var/lib/pgsql/data";
52 | my $pghost_default = "/tmp";
53 | my $pgport_default = 5432;
54 | my $start_opts_default = "";
55 | my $maxlag_default = "0";
56 | 
57 | # Set default values if not found in environment
58 | my $system_user = $ENV{'OCF_RESKEY_system_user'} || $system_user_default;
59 | my $bindir = $ENV{'OCF_RESKEY_bindir'} || $bindir_default;
60 | my $pgdata = $ENV{'OCF_RESKEY_pgdata'} || $pgdata_default;
61 | my $datadir = $ENV{'OCF_RESKEY_datadir'} || $pgdata;
62 | my $pghost = $ENV{'OCF_RESKEY_pghost'} || $pghost_default;
63 | my $pgport = $ENV{'OCF_RESKEY_pgport'} || $pgport_default;
64 | my $start_opts = $ENV{'OCF_RESKEY_start_opts'} || $start_opts_default;
65 | my $maxlag = $ENV{'OCF_RESKEY_maxlag'} || $maxlag_default;
66 | my $recovery_tpl = $ENV{'OCF_RESKEY_recovery_template'}
67 |     || "$pgdata/recovery.conf.pcmk";
68 | 
69 | 
70 | # PostgreSQL commands path
71 | my $PGCTL = "$bindir/pg_ctl";
72 | my $PGPSQL = "$bindir/psql";
73 | my $PGCTRLDATA = "$bindir/pg_controldata";
74 | my $PGISREADY = "$bindir/pg_isready";
75 | my $PGWALDUMP = "$bindir/pg_waldump";
76 | 
77 | # pacemaker commands path
78 | my $CRM_MASTER = "$HA_SBIN_DIR/crm_master --lifetime forever";
79 | my $CRM_NODE = "$HA_SBIN_DIR/crm_node";
80 | my $CRM_RESOURCE = "$HA_SBIN_DIR/crm_resource";
81 | my $ATTRD_PRIV = "$HA_SBIN_DIR/attrd_updater --private --lifetime reboot";
82 | 
83 | # Global vars
84 | my $nodename;
85 | my $exit_code = 0;
86 | # numeric pgsql versions
87 | my $PGVERNUM;
88 | my $PGVER_10 = 100000;
89 | 
90 | # Run a query using psql.
91 | #
92 | # This function fills the result set referenced by its second argument and
93 | # returns the psql return code.
94 | #
95 | sub _query {
96 |     my $query = shift;
97 |     my $res = shift;
98 |     my $connstr = "dbname=postgres";
99 |     my $RS = chr(30); # ASCII RS (record separator)
100 |     my $FS = chr(3); # ASCII ETX (end of text)
101 |     my $postgres_uid = getpwnam( $system_user );
102 |     my $oldeuid = $>;
103 |     my $tmpfile;
104 |     my @res;
105 |     my $ans;
106 |     my $pid;
107 |     my $rc;
108 | 
109 |     unless ( defined $res and defined $query and $query ne '' ) {
110 |         ocf_log( 'debug', '_query: wrong parameters!' );
111 |         return -1;
112 |     }
113 | 
114 |     unless ( $tmpfile = File::Temp->new(
115 |             TEMPLATE => 'pgsqlms-XXXXXXXX',
116 |             DIR => $HA_RSCTMP
117 |         ) )
118 |     {
119 |         ocf_exit_reason( 'Could not create or write in a temp file' );
120 |         exit $OCF_ERR_INSTALLED;
121 |     }
122 | 
123 |     print $tmpfile $query;
124 |     chmod 0644, $tmpfile;
125 | 
126 |     ocf_log( 'debug', '_query: %s', $query );
127 | 
128 |     # Change the effective user to the given system_user so that, after
129 |     # forking, the child process has a uid allowing psql to connect w/o password
130 |     $> = $postgres_uid;
131 | 
132 |     # Forking + piping
133 |     $pid = open(my $KID, "-|");
134 | 
135 |     if ( $pid == 0 ) { # child
136 |         exec $PGPSQL, '--set', 'ON_ERROR_STOP=1', '-qXAtf', $tmpfile,
137 |             '-R', $RS, '-F', $FS, '--port', $pgport, '--host', $pghost,
138 |             $connstr;
139 |     }
140 | 
141 |     # parent
142 |     $> = $oldeuid;
143 | 
144 |     {
145 |         local $/;
146 |         $ans = <$KID>;
147 |     }
148 | 
149 |     close $KID;
150 |     $rc = $? >> 8;
151 | 
152 |     ocf_log( 'debug', '_query: psql return code: %d', $rc );
153 | 
154 |     if ( defined $ans ) {
155 |         chop $ans;
156 | 
157 |         push @{ $res }, [ split(chr(3) => $_, -1) ]
158 |             foreach split (chr(30) => $ans, -1);
159 | 
160 |         ocf_log( 'debug', '_query: @res: %s',
161 |             Data::Dumper->new( [ $res ] )->Terse(1)->Dump );
162 |     }
163 | 
164 |     # Possible return codes:
165 |     #  -1: wrong parameters
166 |     #   0: OK
167 |     #   1: failed to get resources (memory, missing file, ...)
168 |     #   2: unable to connect
169 |     #   3: query failed
170 |     return $rc;
171 | }
172 | 
173 | # Get the last received location on a standby
174 | # if the first argument is true, returns the value as decimal
175 | # if the first argument is false, returns the value as LSN
176 | # Returns undef if the query failed
177 | sub _get_last_received_lsn {
178 |     my ( $dec ) = @_;
179 |     my $pg_last_wal_receive_lsn = 'pg_last_wal_receive_lsn()';
180 |     my $pg_wal_lsn_diff = 'pg_wal_lsn_diff';
181 |     my $query;
182 |     my $rc;
183 |     my @rs;
184 | 
185 |     if ( $PGVERNUM < $PGVER_10 ) {
186 |         $pg_last_wal_receive_lsn = 'pg_last_xlog_receive_location()';
187 |         $pg_wal_lsn_diff = 'pg_xlog_location_diff';
188 |     }
189 | 
190 |     if ( $dec ) {
191 |         $query = "SELECT $pg_wal_lsn_diff( $pg_last_wal_receive_lsn, '0/0' )";
192 |     }
193 |     else {
194 |         $query = "SELECT $pg_last_wal_receive_lsn";
195 |     }
196 | 
197 |     $rc = _query( $query, \@rs );
198 | 
199 |     return $rs[0][0] if $rc == 0 and $rs[0][0];
200 | 
201 |     ocf_log( 'err', 'Could not query last received LSN (%s)', $rc ) if $rc != 0;
202 |     ocf_log( 'err', 'No values for last received LSN' )
203 |         if $rc == 0 and not $rs[0][0];
204 | 
205 |     return undef;
206 | }
207 | 
208 | # Get the master score for each connected standby
209 | # Directly returns the result set of the query, or exits with an error.
210 | # Exits with OCF_ERR_GENERIC if the query failed
211 | sub _get_lag_scores {
212 |     my $pg_current_wal_lsn = 'pg_current_wal_lsn()';
213 |     my $pg_wal_lsn_diff = 'pg_wal_lsn_diff';
214 |     my $write_lsn = 'write_lsn';
215 |     my $query;
216 |     my $rc;
217 |     my @rs;
218 | 
219 |     if ( $PGVERNUM < $PGVER_10 ) {
220 |         $pg_current_wal_lsn = 'pg_current_xlog_location()';
221 |         $pg_wal_lsn_diff = 'pg_xlog_location_diff';
222 |         $write_lsn = 'write_location';
223 |     }
224 | 
225 |     # We check locations of connected standbys by querying the
226 |     # "pg_stat_replication" view.
227 |     # The row_number applies to the result set ordered by write_location ASC, so
228 |     # the highest row_number should be given to the closest node to the
229 |     # master, then the lowest node name (alphanumeric sort) in case of equality.
230 |     # The result set itself is ordered by priority DESC to process the best known
231 |     # candidate first.
232 |     $query = qq{
233 |       SELECT application_name, priority, location, state, current_lag
234 |       FROM (
235 |         SELECT application_name,
236 |             (1000 - (
237 |                 row_number() OVER (
238 |                     PARTITION BY state IN ('startup', 'backup')
239 |                     ORDER BY location ASC, application_name ASC
240 |                 ) - 1
241 |             ) * 10
242 |             ) * CASE WHEN ( $maxlag > 0
243 |                             AND current_lag > $maxlag)
244 |                 THEN -1
245 |                 ELSE 1
246 |               END AS priority,
247 |             location, state, current_lag
248 |         FROM (
249 |             SELECT application_name, $write_lsn AS location, state,
250 |                 $pg_wal_lsn_diff($pg_current_wal_lsn, $write_lsn) AS current_lag
251 |             FROM pg_stat_replication
252 |         ) AS s2
253 |       ) AS s1
254 |       ORDER BY priority DESC
255 |     };
256 | 
257 |     $rc = _query( $query, \@rs );
258 | 
259 |     if ( $rc != 0 ) {
260 |         ocf_exit_reason( 'Query to get standby locations failed (%d)', $rc );
261 |         exit $OCF_ERR_GENERIC;
262 |     }
263 | 
264 |     return \@rs;
265 | }
266 | 
267 | # get the timeout for the current action, given from the environment var
268 | # Returns the timeout as an integer
269 | # undef if unknown
270 | sub _get_action_timeout {
271 |     my $timeout = $ENV{'OCF_RESKEY_CRM_meta_timeout'} / 1000;
272 | 
273 |     ocf_log( 'debug', '_get_action_timeout: known timeout: %s',
274 |         defined $timeout ? $timeout : 'undef' );
275 | 
276 |     return $timeout if defined $timeout and $timeout =~ /^\d+$/;
277 | 
278 |     return undef;
279 | }
280 | 
281 | # Get, parse and return the value of the given private attribute name
282 | # Returns an empty string if not found.
283 | sub _get_priv_attr {
284 |     my ( $name, $node ) = @_;
285 |     my $val = '';
286 |     my $node_arg = '';
287 |     my $ans;
288 | 
289 |     $node = '' unless defined $node;
290 |     $name = "$name-$OCF_RESOURCE_INSTANCE";
291 | 
292 |     $node_arg= "--node $node" if $node ne '';
293 | 
294 |     $ans = qx{ $ATTRD_PRIV --name "$name" --query $node_arg };
295 | 
296 |     $ans =~ m/^name=".*" host=".*" value="(.*)"$/;
297 | 
298 |     $val = $1 if defined $1;
299 | 
300 |     ocf_log( 'debug', '_get_priv_attr: value of "%s"%s is "%s"', $name,
301 |         ( $node ? " on \"$node\"": ""),
302 |         $val );
303 | 
304 |     return $val;
305 | }
306 | 
307 | # Set the given private attribute name to the given value
308 | # As setting an attribute is asynchronous, this will return as soon as the
309 | # attribute is really set by attrd and available.
310 | sub _set_priv_attr {
311 |     my ( $name, $val ) = @_;
312 |     my $name_instance = "$name-$OCF_RESOURCE_INSTANCE";
313 | 
314 |     ocf_log( 'debug', '_set_priv_attr: set "%s=%s"...', $name_instance, $val );
315 | 
316 |     qx{ $ATTRD_PRIV --name "$name_instance" --update "$val" };
317 | 
318 |     # give the attr name without the resource instance name, as _get_priv_attr
319 |     # adds it as well
320 |     while ( _get_priv_attr( $name ) ne $val ) {
321 |         ocf_log( 'debug', '_set_priv_attr: waiting attrd ack for "%s"...', $name_instance );
322 |         select( undef, undef, undef, 0.1 );
323 |     }
324 | 
325 |     return;
326 | }
327 | 
328 | # Delete the given private attribute.
329 | # As deleting an attribute is asynchronous, this will return as soon as the
330 | # attribute is really deleted by attrd.
331 | sub _delete_priv_attr {
332 |     my ( $name ) = @_;
333 |     my $name_instance = "$name-$OCF_RESOURCE_INSTANCE";
334 | 
335 |     ocf_log( 'debug', '_delete_priv_attr: delete "%s"...', $name_instance );
336 | 
337 |     qx{ $ATTRD_PRIV --name "$name_instance" --delete };
338 | 
339 |     # give the attr name without the resource instance name, as _get_priv_attr
340 |     # adds it as well
341 |     while ( _get_priv_attr( $name ) ne '' ) {
342 |         ocf_log( 'debug', '_delete_priv_attr: waiting attrd ack for "%s"...',
343 |             $name_instance );
344 |         select( undef, undef, undef, 0.1 );
345 |     }
346 | 
347 |     return;
348 | }
349 | 
350 | # Get, parse and return the resource master score on the given node.
351 | # Returns an empty string if not found.
352 | # Returns undef on crm_master call error
353 | sub _get_master_score {
354 |     my ( $node ) = @_;
355 |     my $node_arg = '';
356 |     my $score;
357 | 
358 |     $node_arg = sprintf '--node "%s"', $node if defined $node and $node ne '';
359 | 
360 |     $score = qx{ $CRM_MASTER --quiet --get-value $node_arg 2> /dev/null };
361 | 
362 |     return '' unless $? == 0;
363 | 
364 |     chomp $score;
365 | 
366 |     $score = '' unless defined $score;
367 | 
368 |     return $score;
369 | }
370 | 
371 | # Set the master score of the local node or the optionally given node.
372 | # As setting an attribute is asynchronous, this will return as soon as the
373 | # attribute is really set by attrd and available everywhere.
374 | sub _set_master_score {
375 |     my ( $score, $node ) = @_;
376 |     my $node_arg = '';
377 |     my $tmp;
378 | 
379 |     $node_arg = sprintf '--node "%s"', $node if defined $node and $node ne '';
380 | 
381 |     qx{ $CRM_MASTER $node_arg --quiet --update "$score" };
382 | 
383 |     while ( ( $tmp = _get_master_score( $node ) ) ne $score ) {
384 |         ocf_log( 'debug',
385 |             '_set_master_score: waiting to set score to "%s" (currently "%s")...',
386 |             $score, $tmp );
387 |         select(undef, undef, undef, 0.1);
388 |     }
389 | 
390 |     return;
391 | }
392 | 
393 | # _master_score_exists
394 | # This subroutine checks if a master score is set for one of the relative clones
395 | # in the cluster and if the score is greater than or equal to 0.
396 | # Returns 1 if at least one master score >= 0 is found.
397 | # Returns 0 otherwise
398 | sub _master_score_exists {
399 |     my @partition_nodes = split /\s+/ => qx{ $CRM_NODE --partition };
400 | 
401 |     foreach my $node ( @partition_nodes ) {
402 |         my $score = _get_master_score( $node );
403 | 
404 |         return 1 if defined $score and $score ne '' and $score > -1;
405 |     }
406 | 
407 |     return 0;
408 | }
409 | 
410 | # Check if the current transition is a recovery of a master clone on the given node.
411 | sub _is_master_recover {
412 |     my ( $n ) = @_;
413 | 
414 |     return (
415 |         scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'master'} }
416 |         and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'promote'} }
417 |     );
418 | }
419 | 
420 | # Check if the current transition is a recovery of a slave clone on the given node.
421 | sub _is_slave_recover {
422 |     my ( $n ) = @_;
423 | 
424 |     return (
425 |         scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'slave'} }
426 |         and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'start'} }
427 |     );
428 | }
429 | 
430 | # Check if the current transition is a switchover to the given node.
431 | sub _is_switchover {
432 |     my ( $n ) = @_;
433 |     my $old = $OCF_NOTIFY_ENV{'master'}[0]{'uname'};
434 | 
435 |     return 0 if scalar @{ $OCF_NOTIFY_ENV{'master'} } != 1
436 |         or scalar @{ $OCF_NOTIFY_ENV{'demote'} } != 1
437 |         or scalar @{ $OCF_NOTIFY_ENV{'promote'} } != 1;
438 | 
439 |     return (
440 |         scalar grep { $_->{'uname'} eq $old } @{ $OCF_NOTIFY_ENV{'demote'} }
441 |         and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'slave'} }
442 |         and scalar grep { $_->{'uname'} eq $n } @{ $OCF_NOTIFY_ENV{'promote'} }
443 |         and not scalar grep { $_->{'uname'} eq $old } @{ $OCF_NOTIFY_ENV{'stop'} }
444 |     );
445 | }
446 | 
447 | # Run the given command as the "system_user" given as parameter.
448 | # It basically forks and seteuid/setuid away from root.
449 | #
450 | sub _runas {
451 |     my $rc;
452 |     my $pid;
453 |     my @cmd = @_;
454 |     my (undef, undef, $postgres_uid, $postgres_gid ) = getpwnam( $system_user );
455 | 
456 |     $pid = fork;
457 | 
458 |     if ( $pid == 0 ) { # in child
459 |         $) = "$postgres_gid $postgres_gid";
460 |         while ( my ( undef, undef, $gid, $members ) = getgrent ) {
461 |             $) .= " $gid" if grep { $system_user eq $_ } split /\s+/, $members
462 |         }
463 |         $( = $postgres_gid;
464 | 
465 |         $< = $> = $postgres_uid;
466 | 
467 |         exec @cmd;
468 |     }
469 | 
470 |     ocf_log( 'debug', '_runas: launching as "%s" command "%s"', $system_user,
471 |         join(' ', @cmd) );
472 | 
473 |     waitpid $pid, 0;
474 |     $rc = $? >> 8;
475 | 
476 |     return $rc;
477 | }
478 | 
479 | # Check if instance is listening on the given host/port.
480 | #
481 | sub _pg_isready {
482 |     my $rc = _runas( $PGISREADY, '-h', $pghost, '-p', $pgport );
483 | 
484 |     # Possible error codes:
485 |     #   1: ping rejected (usually when instance is in startup, in crash
486 |     #      recovery, in warm standby, or when a shutdown is in progress)
487 |     #   2: no response, usually means the instance is down
488 |     #   3: no attempt, probably a syntax error, should not happen
489 |     return $rc;
490 | }
491 | 
492 | # Check the postmaster.pid file and the postmaster process.
493 | # WARNING: we do not distinguish the scenario where postmaster.pid does not
494 | # exist from the scenario where the process is still alive. It should be ok
495 | # though, as this is considered a hard error from monitor.
496 | #
497 | sub _pg_ctl_status {
498 |     my $rc = _runas( $PGCTL, '--pgdata', $pgdata, 'status' );
499 | 
500 |     # Possible error codes:
501 |     #   3: postmaster.pid file does not exist OR it does but the process
502 |     #      with the PID found in the file is not alive
503 |     return $rc;
504 | }
505 | 
506 | # Start the local instance using pg_ctl
507 | #
508 | sub _pg_ctl_start {
509 |     # Add 60s to the timeout or use a 24h timeout fallback to make sure
510 |     # Pacemaker will give up before us and take decisions
511 |     my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60;
512 | 
513 |     my @cmd = ( $PGCTL, '--pgdata', $pgdata, '-w', '--timeout', $timeout, 'start' );
514 | 
515 |     push @cmd => ( '-o', $start_opts ) if $start_opts ne '';
516 | 
517 |     return _runas( @cmd );
518 | }
519 | 
520 | # Create the recovery file based on the given template.
521 | # Given template MUST at least contain:
522 | #   standby_mode=on
523 | #   primary_conninfo='...'
524 | #   recovery_target_timeline = 'latest'
525 | #
526 | sub _create_recovery_conf {
527 |     my $fh;
528 |     my (undef, undef, $uid, $gid) = getpwnam($system_user);
529 |     my $recovery_conf = '';
530 |     my $recovery_file = "$datadir/recovery.conf";
531 | 
532 |     ocf_log( 'debug',
533 |         '_create_recovery_conf: get replication configuration from the template file "%s"',
534 |         $recovery_tpl );
535 | 
536 |     # Create the recovery.conf file to start the instance as a secondary.
537 |     # NOTE: the recovery.conf is supposed to be set up so the secondary can
538 |     # connect to the primary instance, usually using a virtual IP address.
539 |     # As there is no primary instance available at startup, secondaries will
540 |     # complain about failing to connect.
541 |     # As we cannot reload a recovery.conf file on a standby without restarting
542 |     # it, we will live with this.
543 |     # FIXME how would the reload help us in this case ?
544 |     unless ( defined open( $fh, '<', $recovery_tpl ) ) {
545 |         ocf_exit_reason( 'Could not open file "%s": %s', $recovery_tpl, $! );
546 |         exit $OCF_ERR_CONFIGURED;
547 |     }
548 | 
549 |     # Copy all parameters from the template file
550 |     while (my $line = <$fh>) {
551 |         chomp $line;
552 |         $recovery_conf .= "$line\n";
553 |     }
554 |     close $fh;
555 | 
556 |     ocf_log( 'debug',
557 |         '_create_recovery_conf: write the replication configuration to "%s" file',
558 |         $recovery_file );
559 | 
560 |     unless ( open( $fh, '>', $recovery_file ) ) {
561 |         ocf_exit_reason( 'Could not open file "%s": %s', $recovery_file, $! );
562 |         exit $OCF_ERR_CONFIGURED;
563 |     }
564 | 
565 |     # Write the recovery.conf file using configuration from the template file
566 |     print $fh $recovery_conf;
567 | 
568 |     close $fh;
569 | 
570 |     unless ( chown $uid, $gid, $recovery_file ) {
571 |         ocf_exit_reason( 'Could not set owner of "%s"', $recovery_file );
572 |         exit $OCF_ERR_CONFIGURED;
573 |     };
574 | }
575 | 
576 | # Parse and return various information about the local PostgreSQL instance as
577 | # reported by its controldata file.
578 | #
579 | # WARNING: the status is NOT updated in case of crash.
580 | #
581 | sub _get_controldata {
582 |     my %controldata;
583 |     my $ans;
584 | 
585 |     $ans = qx{ $PGCTRLDATA "$datadir" 2>/dev/null };
586 | 
587 |     # Parse the output of pg_controldata.
588 |     # This output is quite stable between pg versions, but we might need to sort
589 |     # it at some point if things are moving in there...
590 |     $ans =~ m{
591 |         # get the current state
592 |         ^\QDatabase cluster state\E:\s+(.*?)\s*$
593 |         .*
594 |         # Get the latest known REDO location
595 |         ^\QLatest checkpoint's REDO location\E:\s+([/0-9A-F]+)\s*$
596 |         .*
597 |         # Get the latest known TL
598 |         ^\QLatest checkpoint's TimeLineID\E:\s+(\d+)\s*$
599 |         .*
600 |         # Get the wal level
601 |         # NOTE: pg_controldata output changed with PostgreSQL 9.5, so we need to
602 |         # account for both syntaxes
603 |         ^(?:\QCurrent \E)?\Qwal_level setting\E:\s+(.*?)\s*$
604 |     }smx;
605 | 
606 |     $controldata{'state'} = $1 if defined $1;
607 |     $controldata{'redo'} = $2 if defined $2;
608 |     $controldata{'tl'} = $3 if defined $3;
609 |     $controldata{'wal_level'} = $4 if defined $4;
610 | 
611 |     ocf_log( 'debug',
612 |         "_get_controldata: found: %s",
613 |         Data::Dumper->new( [ \%controldata ] )->Terse(1)->Dump );
614 | 
615 |     return %controldata if defined $controldata{'state'}
616 |         and defined $controldata{'tl'}
617 |         and defined $controldata{'redo'}
618 |         and defined $controldata{'wal_level'};
619 | 
620 |     ocf_exit_reason( 'Could not read all data from controldata file for "%s"',
621 |         $datadir );
622 | 
623 |     ocf_log( 'debug',
624 |         "_get_controldata: controldata file: %s",
625 |         Data::Dumper->new( [ \%controldata ] )->Terse(1)->Dump, $ans );
626 | 
627 |     exit $OCF_ERR_ARGS;
628 | }
629 | 
630 | # Use pg_controldata to check the state of the PostgreSQL server. This
631 | # function returns codes depending on this state, so we can find whether the
632 | # instance is a primary or a secondary, or use it to detect any inconsistency
633 | # that could indicate the instance has crashed.
634 | #
635 | sub _controldata_to_ocf {
636 |     my %cdata = _get_controldata();
637 | 
638 |     while ( 1 ) {
639 |         ocf_log( 'debug', '_controldata: instance "%s" state is "%s"',
640 |             $OCF_RESOURCE_INSTANCE, $cdata{'state'} );
641 | 
642 |         # Instance should be running as a primary.
643 |         return $OCF_RUNNING_MASTER if $cdata{'state'} eq "in production";
644 | 
645 |         # Instance should be running as a secondary.
646 |         # This state includes warm standby (rejects connections attempts,
647 |         # including pg_isready)
648 |         return $OCF_SUCCESS if $cdata{'state'} eq "in archive recovery";
649 | 
650 | 
651 |         # The instance should be stopped.
652 |         # We don't care if it was a primary or secondary before, because we
653 |         # always start instances as secondaries, and then promote if necessary.
654 |         return $OCF_NOT_RUNNING if $cdata{'state'} eq "shut down"
655 |             or $cdata{'state'} eq "shut down in recovery";
656 | 
657 |         # The state is "in crash recovery", "starting up" or "shutting down".
658 |         # This state should be transitional, so we wait and loop to check if
659 |         # it changes.
660 |         # If it does not, pacemaker will eventually abort with a timeout.
661 |         ocf_log( 'debug',
662 |             '_controldata: waiting for transitional state "%s" to finish',
663 |             $cdata{'state'} );
664 |         sleep 1;
665 |         %cdata = _get_controldata();
666 |     }
667 | 
668 |     # If we reach this point, something went really wrong with this code or
669 |     # pg_controldata.
670 |     ocf_exit_reason( 'Unable to get instance "%s" state using pg_controldata',
671 |         $OCF_RESOURCE_INSTANCE );
672 | 
673 |     return $OCF_ERR_INSTALLED ;
674 | }
675 | 
676 | # Check the write_location of all secondaries, and adapt their master score so
677 | # that the instance closest to the master will be the selected candidate should
678 | # a promotion be triggered.
679 | # NOTE: This is only a hint to pacemaker! The selected candidate for promotion
680 | # actually re-checks that it is the best candidate and forces a re-election by
681 | # failing if a better one exists. This avoids a race condition between the call
682 | # of the monitor action and the promotion, where another slave might have
683 | # caught up faster with the master.
684 | # NOTE: we cannot directly use the write_location, nor an lsn_diff value, as the
685 | # promotion score, as Pacemaker considers any value greater than 1,000,000 as
686 | # INFINITY.
687 | #
688 | # This sub is supposed to be executed from a master monitor action.
689 | #
690 | sub _check_locations {
691 |     my $node_score;
692 |     my $row_num;
693 |     my $row;
694 |     my @rs;
695 | 
696 |     # Call crm_node to exclude nodes that are not part of the cluster at this
697 |     # point.
698 |     my $partition_nodes = qx{ $CRM_NODE --partition };
699 | 
700 |     @rs = @{ _get_lag_scores() };
701 | 
702 |     $row_num = scalar @rs;
703 | 
704 |     # If there is no row left at this point, it means that there is no
705 |     # secondary instance connected.
706 |     ocf_log( 'warning', 'No secondary connected to the master' )
707 |         if $row_num == 0;
708 | 
709 |     # For each connected standby, set its master score based on the following
710 |     # rule: the first known node/application, with the highest priority and
711 |     # with an acceptable state.
712 |     while ( $row = shift @rs ) {
713 | 
714 |         if ( $partition_nodes !~ /$row->[0]/ ) {
715 |             ocf_log( 'info', 'Ignoring unknown application_name/node "%s"',
716 |                 $row->[0] );
717 |             next;
718 |         }
719 | 
720 |         if ( $row->[0] eq $nodename ) {
721 |             ocf_log( 'warning', 'Streaming replication with myself!' );
722 |             next;
723 |         }
724 | 
725 |         $node_score = _get_master_score( $row->[0] );
726 | 
727 |         if ( $row->[3] =~ /^\s*(?:startup|backup)\s*$/ ) {
728 |             # We exclude any standby being in state backup (pg_basebackup) or
729 |             # startup (new standby or failing standby)
730 |             ocf_log( 'info', 'Forbidding promotion on "%s" in state "%s"',
731 |                 $row->[0], $row->[3] );
732 | 
733 |             _set_master_score( '-1', $row->[0] ) unless $node_score eq '-1';
734 |         }
735 |         else {
736 |             ocf_log( 'debug',
737 |                 '_check_locations: checking "%s" promotion ability (current_score: %s, priority: %s, location: %s, lag: %s)',
738 |                 $row->[0], $node_score, $row->[1], $row->[2], $row->[4] );
739 | 
740 |             if ( $node_score ne $row->[1] ) {
741 |                 if ( $row->[1] < -1 ) {
742 |                     ocf_log( 'info', 'Update score of "%s" from %s to %s because replication lag (%s) is higher than given maxlag (%s).',
743 |                         $row->[0], $node_score, $row->[1], $row->[4], $maxlag );
744 |                 }
745 |                 else {
746 |                     ocf_log( 'info', 'Update score of "%s" from %s to %s because of a change in the replication lag (%s).',
747 |                         $row->[0], $node_score, $row->[1], $row->[4] );
748 |                 }
749 |                 _set_master_score( $row->[1], $row->[0] );
750 |             }
751 |             else {
752 |                 ocf_log( 'debug',
753 |                     '_check_locations: "%s" keeps its current score of %s',
754 |                     $row->[0], $row->[1] );
755 |             }
756 |         }
757 | 
758 |         # Remove this node from the known nodes list.
759 |         $partition_nodes =~ s/(?:^|\s)$row->[0](?:\s|$)/ /g;
760 |     }
761 | 
762 |     $partition_nodes =~ s/(?:^\s+)|(?:\s+$)//g;
763 | 
764 |     # If there are still nodes in "partition_nodes", it means there is no
765 |     # corresponding line in "pg_stat_replication".
766 |     foreach my $node (split /\s+/ => $partition_nodes) {
767 |         # Exclude the current node.
768 |         next if $node eq $nodename;
769 | 
770 |         # do not warn if the master score is already set to -1000.
771 |         # this avoids log flooding (gh #138)
772 |         $node_score = _get_master_score( $node );
773 |         next if $node_score eq '-1000';
774 | 
775 |         ocf_log( 'warning', '"%s" is not connected to the primary', $node );
776 |         _set_master_score( '-1000', $node );
777 |     }
778 | 
779 |     # Finally set the master score if not already done
780 |     $node_score = _get_master_score();
781 |     _set_master_score( '1001' ) unless $node_score eq '1001';
782 | 
783 |     return $OCF_SUCCESS;
784 | }
785 | 
786 | # _check_switchover
787 | # Check if the pgsql switchover to the local node is safe.
788 | # This is supposed to be called **after** the master has been stopped or demoted.
789 | # This sub checks if the local standby received the shutdown checkpoint from the
790 | # old master to make sure it can take over the master role and the old master
791 | # will be able to catch up as a standby afterwards.
792 | #
793 | # Returns 0 if the switchover is safe
794 | # Returns 1 if the switchover is not safe
795 | # Returns 2 on internal error
796 | sub _check_switchover {
797 |     my $has_sht_chk = 0;
798 |     my $last_redo;
799 |     my $last_lsn;
800 |     my $ans;
801 |     my $rc;
802 |     my $tl;
803 |     my %cdata;
804 | 
805 |     ocf_log( 'info', 'Switchover in progress from "%s" to "%s".'
806 |         .' Need to check the last record in WAL',
807 |         $OCF_NOTIFY_ENV{'demote'}[0]{'uname'}, $nodename );
808 | 
809 |     # check if we received the shutdown checkpoint of the master during its
810 |     # demote process.
811 |     # We need the last local checkpoint LSN and the last received LSN from the
812 |     # master to check in the WAL between these addresses if we have a
813 |     # "shutdown checkpoint", using pg_xlogdump/pg_waldump.
814 |     #
815 |     # Force a checkpoint to make sure the controldata shows the very last TL
816 |     # and the master's shutdown checkpoint
817 |     _query( q{ CHECKPOINT }, {} );
818 |     %cdata = _get_controldata();
819 |     $tl = $cdata{'tl'};
820 |     $last_redo = $cdata{'redo'};
821 | 
822 |     # Get the last received LSN from the master
823 |     $last_lsn = _get_last_received_lsn();
824 | 
825 |     unless ( defined $last_lsn ) {
826 |         ocf_exit_reason(
827 |             'Could not read last checkpoint and timeline from controldata file!'
828 |         );
829 | 
830 |         ocf_log( 'debug',
831 |             '_check_switchover: %s parameters: datadir:"%s", last_chk: "%s", tl: "%s", mast_lsn: "%s"',
832 |             $PGWALDUMP, $datadir, $last_redo, $tl, $last_lsn
833 |         );
834 | 
835 |         return 2;
836 |     }
837 | 
838 |     $ans = qx{ $PGWALDUMP --path "$datadir" --timeline "$tl" \\
839 |         --start "$last_redo" --end "$last_lsn" 2>&1 };
840 |     $rc = $?;
841 | 
842 |     ocf_log( 'debug',
843 |         '_check_switchover: %s rc: "%s", tl: "%s", last_chk: %s, last_lsn: %s, output: "%s"',
844 |         $PGWALDUMP, $rc, $tl, $last_redo, $last_lsn, $ans
845 |     );
846 | 
847 |     if ( $rc == 0 and
848 |         $ans =~ m{^rmgr: XLOG.*desc: (?i:checkpoint)(?::|_SHUTDOWN) redo [0-9A-F/]+; tli $tl;.*; shutdown$}m
849 |     ) {
850 |         ocf_log( 'info', 'Slave received the shutdown checkpoint' );
851 |         return 0;
852 |     }
853 | 
854 |     ocf_exit_reason(
855 |         'Did not receive the shutdown checkpoint from the old master!' );
856 | 
857 |     return 1;
858 | }
859 | 
860 | # Check to confirm that the instance is really started, as _pg_isready stated,
861 | # and check if the instance is a primary or a secondary.
862 | #
863 | sub _confirm_role {
864 |     my $is_in_recovery;
865 |     my $rc;
866 |     my @rs;
867 | 
868 |     $rc = _query( "SELECT pg_is_in_recovery()", \@rs );
869 | 
870 |     $is_in_recovery = $rs[0][0];
871 | 
872 |     if ( $rc == 0 ) {
873 |         # The query was executed, check the result.
874 | if ( $is_in_recovery eq 't' ) { 875 | # The instance is a secondary. 876 | ocf_log( 'debug', "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a secondary"); 877 | return $OCF_SUCCESS; 878 | } 879 | elsif ( $is_in_recovery eq 'f' ) { 880 | # The instance is a primary. 881 | ocf_log( 'debug', "_confirm_role: instance $OCF_RESOURCE_INSTANCE is a primary"); 882 | # Check lsn diff with current slaves if any 883 | _check_locations() if $OCF_ACTION eq 'monitor'; 884 | return $OCF_RUNNING_MASTER; 885 | } 886 | 887 | # This should not happen, raise a hard configuration error. 888 | ocf_exit_reason( 889 | 'Unexpected result from query to check if "%s" is a primary or a secondary: "%s"', 890 | $OCF_RESOURCE_INSTANCE, $is_in_recovery ); 891 | 892 | return $OCF_ERR_CONFIGURED; 893 | } 894 | elsif ( $rc == 1 or $rc == 2 ) { 895 | # psql could not connect to the instance. 896 | # As pg_isready reported the instance was listening, this error 897 | # could be a max_connections saturation. Just report a soft error. 898 | ocf_exit_reason( 'psql could not connect to instance "%s"', 899 | $OCF_RESOURCE_INSTANCE ); 900 | return $OCF_ERR_GENERIC; 901 | } 902 | 903 | # The query failed (rc: 3) or bad parameters were given (rc: -1). 904 | # This should not happen, raise a hard configuration error. 905 | ocf_exit_reason( 906 | 'The query to check if instance "%s" is a primary or a secondary failed (rc: %d)', 907 | $OCF_RESOURCE_INSTANCE, $rc ); 908 | 909 | return $OCF_ERR_CONFIGURED; 910 | } 911 | 912 | 913 | # Check to confirm if the instance is really stopped as _pg_isready stated 914 | # and if it was properly shut down. 915 | # 916 | sub _confirm_stopped { 917 | my $pgctlstatus_rc; 918 | my $controldata_rc; 919 | 920 | # Check the postmaster process status. 921 | $pgctlstatus_rc = _pg_ctl_status(); 922 | 923 | if ( $pgctlstatus_rc == 0 ) { 924 | # The PID file exists and the process is available. 925 | # That should not be the case, return an error. 926 | ocf_exit_reason( 927 | 'Instance "%s" is not listening, but the process referenced in postmaster.pid exists', 928 | $OCF_RESOURCE_INSTANCE ); 929 | return $OCF_ERR_GENERIC; 930 | } 931 | 932 | # The PID file does not exist or the process is not available. 933 | ocf_log( 'debug', 934 | '_confirm_stopped: no postmaster process found for instance "%s"', 935 | $OCF_RESOURCE_INSTANCE ); 936 | 937 | if ( -f "$datadir/backup_label" ) { 938 | # We are probably on a freshly built secondary that was not started yet. 939 | ocf_log( 'debug', 940 | '_confirm_stopped: backup_label file exists: probably on a never started secondary', 941 | ); 942 | return $OCF_NOT_RUNNING; 943 | } 944 | 945 | # Continue the check with pg_controldata. 946 | $controldata_rc = _controldata_to_ocf(); 947 | if ( $controldata_rc == $OCF_RUNNING_MASTER ) { 948 | # The controldata has not been updated to "shutdown". 949 | # It should mean we had a crash on a primary instance. 950 | ocf_exit_reason( 951 | 'Instance "%s" controldata indicates a running primary instance, the instance has probably crashed', 952 | $OCF_RESOURCE_INSTANCE ); 953 | return $OCF_FAILED_MASTER; 954 | } 955 | elsif ( $controldata_rc == $OCF_SUCCESS ) { 956 | # The controldata has not been updated to "shutdown in recovery". 957 | # It should mean we had a crash on a secondary instance. 958 | # There is no "FAILED_SLAVE" return code, so we return a generic error.
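# (For reference, these symbols map to the standard OCF return codes:
# OCF_SUCCESS=0, OCF_ERR_GENERIC=1, OCF_ERR_ARGS=2, OCF_ERR_INSTALLED=5,
# OCF_ERR_CONFIGURED=6, OCF_NOT_RUNNING=7, OCF_RUNNING_MASTER=8 and
# OCF_FAILED_MASTER=9. The OCF spec simply defines no dedicated code for a
# failed slave, hence the generic error below.)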
959 | ocf_exit_reason( 960 | 'Instance "%s" controldata indicates a running secondary instance, the instance has probably crashed', 961 | $OCF_RESOURCE_INSTANCE ); 962 | return $OCF_ERR_GENERIC; 963 | } 964 | elsif ( $controldata_rc == $OCF_NOT_RUNNING ) { 965 | # The controldata state is consistent, the instance was probably 966 | # properly shut down. 967 | ocf_log( 'debug', 968 | '_confirm_stopped: instance "%s" controldata indicates that the instance was properly shut down', 969 | $OCF_RESOURCE_INSTANCE ); 970 | return $OCF_NOT_RUNNING; 971 | } 972 | 973 | # Something went wrong with the controldata check. 974 | ocf_exit_reason( 975 | 'Could not get instance "%s" status from controldata (returned: %d)', 976 | $OCF_RESOURCE_INSTANCE, $controldata_rc ); 977 | 978 | return $OCF_ERR_GENERIC; 979 | } 980 | 981 | ############################################################ 982 | #### OCF FUNCS 983 | 984 | 985 | 986 | =head1 SUPPORTED PARAMETERS 987 | 988 | =over 989 | 990 | =item B<pgdata> 991 | 992 | Location of the PGDATA of your instance 993 | 994 | (optional, string, default "/var/lib/pgsql/data") 995 | 996 | =item B<pghost> 997 | 998 | The socket directory or IP address to use to connect to the local instance 999 | 1000 | (optional, string, default "/tmp") 1001 | 1002 | =item B<pgport> 1003 | 1004 | The port to connect to the local instance 1005 | 1006 | (optional, integer, default "5432") 1007 | 1008 | =item B<bindir> 1009 | 1010 | Location of the PostgreSQL binaries. 1011 | 1012 | (optional, string, default "/usr/bin") 1013 | 1014 | =item B<system_user> 1015 | 1016 | The system owner of your instance's process 1017 | 1018 | (optional, string, default "postgres") 1019 | 1020 | =item B<recovery_template> 1021 | 1022 | The local template that will be copied as the C<recovery.conf> file. 1023 | This template file must exist on all nodes. 1024 | 1025 | (optional, string, default "$PGDATA/recovery.conf.pcmk") 1026 | 1027 | =item B<maxlag> 1028 | 1029 | Maximum lag allowed on a standby before we set a negative master score on it. 1030 | The calculation is based on the difference between the current xlog location on 1031 | the master and the write location on the standby. 1032 | 1033 | (optional, integer, default "0", which disables this feature) 1034 | 1035 | =item B<datadir> 1036 | 1037 | Path to the directory set in C<data_directory> from your postgresql.conf file. 1038 | This parameter has the same default as PostgreSQL itself: the C<pgdata> 1039 | parameter value. 1040 | 1041 | Unless you have a special PostgreSQL setup and you understand this parameter, 1042 | B<ignore it>. 1043 | 1044 | (optional, string, defaults to the value of C<pgdata>) 1045 | 1046 | =item B<start_opts> 1047 | 1048 | Additional arguments given to the postgres process on startup. See 1049 | "postgres --help" for available options. Useful when the postgresql.conf file 1050 | is not in the data directory (PGDATA), eg.: 1051 | 1052 | -c config_file=/etc/postgresql/9.3/main/postgresql.conf 1053 | 1054 | (optional, string, default "") 1055 | 1056 | =back 1057 | 1058 | =cut 1059 | 1060 | sub ocf_meta_data { 1061 | print qq{<?xml version="1.0"?> 1062 | <!DOCTYPE resource-agent SYSTEM "ra-api-1.dtd"> 1063 | <resource-agent name="pgsqlms"> 1064 | <version>1.0</version> 1065 | 1066 | <longdesc lang="en"> 1067 | Resource script for PostgreSQL in replication. It manages PostgreSQL servers using streaming replication as an HA resource. 1068 | </longdesc> 1069 | <shortdesc lang="en">Manages PostgreSQL servers in replication</shortdesc> 1070 | <parameters> 1071 | <parameter name="system_user" unique="0" required="0"> 1072 | <longdesc lang="en"> 1073 | System user account used to run the PostgreSQL server 1074 | </longdesc> 1075 | <shortdesc lang="en">PostgreSQL system User</shortdesc> 1076 | <content type="string" default="postgres" /> 1077 | </parameter> 1078 | 1079 | <parameter name="bindir" unique="0" required="0"> 1080 | <longdesc lang="en"> 1081 | Path to the directory storing the PostgreSQL binaries. The agent uses psql, pg_isready, pg_controldata and pg_ctl.
1082 | </longdesc> 1083 | <shortdesc lang="en">Path to the PostgreSQL binaries</shortdesc> 1084 | <content type="string" default="/usr/bin" /> 1085 | </parameter> 1086 | 1087 | <parameter name="pgdata" unique="1" required="0"> 1088 | <longdesc lang="en"> 1089 | Path to the data directory, e.g. PGDATA 1090 | </longdesc> 1091 | <shortdesc lang="en">Path to the data directory</shortdesc> 1092 | <content type="string" default="/var/lib/pgsql/data" /> 1093 | </parameter> 1094 | 1095 | <parameter name="datadir" unique="1" required="0"> 1096 | <longdesc lang="en"> 1097 | Path to the directory set in data_directory from your postgresql.conf file. This parameter 1098 | has the same default as PostgreSQL itself: the pgdata parameter value. Unless you have a 1099 | special PostgreSQL setup and you understand this parameter, ignore it. 1100 | </longdesc> 1101 | <shortdesc lang="en">Path to the directory set in data_directory from your postgresql.conf file</shortdesc> 1102 | <content type="string" default="PGDATA" /> 1103 | </parameter> 1104 | 1105 | <parameter name="pghost" unique="0" required="0"> 1106 | <longdesc lang="en"> 1107 | Host IP address or unix socket folder the instance is listening on. 1108 | </longdesc> 1109 | <shortdesc lang="en">Instance IP or unix socket folder</shortdesc> 1110 | <content type="string" default="/tmp" /> 1111 | </parameter> 1112 | 1113 | <parameter name="pgport" unique="0" required="0"> 1114 | <longdesc lang="en"> 1115 | Port the instance is listening on. 1116 | </longdesc> 1117 | <shortdesc lang="en">Instance port</shortdesc> 1118 | <content type="integer" default="5432" /> 1119 | </parameter> 1120 | 1121 | <parameter name="maxlag" unique="0" required="0"> 1122 | <longdesc lang="en"> 1123 | Maximum lag allowed on a standby before we set a negative master score on it. The calculation 1124 | is based on the difference between the current LSN on the master and the LSN 1125 | written on the standby. 1126 | This parameter must be a valid positive number as described in the PostgreSQL documentation. 1127 | See: https://www.postgresql.org/docs/current/static/sql-syntax-lexical.html#SQL-SYNTAX-CONSTANTS-NUMERIC 1128 | </longdesc> 1129 | <shortdesc lang="en">Maximum write lag before we mark a standby as inappropriate to promote</shortdesc> 1130 | <content type="integer" default="0" /> 1131 | </parameter> 1132 | 1133 | <parameter name="recovery_template" unique="1" required="0"> 1134 | <longdesc lang="en"> 1135 | Path to the recovery.conf template. This file is simply copied to \$PGDATA 1136 | before starting the instance as slave 1137 | </longdesc> 1138 | <shortdesc lang="en">Path to the recovery.conf template.</shortdesc> 1139 | <content type="string" default="\$PGDATA/recovery.conf.pcmk" /> 1140 | </parameter> 1141 | 1142 | <parameter name="start_opts" unique="0" required="0"> 1143 | <longdesc lang="en"> 1144 | Additional arguments given to the postgres process on startup. 1145 | See "postgres --help" for available options. Useful when the 1146 | postgresql.conf file is not in the data directory (PGDATA), eg.: 1147 | "-c config_file=/etc/postgresql/9.3/main/postgresql.conf". 1148 | </longdesc> 1149 | <shortdesc lang="en">Additional arguments given to the postgres process on startup.</shortdesc> 1150 | <content type="string" default="" /> 1151 | </parameter> 1152 | </parameters> 1153 | <actions> 1154 | <action name="start" timeout="60" /> 1155 | <action name="stop" timeout="60" /> 1156 | <action name="reload" timeout="20" /> 1157 | <action name="promote" timeout="30" /> 1158 | <action name="demote" timeout="120" /> 1159 | <action name="monitor" depth="0" timeout="10" interval="15" role="Master" /> 1160 | <action name="monitor" depth="0" timeout="10" interval="16" role="Slave" /> 1161 | <action name="notify" timeout="60" /> 1162 | <action name="meta-data" timeout="5" /> 1163 | <action name="methods" timeout="5" /> 1164 | <action name="validate-all" timeout="5" /> 1165 | </actions> 1166 | </resource-agent> 1167 | 1168 | 1169 | }; 1170 | return; 1171 | } 1172 | 1173 | 1174 | =head1 SUPPORTED ACTIONS 1175 | 1176 | This resource agent supports the following actions (operations): 1177 | 1178 | =over 1179 | 1180 | =item B<start> 1181 | 1182 | Starts the resource. Suggested minimum timeout: 60. 1183 | 1184 | =item B<stop> 1185 | 1186 | Stops the resource. Suggested minimum timeout: 60. 1187 | 1188 | =item B<reload> 1189 | 1190 | Suggested minimum timeout: 20. 1191 | 1192 | =item B<promote> 1193 | 1194 | Promotes the resource to the Master role. Suggested minimum timeout: 30. 1195 | 1196 | =item B<demote> 1197 | 1198 | Demotes the resource to the Slave role. Suggested minimum timeout: 120. 1199 | 1200 | =item B<monitor (Master role)> 1201 | 1202 | Performs a detailed status check. Suggested minimum timeout: 10. 1203 | Suggested interval: 15. 1204 | 1205 | =item B<monitor (Slave role)> 1206 | 1207 | Performs a detailed status check. Suggested minimum timeout: 10. 1208 | Suggested interval: 16. 1209 | 1210 | =item B<notify> 1211 | 1212 | Suggested minimum timeout: 60 1213 | 1214 | =item B<meta-data> 1215 | 1216 | Retrieves resource agent metadata (internal use only). 1217 | Suggested minimum timeout: 5. 1218 | 1219 | =item B<methods> 1220 | 1221 | Suggested minimum timeout: 5. 1222 | 1223 | =item B<validate-all> 1224 | 1225 | Performs a validation of the resource configuration. 1226 | Suggested minimum timeout: 5.
1227 | 1228 | =back 1229 | 1230 | =cut 1231 | 1232 | sub ocf_methods { 1233 | print q{ 1234 | start 1235 | stop 1236 | reload 1237 | promote 1238 | demote 1239 | monitor 1240 | notify 1241 | methods 1242 | meta-data 1243 | validate-all 1244 | }; 1245 | return; 1246 | } 1247 | 1248 | ############################################################ 1249 | #### RA FUNCS 1250 | 1251 | sub pgsql_validate_all { 1252 | my $fh; 1253 | my $ans = ''; 1254 | my $PGVERSION; 1255 | my @content; 1256 | my %cdata; 1257 | 1258 | unless ( 1259 | ocf_version_cmp( $ENV{"OCF_RESKEY_crm_feature_set"}, '3.0.9' ) == 2 1260 | ) { 1261 | ocf_exit_reason( 1262 | 'PAF %s is only compatible with Pacemaker 1.1.13 or greater', 1263 | $VERSION 1264 | ); 1265 | exit $OCF_ERR_INSTALLED; 1266 | } 1267 | 1268 | # check notify=true 1269 | $ans = qx{ $CRM_RESOURCE --resource "$OCF_RESOURCE_INSTANCE" \\ 1270 | --meta --get-parameter notify 2>/dev/null }; 1271 | chomp $ans; 1272 | unless ( lc($ans) =~ /^true$|^on$|^yes$|^y$|^1$/ ) { 1273 | ocf_exit_reason( 1274 | 'You must set the meta parameter notify=true for your master resource' 1275 | ); 1276 | exit $OCF_ERR_INSTALLED; 1277 | } 1278 | 1279 | # check master-max=1 1280 | unless ( 1281 | defined $ENV{'OCF_RESKEY_CRM_meta_master_max'} 1282 | and $ENV{'OCF_RESKEY_CRM_meta_master_max'} eq '1' 1283 | ) { 1284 | ocf_exit_reason( 1285 | 'You must set the meta parameter master-max=1 for your master resource' 1286 | ); 1287 | exit $OCF_ERR_INSTALLED; 1288 | } 1289 | 1290 | # check pgdata 1291 | if ( ! -d $pgdata ) { 1292 | ocf_exit_reason( 'PGDATA "%s" does not exist', $pgdata ); 1293 | exit $OCF_ERR_ARGS; 1294 | } 1295 | 1296 | # check datadir 1297 | if ( ! -d $datadir ) { 1298 | ocf_exit_reason( 'data_directory "%s" does not exist', $datadir ); 1299 | exit $OCF_ERR_ARGS; 1300 | } 1301 | 1302 | # check PG_VERSION 1303 | if ( ! -s "$datadir/PG_VERSION" ) { 1304 | ocf_exit_reason( 'PG_VERSION does not exist in "%s"', $datadir ); 1305 | exit $OCF_ERR_ARGS; 1306 | } 1307 | 1308 | # check the recovery template 1309 | if ( ! -f $recovery_tpl ) { 1310 | ocf_exit_reason( 'Recovery template file "%s" does not exist', 1311 | $recovery_tpl ); 1312 | exit $OCF_ERR_ARGS; 1313 | } 1314 | 1315 | # check the content of the recovery template file 1316 | unless ( open( $fh, '<', $recovery_tpl ) ) { 1317 | ocf_exit_reason( 'Could not open file "%s": %s', $recovery_tpl, $!
); 1318 | exit $OCF_ERR_ARGS; 1319 | } 1320 | @content = <$fh>; 1321 | close $fh; 1322 | 1323 | unless ( looks_like_number($maxlag) ) { 1324 | ocf_exit_reason( 'maxlag is not a number: "%s"', $maxlag ); 1325 | exit $OCF_ERR_INSTALLED; 1326 | } 1327 | 1328 | unless ( grep /^\s*standby_mode\s*=\s*'?on'?\s*$/, @content ) { 1329 | ocf_exit_reason( 1330 | 'Recovery template file must contain "standby_mode = on"' ); 1331 | exit $OCF_ERR_ARGS; 1332 | } 1333 | 1334 | unless ( grep /^\s*recovery_target_timeline\s*=\s*'?latest'?\s*$/, @content ) { 1335 | ocf_exit_reason( 1336 | "Recovery template file must contain \"recovery_target_timeline = 'latest'\"" 1337 | ); 1338 | exit $OCF_ERR_ARGS; 1339 | } 1340 | 1341 | unless ( 1342 | grep /^\s*primary_conninfo\s*=.*['\s]application_name=$nodename['\s]/, 1343 | @content 1344 | ) { 1345 | ocf_exit_reason( 1346 | 'Recovery template file must contain "application_name=%s" in the primary_conninfo parameter', 1347 | $nodename ); 1348 | exit $OCF_ERR_ARGS; 1349 | } 1350 | 1351 | # check system user 1352 | unless ( defined getpwnam $system_user ) { 1353 | ocf_exit_reason( 'System user "%s" does not exist', $system_user ); 1354 | exit $OCF_ERR_ARGS; 1355 | } 1356 | 1357 | # require 9.3 minimum 1358 | unless ( open( $fh, '<', "$datadir/PG_VERSION" ) ) { 1359 | ocf_exit_reason( "Could not open file \"$datadir/PG_VERSION\": $!" ); 1360 | exit $OCF_ERR_ARGS; 1361 | } 1362 | read( $fh, $PGVERSION, 64 ); 1363 | close $fh; 1364 | 1365 | chomp $PGVERSION; 1366 | 1367 | $PGVERSION =~ /^(\d+)(?:\.(\d+))?$/; 1368 | 1369 | $PGVERNUM = $1 * 10000; 1370 | 1371 | # PostgreSQL >= 10 does not include the 2nd number in the major release 1372 | $PGVERNUM += $2 * 100 if $1 < 10; 1373 | 1374 | if ( $PGVERNUM < 90300 ) { 1375 | ocf_exit_reason( "PostgreSQL %s is not supported. Requires 9.3 or higher", 1376 | $PGVERSION ); 1377 | exit $OCF_ERR_INSTALLED; 1378 | } 1379 | 1380 | $PGWALDUMP = "$bindir/pg_xlogdump" if $PGVERNUM < $PGVER_10; 1381 | 1382 | # check binaries 1383 | unless ( -x $PGCTL and -x $PGPSQL and -x $PGCTRLDATA and -x $PGISREADY 1384 | and -x $PGWALDUMP 1385 | ) { 1386 | ocf_exit_reason( 1387 | "Missing one or more binaries. Check the following paths: %s, %s, %s, %s, %s", 1388 | $PGCTL, $PGPSQL, $PGCTRLDATA, $PGISREADY, $PGWALDUMP ); 1389 | exit $OCF_ERR_ARGS; 1390 | } 1391 | 1392 | # require wal_level >= hot_standby 1393 | %cdata = _get_controldata(); 1394 | unless ( $cdata{'wal_level'} =~ m{hot_standby|logical|replica} ) { 1395 | ocf_exit_reason( 1396 | 'wal_level must be one of "hot_standby", "logical" or "replica"' ); 1397 | exit $OCF_ERR_ARGS; 1398 | } 1399 | 1400 | return $OCF_SUCCESS; 1401 | } 1402 | 1403 | 1404 | # Start the PostgreSQL instance as a *secondary* 1405 | # 1406 | sub pgsql_start { 1407 | my $rc = pgsql_monitor(); 1408 | my %cdata = _get_controldata(); 1409 | my $prev_state = $cdata{'state'}; 1410 | 1411 | # The instance must be running as a secondary, or be stopped. 1412 | # Anything else is an error. 1413 | if ( $rc == $OCF_SUCCESS ) { 1414 | ocf_log( 'info', 'Instance "%s" already started', 1415 | $OCF_RESOURCE_INSTANCE ); 1416 | return $OCF_SUCCESS; 1417 | } 1418 | elsif ( $rc != $OCF_NOT_RUNNING ) { 1419 | ocf_exit_reason( 'Unexpected state for instance "%s" (returned %d)', 1420 | $OCF_RESOURCE_INSTANCE, $rc ); 1421 | return $OCF_ERR_GENERIC; 1422 | } 1423 | 1424 | # 1425 | # From here, the instance is NOT running for sure.
1426 | # 1427 | 1428 | ocf_log( 'debug', 1429 | 'pgsql_start: instance "%s" is not running, starting it as a secondary', 1430 | $OCF_RESOURCE_INSTANCE ); 1431 | 1432 | # Create recovery.conf from the template file. 1433 | _create_recovery_conf(); 1434 | 1435 | # Start the instance as a secondary. 1436 | $rc = _pg_ctl_start(); 1437 | 1438 | if ( $rc == 0 ) { 1439 | 1440 | # Wait for the start to finish. 1441 | sleep 1 while ( $rc = pgsql_monitor() ) == $OCF_NOT_RUNNING; 1442 | 1443 | if ( $rc == $OCF_SUCCESS ) { 1444 | ocf_log( 'info', 'Instance "%s" started', $OCF_RESOURCE_INSTANCE ); 1445 | 1446 | # Check if a master score exists in the cluster. 1447 | # During the very first start of the cluster, no master score will 1448 | # exist on any of the existing slaves, unless an admin designated 1449 | # one of them using crm_master. If no master score exists, the 1450 | # cluster will not promote a master among the slaves. 1451 | # To solve this situation, we check if there is at least one master 1452 | # score existing on one node in the cluster. Do nothing if at least 1453 | # one master score is found among the clones of the resource. If no 1454 | # master score exists, set a score of 1 only if the resource was a 1455 | # master that was shut down before the start. 1456 | if ( $prev_state eq "shut down" and not _master_score_exists() ) { 1457 | ocf_log( 'info', 'No master score around. Set mine to 1' ); 1458 | 1459 | _set_master_score( '1' ); 1460 | } 1461 | 1462 | return $OCF_SUCCESS; 1463 | } 1464 | 1465 | ocf_exit_reason( 1466 | 'Instance "%s" is not running as a slave (returned %d)', 1467 | $OCF_RESOURCE_INSTANCE, $rc ); 1468 | 1469 | return $OCF_ERR_GENERIC; 1470 | } 1471 | 1472 | ocf_exit_reason( 'Instance "%s" failed to start (rc: %d)', 1473 | $OCF_RESOURCE_INSTANCE, $rc ); 1474 | 1475 | return $OCF_ERR_GENERIC; 1476 | } 1477 | 1478 | # Stop the PostgreSQL instance 1479 | # 1480 | sub pgsql_stop { 1481 | my $rc; 1482 | my $state; 1483 | my $pidfile = "$datadir/postmaster.pid"; 1484 | # Add 60s to the timeout or use a 24h timeout fallback to make sure 1485 | # Pacemaker will give up before us and take its decisions 1486 | my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60; 1487 | 1488 | # The instance must be running as a secondary or a primary, or be stopped. 1489 | # Anything else is an error. 1490 | $rc = pgsql_monitor(); 1491 | if ( $rc == $OCF_NOT_RUNNING ) { 1492 | ocf_log( 'info', 'Instance "%s" already stopped', 1493 | $OCF_RESOURCE_INSTANCE ); 1494 | return $OCF_SUCCESS; 1495 | } 1496 | elsif ( $rc != $OCF_SUCCESS and $rc != $OCF_RUNNING_MASTER ) { 1497 | ocf_exit_reason( 'Unexpected state for instance "%s" (returned %d)', 1498 | $OCF_RESOURCE_INSTANCE, $rc ); 1499 | return $OCF_ERR_GENERIC; 1500 | } 1501 | 1502 | # 1503 | # From here, the instance is running for sure. 1504 | # 1505 | 1506 | ocf_log( 'debug', 'pgsql_stop: instance "%s" is running, stopping it', 1507 | $OCF_RESOURCE_INSTANCE ); 1508 | 1509 | # Try to quit with a proper shutdown. 1510 | 1511 | 1512 | $rc = _runas( $PGCTL, '--pgdata', $pgdata, '-w', '--timeout', $timeout, 1513 | '-m', 'fast', 'stop' ); 1514 | 1515 | if ( $rc == 0 ) { 1516 | # Wait for the stop to finish.
1517 | sleep 1 while ( $rc = pgsql_monitor() ) != $OCF_NOT_RUNNING ; 1518 | 1519 | ocf_log( 'info', 'Instance "%s" stopped', $OCF_RESOURCE_INSTANCE ); 1520 | 1521 | return $OCF_SUCCESS; 1522 | } 1523 | 1524 | ocf_exit_reason( 'Instance "%s" failed to stop', $OCF_RESOURCE_INSTANCE ); 1525 | return $OCF_ERR_GENERIC; 1526 | } 1527 | 1528 | # Monitor the PostgreSQL instance 1529 | # 1530 | sub pgsql_monitor { 1531 | my $pgisready_rc; 1532 | my $controldata_rc; 1533 | 1534 | ocf_log( 'debug', 'pgsql_monitor: monitor is a probe' ) if ocf_is_probe(); 1535 | 1536 | # First check: verify if the instance is listening. 1537 | $pgisready_rc = _pg_isready(); 1538 | 1539 | if ( $pgisready_rc == 0 ) { 1540 | # The instance is listening. 1541 | # We confirm that the instance is up and return whether it is a primary 1542 | # or a secondary. 1543 | ocf_log( 'debug', 'pgsql_monitor: instance "%s" is listening', 1544 | $OCF_RESOURCE_INSTANCE ); 1545 | return _confirm_role(); 1546 | } 1547 | 1548 | if ( $pgisready_rc == 1 ) { 1549 | # The attempt was rejected. 1550 | # This could happen in several cases: 1551 | # - at startup 1552 | # - during shutdown 1553 | # - during crash recovery 1554 | # - if the instance is a warm standby 1555 | # Except for the warm standby case, this should be a transitional state. 1556 | # We try to confirm using pg_controldata. 1557 | ocf_log( 'debug', 1558 | 'pgsql_monitor: instance "%s" rejects connections - checking again...', 1559 | $OCF_RESOURCE_INSTANCE ); 1560 | $controldata_rc = _controldata_to_ocf(); 1561 | 1562 | if ( $controldata_rc == $OCF_RUNNING_MASTER 1563 | or $controldata_rc == $OCF_SUCCESS 1564 | ) { 1565 | # This state indicates that the pg_isready check should succeed. 1566 | # We check again. 1567 | ocf_log( 'debug', 1568 | 'pgsql_monitor: instance "%s" controldata shows a running status', 1569 | $OCF_RESOURCE_INSTANCE ); 1570 | 1571 | $pgisready_rc = _pg_isready(); 1572 | if ( $pgisready_rc == 0 ) { 1573 | # Consistent with the pg_controldata output. 1574 | # We can check if the instance is primary or secondary 1575 | ocf_log( 'debug', 'pgsql_monitor: instance "%s" is listening', 1576 | $OCF_RESOURCE_INSTANCE ); 1577 | return _confirm_role(); 1578 | } 1579 | 1580 | # Still not consistent, raise an error. 1581 | # NOTE: if the instance is a warm standby, we end here. 1582 | # TODO raise a hard error here ? 1583 | ocf_exit_reason( 1584 | 'Instance "%s" controldata is not consistent with pg_isready (returned: %d)', 1585 | $OCF_RESOURCE_INSTANCE, $pgisready_rc ); 1586 | ocf_log( 'info', 1587 | 'If this instance is in warm standby, note that this resource agent only supports hot standby' 1588 | ); 1589 | 1590 | return $OCF_ERR_GENERIC; 1591 | } 1592 | 1593 | if ( $controldata_rc == $OCF_NOT_RUNNING ) { 1594 | # This state indicates that the pg_isready check should fail with rc 2. 1595 | # We check again. 1596 | $pgisready_rc = _pg_isready(); 1597 | if ( $pgisready_rc == 2 ) { 1598 | # Consistent with the pg_controldata output. 1599 | # We check the process status using pg_ctl status and check 1600 | # if it was properly shut down using pg_controldata. 1601 | ocf_log( 'debug', 1602 | 'pgsql_monitor: instance "%s" is not listening', 1603 | $OCF_RESOURCE_INSTANCE ); 1604 | return _confirm_stopped(); 1605 | } 1606 | # Still not consistent, raise an error. 1607 | # TODO raise a hard error here ?
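# (pg_isready exit codes, as documented by PostgreSQL: 0 = the server is
# accepting connections, 1 = the server is rejecting connections (e.g.
# during startup), 2 = no response, 3 = no attempt was made, e.g. because
# of bad parameters.)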
1608 | ocf_exit_reason( 1609 | 'Instance "%s" controldata is not consistent with pg_isready (returned: %d)', 1610 | $OCF_RESOURCE_INSTANCE, $pgisready_rc ); 1611 | 1612 | return $OCF_ERR_GENERIC; 1613 | } 1614 | 1615 | # Something went wrong with the controldata check, hard fail. 1616 | ocf_exit_reason( 1617 | 'Could not get instance "%s" status from controldata (returned: %d)', 1618 | $OCF_RESOURCE_INSTANCE, $controldata_rc ); 1619 | 1620 | return $OCF_ERR_INSTALLED; 1621 | } 1622 | 1623 | elsif ( $pgisready_rc == 2 ) { 1624 | # The instance is not listening. 1625 | # We check the process status using pg_ctl status and check 1626 | # if it was properly shut down using pg_controldata. 1627 | ocf_log( 'debug', 'pgsql_monitor: instance "%s" is not listening', 1628 | $OCF_RESOURCE_INSTANCE ); 1629 | return _confirm_stopped(); 1630 | } 1631 | 1632 | elsif ( $pgisready_rc == 3 ) { 1633 | # No attempt was made, probably a syntax error. 1634 | # Hard configuration error, we don't want to retry or failover here. 1635 | ocf_exit_reason( 1636 | 'Unknown error while checking if instance "%s" is listening (returned %d)', 1637 | $OCF_RESOURCE_INSTANCE, $pgisready_rc ); 1638 | 1639 | return $OCF_ERR_CONFIGURED; 1640 | } 1641 | 1642 | ocf_exit_reason( 'Unexpected result when checking instance "%s" status', 1643 | $OCF_RESOURCE_INSTANCE ); 1644 | 1645 | return $OCF_ERR_GENERIC; 1646 | } 1647 | 1648 | 1649 | # Demote the PostgreSQL instance from primary to secondary 1650 | # To demote a PostgreSQL instance, we must: 1651 | # * stop it gracefully 1652 | # * create recovery.conf with standby_mode = on 1653 | # * start it 1654 | # 1655 | sub pgsql_demote { 1656 | my $rc; 1657 | 1658 | $rc = pgsql_monitor(); 1659 | 1660 | # Running as primary. Normal, expected behavior. 1661 | if ( $rc == $OCF_RUNNING_MASTER ) { 1662 | ocf_log( 'debug', 'pgsql_demote: "%s" currently running as a primary', 1663 | $OCF_RESOURCE_INSTANCE ) ; 1664 | } 1665 | elsif ( $rc == $OCF_SUCCESS ) { 1666 | # Already running as a secondary. Nothing to do. 1667 | ocf_log( 'debug', 1668 | 'pgsql_demote: "%s" currently running as a secondary', 1669 | $OCF_RESOURCE_INSTANCE ); 1670 | return $OCF_SUCCESS; 1671 | } 1672 | elsif ( $rc == $OCF_NOT_RUNNING ) { 1673 | # Instance is stopped. Nothing to do. 1674 | ocf_log( 'debug', 'pgsql_demote: "%s" currently shut down', 1675 | $OCF_RESOURCE_INSTANCE ); 1676 | } 1677 | elsif ( $rc == $OCF_ERR_CONFIGURED ) { 1678 | # We actually prefer raising a hard or fatal error instead of leaving 1679 | # the CRM aborting its transition for a new one because of a soft error. 1680 | # The hard error will force the CRM to move the resource immediately. 1681 | return $OCF_ERR_CONFIGURED; 1682 | } 1683 | else { 1684 | return $OCF_ERR_GENERIC; 1685 | } 1686 | 1687 | # TODO we need to make sure at least one slave is connected!! 1688 | 1689 | # WARNING if the resource state is stopped instead of master, the ocf ra dev 1690 | # rsc advises to return OCF_ERR_GENERIC, misleading the CRM into a loop where 1691 | # it computes transitions of demote(failing)->stop->start->promote actions 1692 | # until failcount == migration-threshold. 1693 | # This is a really ugly trick to keep going with the demote action if the 1694 | # rsc is already stopped gracefully.
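# (Concretely: when monitor reported OCF_NOT_RUNNING above, the graceful
# stop below is skipped entirely and the demote returns success once the
# instance has been restarted as a standby by pgsql_start.)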
1695 | # See the discussion "CRM trying to demote a stopped resource" on 1696 | # developers@clusterlabs.org 1697 | unless ( $rc == $OCF_NOT_RUNNING ) { 1698 | # Add 60s to the timeout or use a 24h timeout fallback to make sure 1699 | # Pacemaker will give up before us and take its decisions 1700 | my $timeout = ( _get_action_timeout() || 60*60*24 ) + 60; 1701 | 1702 | # WARNING the instance **MUST** be stopped gracefully. 1703 | # Do **not** use pg_stop() or service or systemctl here as these 1704 | # commands might force-stop the PostgreSQL instance using immediate 1705 | # mode after some timeout and return success, which is misleading. 1706 | 1707 | $rc = _runas( $PGCTL, '--pgdata', $pgdata, '--mode', 'fast', '-w', 1708 | '--timeout', $timeout , 'stop' ); 1709 | 1710 | # No need to wait for the stop to complete, this is handled in pg_ctl 1711 | # using the -w option. 1712 | unless ( $rc == 0 ) { 1713 | ocf_exit_reason( 'Failed to stop "%s" using pg_ctl (returned %d)', 1714 | $OCF_RESOURCE_INSTANCE, $rc ); 1715 | return $OCF_ERR_GENERIC; 1716 | } 1717 | 1718 | # Double check that the instance is stopped correctly. 1719 | $rc = pgsql_monitor(); 1720 | unless ( $rc == $OCF_NOT_RUNNING ) { 1721 | ocf_exit_reason( 1722 | 'Unexpected "%s" state: monitor status (%d) disagrees with the pg_ctl return code', 1723 | $OCF_RESOURCE_INSTANCE, $rc ); 1724 | return $OCF_ERR_GENERIC; 1725 | } 1726 | } 1727 | 1728 | # 1729 | # At this point, the instance **MUST** be stopped gracefully. 1730 | # 1731 | 1732 | # Note: We do not need to handle the recovery.conf file here as pgsql_start 1733 | # deals with that itself. Equally, there is no need to wait for the start to 1734 | # complete here, this is handled in pgsql_start. 1735 | $rc = pgsql_start(); 1736 | if ( $rc == $OCF_SUCCESS ) { 1737 | ocf_log( 'info', 'pgsql_demote: "%s" started as a secondary', 1738 | $OCF_RESOURCE_INSTANCE ); 1739 | return $OCF_SUCCESS; 1740 | } 1741 | 1742 | # NOTE: No need to double check the instance state as pgsql_start already uses 1743 | # pgsql_monitor to check the state before returning. 1744 | 1745 | ocf_exit_reason( 'Starting "%s" as a standby failed (returned %d)', 1746 | $OCF_RESOURCE_INSTANCE, $rc ); 1747 | return $OCF_ERR_GENERIC; 1748 | } 1749 | 1750 | 1751 | # Promote the secondary instance to primary 1752 | # 1753 | sub pgsql_promote { 1754 | my $rc; 1755 | my $cancel_switchover; 1756 | 1757 | $rc = pgsql_monitor(); 1758 | 1759 | if ( $rc == $OCF_SUCCESS ) { 1760 | # Running as a slave. Normal, expected behavior. 1761 | ocf_log( 'debug', 'pgsql_promote: "%s" currently running as a standby', 1762 | $OCF_RESOURCE_INSTANCE ); 1763 | } 1764 | elsif ( $rc == $OCF_RUNNING_MASTER ) { 1765 | # Already a master. Unexpected, but not a problem. 1766 | ocf_log( 'info', '"%s" already running as a primary', 1767 | $OCF_RESOURCE_INSTANCE ); 1768 | return $OCF_SUCCESS; 1769 | } 1770 | elsif ( $rc == $OCF_NOT_RUNNING ) { # INFO: this is not supposed to happen. 1771 | # Currently not running. Need to start before promoting. 1772 | ocf_log( 'info', '"%s" currently not running, starting it', 1773 | $OCF_RESOURCE_INSTANCE ); 1774 | 1775 | $rc = pgsql_start(); 1776 | if ( $rc != $OCF_SUCCESS ) { 1777 | ocf_exit_reason( 'Failed to start the instance "%s"', 1778 | $OCF_RESOURCE_INSTANCE ); 1779 | return $OCF_ERR_GENERIC; 1780 | } 1781 | } 1782 | else { 1783 | ocf_exit_reason( 'Unexpected error, cannot promote "%s"', 1784 | $OCF_RESOURCE_INSTANCE ); 1785 | return $OCF_ERR_GENERIC; 1786 | } 1787 | 1788 | # 1789 | # At this point, the instance **MUST** be started as a secondary.
1790 | # 1791 | 1792 | # Cancel the switchover if it has been considered unsafe during the 1793 | # pre-promote action 1794 | $cancel_switchover = _get_priv_attr('cancel_switchover'); 1795 | if ( $cancel_switchover ) { # if not empty and not 0 1796 | ocf_exit_reason( 1797 | 'Switchover has been canceled from the pre-promote action' ); 1798 | 1799 | _delete_priv_attr( 'cancel_switchover' ); 1800 | 1801 | return $OCF_ERR_GENERIC if $cancel_switchover eq '1'; 1802 | return $OCF_ERR_ARGS; # ban the resource from the node if we had an 1803 | # internal error during _check_switchover 1804 | } 1805 | 1806 | # Do not check for a better candidate if we try to recover the master. 1807 | # Recovery of a master is detected during the pre-promote action. It sets the 1808 | # private attribute 'recover_master' to '1' if this is a master recovery. 1809 | if ( _get_priv_attr( 'recover_master' ) eq '1' ) { 1810 | ocf_log( 'info', 'Recovering old master, no election needed'); 1811 | } 1812 | else { 1813 | 1814 | # The promotion is occurring on the best known candidate (highest 1815 | # master score), as chosen by pacemaker during the last working monitor 1816 | # on the previous master (see the pgsql_monitor/_check_locations subs). 1817 | # To avoid any race condition between the last monitor action on the 1818 | # previous master and the **real** most up-to-date standby, we 1819 | # set each standby location during the "pre-promote" action, and store 1820 | # them using the "lsn_location" resource attribute. 1821 | # 1822 | # The best standby to promote would have the highest known LSN. If the 1823 | # current resource is not the best one, we need to modify the master 1824 | # scores accordingly, and abort the current promotion. 1825 | ocf_log( 'debug', 1826 | 'pgsql_promote: checking if current node is the best candidate for promotion' ); 1827 | 1828 | # Exclude nodes that are known to be unavailable (not in the current 1829 | # partition) using the "crm_node" command 1830 | my @active_nodes = split /\s+/ => _get_priv_attr( 'nodes' ); 1831 | my $node_to_promote = ''; 1832 | my $ans; 1833 | my $max_tl; 1834 | my $max_lsn; 1835 | my $node_tl; 1836 | my $node_lsn; 1837 | my $wal_num; 1838 | my $wal_off; 1839 | 1840 | # Get the "lsn_location" attribute value for the current node, as set 1841 | # during the "pre-promote" action. 1842 | # It should be the greatest among the secondary instances. 1843 | $ans = _get_priv_attr( 'lsn_location' ); 1844 | 1845 | if ( $ans eq '' ) { 1846 | # This should not happen as the "lsn_location" attribute should have 1847 | # been updated during the "pre-promote" action. 1848 | ocf_exit_reason( 'Cannot get the current node LSN location' ); 1849 | return $OCF_ERR_GENERIC; 1850 | } 1851 | 1852 | chomp $ans; 1853 | ( $max_tl, $max_lsn ) = split /#/, $ans; 1854 | 1855 | ocf_log( 'debug', 'pgsql_promote: current node TL#LSN location: %s#%s', 1856 | $max_tl, $max_lsn ); 1857 | 1858 | # Now we compare with the other available nodes. 1859 | foreach my $node ( @active_nodes ) { 1860 | # We exclude the current node from the check. 1861 | next if $node eq $nodename; 1862 | 1863 | # Get the "lsn_location" attribute value for the node, as set during 1864 | # the "pre-promote" action. 1865 | $ans = _get_priv_attr( 'lsn_location', $node ); 1866 | 1867 | if ( $ans eq '' ) { 1868 | # This should not happen as the "lsn_location" attribute should 1869 | # have been updated during the "pre-promote" action.
1870 | ocf_exit_reason( 'Cannot get the LSN location for "%s"', $node ); 1871 | return $OCF_ERR_GENERIC; 1872 | } 1873 | 1874 | # Split the location into its timeline and LSN parts. 1875 | chomp $ans; 1876 | ( $node_tl, $node_lsn ) = split /#/, $ans; 1877 | 1878 | ocf_log( 'debug', 1879 | 'pgsql_promote: comparing with "%s": TL#LSN is %s#%s', 1880 | $node, $node_tl, $node_lsn ); 1881 | 1882 | # If the node has a higher LSN, select it as the best candidate for 1883 | # promotion and keep looping to check the TL/LSN of the other nodes. 1884 | if ( $node_tl > $max_tl 1885 | or ( $node_tl == $max_tl and $node_lsn > $max_lsn ) 1886 | ) { 1887 | ocf_log( 'debug', 1888 | 'pgsql_promote: "%s" is a better candidate to promote (%s#%s > %s#%s)', 1889 | $node, $node_tl, $node_lsn, $max_tl, $max_lsn ); 1890 | $node_to_promote = $node; 1891 | $max_tl = $node_tl; 1892 | $max_lsn = $node_lsn; 1893 | } 1894 | } 1895 | 1896 | # If any node has been selected, we adapt the master scores accordingly 1897 | # and break the current promotion. 1898 | if ( $node_to_promote ne '' ) { 1899 | ocf_exit_reason( 1900 | '%s is the best candidate to promote, aborting current promotion', 1901 | $node_to_promote ); 1902 | 1903 | # Reset the current node master score. 1904 | _set_master_score( '1' ); 1905 | 1906 | # Set the promotion candidate master score. 1907 | _set_master_score( '1000', $node_to_promote ); 1908 | 1909 | # We fail the promotion to trigger another promotion transition 1910 | # with the new scores. 1911 | return $OCF_ERR_GENERIC; 1912 | } 1913 | 1914 | # Else, we keep on promoting the current node. 1915 | } 1916 | 1917 | unless ( 1918 | # Promote the instance on the current node. 1919 | _runas( $PGCTL, '--pgdata', $pgdata, '-w', 'promote' ) == 0 ) 1920 | { 1921 | ocf_exit_reason( 'Error during promotion command' ); 1922 | return $OCF_ERR_GENERIC; 1923 | } 1924 | 1925 | # The instance promotion is asynchronous, so we need to wait for this 1926 | # process to complete. 1927 | while ( pgsql_monitor() != $OCF_RUNNING_MASTER ) { 1928 | ocf_log( 'info', 'Waiting for the promote to complete' ); 1929 | sleep 1; 1930 | } 1931 | 1932 | ocf_log( 'info', 'Promote complete' ); 1933 | 1934 | return $OCF_SUCCESS; 1935 | } 1936 | 1937 | # This action is called **before** the actual promotion when a failing master is 1938 | # considered unreclaimable or recoverable, or when a new master must be promoted 1939 | # (switchover or first start). 1940 | # Like every "notify" action, it is executed almost simultaneously on all 1941 | # available nodes. 1942 | sub pgsql_notify_pre_promote { 1943 | my $rc; 1944 | my $node_tl; 1945 | my $node_lsn; 1946 | my %cdata; 1947 | my %active_nodes; 1948 | my $attr_nodes; 1949 | 1950 | ocf_log( 'info', 'Promoting instance on node "%s"', 1951 | $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ); 1952 | 1953 | # No need to do an election between slaves if this is a recovery of the master 1954 | if ( _is_master_recover( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} ) ) { 1955 | ocf_log( 'warning', 'This is a master recovery!' ); 1956 | 1957 | _set_priv_attr( 'recover_master', '1' ) 1958 | if $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename; 1959 | 1960 | return $OCF_SUCCESS; 1961 | } 1962 | 1963 | # Environment cleanup!
1964 | _delete_priv_attr( 'lsn_location' ); 1965 | _delete_priv_attr( 'recover_master' ); 1966 | _delete_priv_attr( 'nodes' ); 1967 | _delete_priv_attr( 'cancel_switchover' ); 1968 | 1969 | # Check for the last received WAL entry from the master if we are 1970 | # the designated slave to promote 1971 | if ( _is_switchover( $nodename ) and scalar 1972 | grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'promote'} } 1973 | ) { 1974 | $rc = _check_switchover(); 1975 | 1976 | unless ( $rc == 0 ) { 1977 | # Shortcut the election process as the switchover will be 1978 | # canceled 1979 | _set_priv_attr( 'cancel_switchover', $rc ); 1980 | return $OCF_SUCCESS; # return code is ignored during notify 1981 | } 1982 | 1983 | # If the sub keeps going, that means the switchover is safe. 1984 | # Keep going with the election process in case the switchover was 1985 | # instructed to the wrong node. 1986 | # FIXME: should we allow a switchover to a lagging slave? 1987 | } 1988 | 1989 | # We need to trigger an election between the existing slaves to promote the 1990 | # best one based on its current LSN location. Each node sets a private 1991 | # attribute "lsn_location" with its TL and LSN location. 1992 | # 1993 | # During the following promote action, the designated standby for 1994 | # promotion uses these attributes to check if the instance to be promoted 1995 | # is the best one, so we can avoid a race condition between the last 1996 | # successful monitor on the previous master and the current promotion. 1997 | 1998 | # As we cannot break the transition from a notification action, we check 1999 | # during the promotion if each node's TL and LSN are valid. 2000 | 2001 | # Force a checkpoint to make sure the controldata shows the very last TL 2002 | _query( q{ CHECKPOINT }, {} ); 2003 | %cdata = _get_controldata(); 2004 | $node_lsn = _get_last_received_lsn( 'in decimal' ); 2005 | 2006 | unless ( defined $node_lsn ) { 2007 | ocf_log( 'warning', 'Unknown current node LSN' ); 2008 | # Return codes are ignored during notifications... 2009 | return $OCF_SUCCESS; 2010 | } 2011 | 2012 | $node_lsn = "$cdata{'tl'}#$node_lsn"; 2013 | 2014 | ocf_log( 'info', 'Current node TL#LSN: %s', $node_lsn ); 2015 | 2016 | # Set the "lsn_location" attribute value for this node so we can use it 2017 | # during the following "promote" action. 2018 | _set_priv_attr( 'lsn_location', $node_lsn ); 2019 | 2020 | ocf_log( 'warning', 'Could not set the current node LSN' ) 2021 | if $? != 0 ; 2022 | 2023 | # If this node is the future master, keep track of the slaves that 2024 | # received the same notification to compare our LSN with them during 2025 | # the promotion 2026 | if ( $OCF_NOTIFY_ENV{'promote'}[0]{'uname'} eq $nodename ) { 2027 | # Build the list of active nodes: 2028 | # master + slave + start - stop 2029 | # FIXME: Deal with a rsc started during the same transition but **after** 2030 | # the promotion ? 2031 | $active_nodes{ $_->{'uname'} }++ foreach @{ $OCF_NOTIFY_ENV{'active'} }, 2032 | @{ $OCF_NOTIFY_ENV{'start'} }; 2033 | $active_nodes{ $_->{'uname'} }-- foreach @{ $OCF_NOTIFY_ENV{'stop'} }; 2034 | 2035 | $attr_nodes = join " " 2036 | => grep { $active_nodes{$_} > 0 } keys %active_nodes; 2037 | 2038 | _set_priv_attr( 'nodes', $attr_nodes ); 2039 | } 2040 | 2041 | return $OCF_SUCCESS; 2042 | } 2043 | 2044 | # This action is called after a promote action. 2045 | sub pgsql_notify_post_promote { 2046 | 2047 | # We have a new master (or the previous one recovered). 2048 | # Environment cleanup!
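# (The same four private attributes as in pre-promote are deleted here,
# so a leftover value can never leak into a later transition.)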
2049 | _delete_priv_attr( 'lsn_location' ); 2050 | _delete_priv_attr( 'recover_master' ); 2051 | _delete_priv_attr( 'nodes' ); 2052 | _delete_priv_attr( 'cancel_switchover' ); 2053 | 2054 | return $OCF_SUCCESS; 2055 | } 2056 | 2057 | # This is called before a demote occurs. 2058 | sub pgsql_notify_pre_demote { 2059 | my $rc; 2060 | my %cdata; 2061 | 2062 | # Do nothing if the local node will not be demoted 2063 | return $OCF_SUCCESS unless scalar 2064 | grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'demote'} }; 2065 | 2066 | $rc = pgsql_monitor(); 2067 | 2068 | # Do nothing if this is not a master recovery 2069 | return $OCF_SUCCESS unless _is_master_recover( $nodename ) 2070 | and $rc == $OCF_FAILED_MASTER; 2071 | 2072 | # In case of a master crash, we need to detect if the CRM tries to recover 2073 | # the master clone. The usual transition is to do: 2074 | # demote->stop->start->promote 2075 | # 2076 | # There are multiple flaws with this transition: 2077 | # * the 1st and 2nd actions will fail because the instance is in the 2078 | # OCF_FAILED_MASTER state 2079 | # * the usual start action is dangerous as the instance will start with 2080 | # a recovery.conf instead of entering a normal recovery process 2081 | # 2082 | # To avoid this, we try to start the instance in recovery from here. 2083 | # If it succeeds, at least it will be demoted correctly with a normal 2084 | # status. If it fails, it will be caught in the next steps. 2085 | 2086 | ocf_log( 'info', 'Trying to start failing master "%s"...', 2087 | $OCF_RESOURCE_INSTANCE ); 2088 | 2089 | # Either the instance managed to start or it couldn't. 2090 | # We rely on the pg_ctl '-w' switch to take care of this. If it couldn't 2091 | # start, this error will be caught later during the various checks 2092 | _pg_ctl_start(); 2093 | 2094 | %cdata = _get_controldata(); 2095 | 2096 | ocf_log( 'info', 'State is "%s" after recovery attempt', $cdata{'state'} ); 2097 | 2098 | return $OCF_SUCCESS; 2099 | } 2100 | 2101 | # This is called before a stop occurs. 2102 | sub pgsql_notify_pre_stop { 2103 | my $rc; 2104 | my %cdata; 2105 | 2106 | # Do nothing if the local node will not be stopped 2107 | return $OCF_SUCCESS unless scalar 2108 | grep { $_->{'uname'} eq $nodename } @{ $OCF_NOTIFY_ENV{'stop'} }; 2109 | 2110 | $rc = _controldata_to_ocf(); 2111 | 2112 | # Do nothing if this is not a slave recovery 2113 | return $OCF_SUCCESS unless _is_slave_recover( $nodename ) 2114 | and $rc == $OCF_RUNNING_SLAVE; 2115 | 2116 | # In case of a slave crash, we need to detect if the CRM tries to recover 2117 | # the slave clone. The usual transition is to do: stop->start 2118 | # 2119 | # This transition cannot work because the instance is in the 2120 | # OCF_ERR_GENERIC state. So the stop action will fail, most probably 2121 | # leading to a fencing action. 2122 | # 2123 | # To avoid this, we try to start the instance in recovery from here. 2124 | # If it succeeds, at least it will be stopped correctly with a normal 2125 | # status. If it fails, it will be caught in the next steps. 2126 | 2127 | ocf_log( 'info', 'Trying to start failing slave "%s"...', 2128 | $OCF_RESOURCE_INSTANCE ); 2129 | 2130 | # Either the instance managed to start or it couldn't. 2131 | # We rely on the pg_ctl '-w' switch to take care of this.
If it couldn't 2132 | # start, this error will be caught later during the various checks 2133 | _pg_ctl_start(); 2134 | 2135 | %cdata = _get_controldata(); 2136 | 2137 | ocf_log( 'info', 'State is "%s" after recovery attempt', $cdata{'state'} ); 2138 | 2139 | return $OCF_SUCCESS; 2140 | } 2141 | 2142 | # Notify type actions, called on all available nodes before (pre) and after 2143 | # (post) other actions, like promote, start, ... 2144 | # 2145 | sub pgsql_notify { 2146 | my $type_op; 2147 | 2148 | ocf_log( 'debug', "pgsql_notify: environment variables: %s", 2149 | Data::Dumper->new( [ \%OCF_NOTIFY_ENV ] )->Sortkeys(1)->Terse(1)->Dump ); 2150 | 2151 | return unless %OCF_NOTIFY_ENV; 2152 | 2153 | $type_op = "$OCF_NOTIFY_ENV{'type'}-$OCF_NOTIFY_ENV{'operation'}"; 2154 | 2155 | for ( $type_op ) { 2156 | if ( /^pre-promote$/ ) { return pgsql_notify_pre_promote() } 2157 | elsif ( /^post-promote$/ ) { return pgsql_notify_post_promote() } 2158 | elsif ( /^pre-demote$/ ) { return pgsql_notify_pre_demote() } 2159 | elsif ( /^pre-stop$/ ) { return pgsql_notify_pre_stop() } 2160 | } 2161 | 2162 | return $OCF_SUCCESS; 2163 | } 2164 | 2165 | # Action used to allow online modification of resource parameter values. 2166 | # 2167 | sub pgsql_reload { 2168 | 2169 | # No action necessary, the action declaration is enough to inform pacemaker 2170 | # that the modification of any non-unique parameter can be applied without 2171 | # having to restart the resource. 2172 | ocf_log( 'info', 'Instance "%s" reloaded', $OCF_RESOURCE_INSTANCE ); 2173 | return $OCF_SUCCESS; 2174 | 2175 | } 2176 | 2177 | ############################################################ 2178 | #### MAIN 2179 | 2180 | # Avoid "could not change directory" when executing commands as "system-user". 2181 | chdir File::Spec->tmpdir(); 2182 | 2183 | # Set current node name. 2184 | $nodename = ocf_local_nodename(); 2185 | 2186 | if ( $OCF_ACTION =~ /^(?:start|stop|reload|monitor|promote|demote|notify)$/ ) { 2187 | pgsql_validate_all(); 2188 | # No need to validate for meta-data, methods or validate-all.
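# (Side note, an illustration rather than anything the agent calls itself:
# the master scores maintained by this agent can be inspected or tweaked
# from a shell with the standard Pacemaker tooling, e.g.
#   crm_master -r <resource> -N <node> -G
# to query a node's score, or "crm_simulate -sL" to display all
# allocation scores.)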
2189 | } 2190 | 2191 | # Run action 2192 | for ( $OCF_ACTION ) { 2193 | if ( /^start$/ ) { $exit_code = pgsql_start() } 2194 | elsif ( /^stop$/ ) { $exit_code = pgsql_stop() } 2195 | elsif ( /^monitor$/ ) { $exit_code = pgsql_monitor() } 2196 | elsif ( /^promote$/ ) { $exit_code = pgsql_promote() } 2197 | elsif ( /^demote$/ ) { $exit_code = pgsql_demote() } 2198 | elsif ( /^notify$/ ) { $exit_code = pgsql_notify() } 2199 | elsif ( /^reload$/ ) { $exit_code = pgsql_reload() } 2200 | elsif ( /^validate-all$/ ) { $exit_code = pgsql_validate_all() } 2201 | elsif ( /^meta-data$/ ) { ocf_meta_data() } 2202 | elsif ( /^methods$/ ) { ocf_methods() } 2203 | else { $exit_code = $OCF_ERR_UNIMPLEMENTED } 2204 | } 2205 | 2206 | exit $exit_code; 2207 | 2208 | 2209 | =head1 EXAMPLE CRM SHELL 2210 | 2211 | The following is an example configuration for a pgsqlms resource using the 2212 | crm(8) shell: 2213 | 2214 | primitive pgsqld pgsqlms \ 2215 | params pgdata="/var/lib/postgresql/9.6/main" \ 2216 | bindir="/usr/lib/postgresql/9.6/bin" \ 2217 | pghost="/var/run/postgresql" \ 2218 | recovery_template="/etc/postgresql/9.6/main/recovery.conf.pcmk" \ 2219 | start_opts="-c config_file=/etc/postgresql/9.6/main/postgresql.conf" \ 2220 | op start timeout=60s \ 2221 | op stop timeout=60s \ 2222 | op promote timeout=30s \ 2223 | op demote timeout=120s \ 2224 | op monitor interval=15s timeout=10s role="Master" \ 2225 | op monitor interval=16s timeout=10s role="Slave" \ 2226 | op notify timeout=60s 2227 | 2228 | ms pgsql-ha pgsqld meta notify=true 2229 | 2230 | 2231 | =head1 EXAMPLE PCS 2232 | 2233 | The following is an example configuration for a pgsqlms resource using pcs(8): 2234 | 2235 | pcs resource create pgsqld ocf:heartbeat:pgsqlms \ 2236 | bindir=/usr/pgsql-9.6/bin pgdata=/var/lib/pgsql/9.6/data \ 2237 | op start timeout=60s \ 2238 | op stop timeout=60s \ 2239 | op promote timeout=30s \ 2240 | op demote timeout=120s \ 2241 | op monitor interval=15s timeout=10s role="Master" \ 2242 | op monitor interval=16s timeout=10s role="Slave" \ 2243 | op notify timeout=60s --master notify=true 2244 | 2245 | =head1 SEE ALSO 2246 | 2247 | http://clusterlabs.org/ 2248 | 2249 | =head1 AUTHOR 2250 | 2251 | Jehan-Guillaume de Rorthais and Mael Rimbault. 
2252 | 2253 | =cut 2254 | -------------------------------------------------------------------------------- /files/resource-agents-paf-1.1.0-1.noarch.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanChii/ansible-role-postgres-ha/20361e1d8183858a6a8656903250a465937c98db/files/resource-agents-paf-1.1.0-1.noarch.rpm -------------------------------------------------------------------------------- /files/resource-agents-paf-2.2.0-1.noarch.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanChii/ansible-role-postgres-ha/20361e1d8183858a6a8656903250a465937c98db/files/resource-agents-paf-2.2.0-1.noarch.rpm -------------------------------------------------------------------------------- /files/resource-agents-paf-2.2.1-1.noarch.rpm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YanChii/ansible-role-postgres-ha/20361e1d8183858a6a8656903250a465937c98db/files/resource-agents-paf-2.2.1-1.noarch.rpm -------------------------------------------------------------------------------- /handlers/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 3 | # handlers file for postgresql-ha 4 | 5 | - name: restart postgresql 6 | service: name="{{ postgres_ha_pg_systemd_svcname }}" state=restarted 7 | 8 | - name: reload postgresql 9 | shell: "{{ postgres_ha_pg_bindir }}/pg_ctl -D {{ postgres_ha_pg_data }} reload" 10 | become: yes 11 | become_user: postgres 12 | vars: 13 | ansible_ssh_pipelining: no 14 | 15 | - name: reload corosync 16 | shell: pcs cluster reload corosync 17 | 18 | - name: restart corosync 19 | service: name=corosync state=restarted 20 | 21 | - name: reload systemd 22 | shell: /bin/systemctl daemon-reload 23 | 24 | -------------------------------------------------------------------------------- /library/pcs_property: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | DOCUMENTATION = ''' 5 | --- 6 | module: pcs_property 7 | short_description: Manages I(pacemaker) cluster properties with the pcs tool. 8 | options: 9 | state: 10 | required: false 11 | default: present 12 | choices: [ "absent", "present" ] 13 | name: 14 | required: true 15 | description: name of the property. 16 | value: 17 | required: true 18 | description: value of the property. 19 | ''' 20 | 21 | def main(): 22 | module = AnsibleModule( 23 | argument_spec = dict( 24 | state = dict(default='present', choices=['present', 'absent']), 25 | name = dict(required=True), 26 | value = dict(required=True), 27 | ), 28 | supports_check_mode=True, 29 | ) 30 | 31 | # TODO check the pcs command is available. 32 | # TODO check pacemaker/corosync is running. 33 | 34 | # Get the current property value. 35 | cmd = "pcs property list %(name)s | awk '/^ / { print $2}'" % module.params 36 | rc, out, err = module.run_command(cmd, use_unsafe_shell=True) 37 | value = out.strip() 38 | 39 | if module.params['state'] == 'absent': 40 | # state=absent: unset the property below if it currently has a value.
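# (An empty string from the awk pipeline above means the property is not
# currently set, which is what makes the check below idempotent.)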
41 | if value != '': 42 | changed = True 43 | if not module.check_mode: 44 | cmd = 'pcs property unset %(name)s' % module.params 45 | module.run_command(cmd) 46 | else: 47 | changed = False 48 | module.exit_json(changed=changed) 49 | else: 50 | # state=present: (re)set the property if the current value differs. 51 | if value != module.params['value']: 52 | changed = True 53 | if not module.check_mode: 54 | cmd = 'pcs property set %(name)s=%(value)s' % module.params 55 | module.run_command(cmd) 56 | else: 57 | changed = False 58 | module.exit_json(changed=changed, msg="%(name)s=%(value)s" % module.params) 59 | 60 | # import module snippets 61 | from ansible.module_utils.basic import * 62 | main() 63 | 64 | -------------------------------------------------------------------------------- /library/pcs_resource: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # -*- coding: utf-8 -*- 3 | 4 | DOCUMENTATION = ''' 5 | --- 6 | module: pcs_resource 7 | short_description: Manages I(pacemaker) cluster resources with the pcs tool. 8 | options: 9 | command: 10 | required: true 11 | description: Supported commands. 12 | choices: [ "create", "master"] 13 | resource_id: 14 | required: true 15 | description: Id of the resource. 16 | type: 17 | required: false 18 | description: type of the resource. Required by the «create» command. 19 | ms_name: 20 | required: false 21 | description: name of the resource. Required by the «master» command. 22 | group: 23 | required: false 24 | description: add the resource to the specified group. 25 | options: 26 | required: false 27 | description: hash of resource options. 28 | operations: 29 | required: false 30 | description: list of hashes of operations. Used by the «create» command. 31 | disabled: 32 | required: false 33 | type: bool 34 | description: don't start the resource after creation. 35 | ''' 36 | 37 | def main(): 38 | module = AnsibleModule( 39 | argument_spec = dict( 40 | command = dict(required=True, choices=['create', 'master']), 41 | name = dict(required=True, aliases=['resource_id']), 42 | ms_name = dict(required=False, type='str'), 43 | type = dict(required=False), 44 | group = dict(required=False, type='str'), 45 | options = dict(required=False, type='dict'), 46 | operations = dict(required=False, type='raw'), 47 | disabled = dict(required=False, type='bool'), 48 | ), 49 | supports_check_mode=True, 50 | ) 51 | 52 | # TODO check the pcs command is available. 53 | # TODO check pacemaker/corosync is running. 54 | 55 | # Check if the resource already exists. 56 | cmd = "pcs resource show %(name)s" % module.params 57 | rc, out, err = module.run_command(cmd) 58 | exists = (rc == 0) 59 | 60 | if exists: 61 | module.exit_json(changed=False, msg="Resource already exists.") 62 | elif module.check_mode: 63 | module.exit_json(changed=True) 64 | 65 | # Validate and process command specific params. 66 | if module.params['command'] == 'create': 67 | if not module.params['type']: 68 | module.fail_json(msg="missing required arguments: type.") 69 | if not module.params['options']: 70 | module.fail_json(msg="missing required arguments: options.") 71 | # Command template. 72 | cmd = 'pcs resource %(command)s %(name)s %(type)s %(options)s' 73 | # Process operations.
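# For illustration only (the values are hypothetical), an item such as
#   {'action': 'monitor', 'options': {'interval': '15s', 'role': 'Master'}}
# is rendered by the loop below as: op monitor interval="15s" role="Master"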
74 | if module.params['operations']: 75 | cmd += ' %(operations)s' 76 | operations = [] 77 | for op in module.params['operations']: 78 | op['options'] = ' '.join(['%s="%s"' % (key, value) for (key, value) in op['options'].items()]) 79 | operations.append('op %(action)s %(options)s' % op) 80 | module.params['operations'] = ' '.join(operations) 81 | 82 | elif module.params['command'] == 'master': 83 | if not module.params['options']: 84 | module.fail_json(msg="missing required arguments: options.") 85 | if not module.params['ms_name']: 86 | module.fail_json(msg="missing required arguments: ms_name.") 87 | # Command template. 88 | cmd = 'pcs resource %(command)s %(name)s %(ms_name)s %(options)s' 89 | 90 | # Process options. 91 | if module.params['options'] is not None: 92 | options = module.params['options'] 93 | if options: 94 | options = ' '.join(['%s="%s"' % (key, value) for (key, value) in options.items()]) 95 | module.params['options'] = options 96 | 97 | if module.params['group'] is not None: 98 | if module.params['group']: 99 | cmd += ' --group ' + module.params['group'] 100 | 101 | if module.params['disabled'] is not None: 102 | if module.params['disabled']: 103 | cmd += ' --disabled' 104 | 105 | # Run the command. 106 | cmd = cmd % module.params 107 | message = 'Running cmd: %s' % cmd 108 | rc, out, err = module.run_command(cmd) 109 | if rc != 0: 110 | module.fail_json(msg="Execution failed.\nCommand: `%s`\nError: %s" % (cmd, err)) 111 | 112 | module.exit_json(changed=True, msg=message) 113 | 114 | # import module snippets 115 | from ansible.module_utils.basic import * 116 | main() 117 | 118 | -------------------------------------------------------------------------------- /meta/main.yml: -------------------------------------------------------------------------------- 1 | galaxy_info: 2 | author: YanChii 3 | description: 4 | company: Danube Cloud 5 | 6 | # If the issue tracker for your role is not on github, uncomment the 7 | # next line and provide a value 8 | # issue_tracker_url: http://example.com/issue/tracker 9 | 10 | license: Apache 11 | 12 | min_ansible_version: 2.0 13 | 14 | # Optionally specify the branch Galaxy will use when accessing the GitHub 15 | # repo for this role. During role install, if no tags are available, 16 | # Galaxy will use this branch. During import Galaxy will access files on 17 | # this branch. If travis integration is configured, only notifications for this 18 | # branch will be accepted. Otherwise, in all cases, the repo's default branch 19 | # (usually master) will be used. 20 | #github_branch: 21 | 22 | # 23 | # Below are all platforms currently available. Just uncomment 24 | # the ones that apply to your role. If you don't see your 25 | # platform on this list, let us know and we'll get it added!
26 | # 27 | platforms: 28 | #- name: OpenBSD 29 | # versions: 30 | # - all 31 | # - 5.6 32 | # - 5.7 33 | # - 5.8 34 | # - 5.9 35 | # - 6.0 36 | #- name: Fedora 37 | # versions: 38 | # - all 39 | # - 16 40 | # - 17 41 | # - 18 42 | # - 19 43 | # - 20 44 | # - 21 45 | # - 22 46 | # - 23 47 | # - 24 48 | # - 25 49 | #- name: DellOS 50 | # versions: 51 | # - all 52 | # - 10 53 | # - 6 54 | # - 9 55 | #- name: MacOSX 56 | # versions: 57 | # - all 58 | # - 10.10 59 | # - 10.11 60 | # - 10.12 61 | # - 10.7 62 | # - 10.8 63 | # - 10.9 64 | #- name: Synology 65 | # versions: 66 | # - all 67 | # - any 68 | #- name: Junos 69 | # versions: 70 | # - all 71 | # - any 72 | #- name: GenericBSD 73 | # versions: 74 | # - all 75 | # - any 76 | #- name: Void Linux 77 | # versions: 78 | # - all 79 | # - any 80 | #- name: GenericLinux 81 | # versions: 82 | # - all 83 | # - any 84 | #- name: NXOS 85 | # versions: 86 | # - all 87 | # - any 88 | #- name: IOS 89 | # versions: 90 | # - all 91 | # - any 92 | #- name: Amazon 93 | # versions: 94 | # - all 95 | # - 2013.03 96 | # - 2013.09 97 | # - 2016.03 98 | # - 2016.09 99 | #- name: ArchLinux 100 | # versions: 101 | # - all 102 | # - any 103 | #- name: FreeBSD 104 | # versions: 105 | # - all 106 | # - 10.0 107 | # - 10.1 108 | # - 10.2 109 | # - 10.3 110 | # - 11.0 111 | # - 8.0 112 | # - 8.1 113 | # - 8.2 114 | # - 8.3 115 | # - 8.4 116 | # - 9.0 117 | # - 9.1 118 | # - 9.1 119 | # - 9.2 120 | # - 9.3 121 | #- name: Ubuntu 122 | # versions: 123 | # - all 124 | # - lucid 125 | # - maverick 126 | # - natty 127 | # - oneiric 128 | # - precise 129 | # - quantal 130 | # - raring 131 | # - saucy 132 | # - trusty 133 | # - utopic 134 | # - vivid 135 | # - wily 136 | # - xenial 137 | # - yakkety 138 | #- name: Debian 139 | # versions: 140 | # - all 141 | # - etch 142 | # - jessie 143 | # - lenny 144 | # - sid 145 | # - squeeze 146 | # - stretch 147 | # - wheezy 148 | #- name: Alpine 149 | # versions: 150 | # - all 151 | # - any 152 | - name: EL 153 | versions: 154 | # - all 155 | # - 5 156 | # - 6 157 | - 7 158 | #- name: Windows 159 | # versions: 160 | # - all 161 | # - 2012R2 162 | #- name: SmartOS 163 | # versions: 164 | # - all 165 | # - any 166 | #- name: opensuse 167 | # versions: 168 | # - all 169 | # - 12.1 170 | # - 12.2 171 | # - 12.3 172 | # - 13.1 173 | # - 13.2 174 | #- name: SLES 175 | # versions: 176 | # - all 177 | # - 10SP3 178 | # - 10SP4 179 | # - 11 180 | # - 11SP1 181 | # - 11SP2 182 | # - 11SP3 183 | # - 11SP4 184 | # - 12 185 | # - 12SP1 186 | #- name: GenericUNIX 187 | # versions: 188 | # - all 189 | # - any 190 | #- name: Solaris 191 | # versions: 192 | # - all 193 | # - 10 194 | # - 11.0 195 | # - 11.1 196 | # - 11.2 197 | # - 11.3 198 | #- name: eos 199 | # versions: 200 | # - all 201 | # - Any 202 | 203 | galaxy_tags: 204 | - database 205 | - clustering 206 | - postgresql 207 | 208 | dependencies: [] 209 | # List your role dependencies here, one per line. 210 | # Be sure to remove the '[]' above if you add dependencies 211 | # to this list. 
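A minimal sketch of how this role might be wired into a play, for orientation only — the host group and role name below are assumptions, not defined by the files shown here; the postgres_ha_* variable names are the ones used by the tasks that follow:

    - hosts: pgcluster                # hypothetical inventory group
      become: yes
      vars:
        # designate one host of the play as the initial master
        postgres_ha_cluster_master_host: "{{ groups['pgcluster'][0] }}"
      roles:
        - ansible-role-postgres-ha    # assumed checkout name of this role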
212 | -------------------------------------------------------------------------------- /tasks/constraints.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | # location constraints 4 | - name: setting VIP location constraints 5 | shell: pcs constraint location "{{ postgres_ha_cluster_vip_res_name }}" prefers {% for clhost in ansible_play_batch %}{{clhost}}=100 {% endfor %} 6 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 7 | 8 | - name: setting DB location constraints 9 | shell: pcs constraint location "{{ postgres_ha_cluster_pg_HA_res_name }}" prefers {% for clhost in ansible_play_batch %}{{clhost}}=100 {% endfor %} 10 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 11 | 12 | 13 | # colocation constraints 14 | - name: setting resources colocation group 1 15 | shell: pcs constraint colocation add "{{ postgres_ha_cluster_vip_res_name }}" with master "{{ postgres_ha_cluster_pg_HA_res_name }}" INFINITY 16 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 17 | 18 | # start order constraints 19 | - name: setting resources start order 20 | shell: pcs constraint order promote "{{ postgres_ha_cluster_pg_HA_res_name }}" then start "{{ postgres_ha_cluster_vip_res_name }}" symmetrical=false 21 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 22 | 23 | - name: setting resources stop order 24 | shell: pcs constraint order demote "{{ postgres_ha_cluster_pg_HA_res_name }}" then stop "{{ postgres_ha_cluster_vip_res_name }}" symmetrical=false 25 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 26 | 27 | - name: marking constraints as processed 28 | shell: echo "LOCKFILE. Do not delete!" 
> "/var/lib/pgsql/{{ postgres_ha_pg_version }}/.{{postgres_ha_cluster_name}}_constraints_processed" 29 | args: 30 | creates: "/var/lib/pgsql/{{ postgres_ha_pg_version }}/.{{postgres_ha_cluster_name}}_constraints_processed" 31 | -------------------------------------------------------------------------------- /tasks/finalize.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | - name: enable database cluster resource 4 | shell: pcs resource enable "{{ postgres_ha_cluster_pg_HA_res_name }}" 5 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 6 | 7 | - name: refresh database cluster resource 8 | shell: pcs resource manage "{{ postgres_ha_cluster_pg_HA_res_name }}" && pcs resource clear "{{ postgres_ha_cluster_pg_HA_res_name }}" && pcs resource refresh "{{ postgres_ha_cluster_pg_HA_res_name }}" 9 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 10 | 11 | - name: check if all slaves are connected 12 | shell: psql -Aqtc "SELECT count(*) FROM pg_stat_replication" 13 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 14 | become: yes 15 | become_user: postgres 16 | register: slavecount 17 | vars: 18 | ansible_ssh_pipelining: no 19 | until: (slavecount.stdout|string) == ((ansible_play_batch|length - 1)|string) 20 | retries: 16 21 | delay: 2 22 | 23 | -------------------------------------------------------------------------------- /tasks/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 3 | 4 | - include: pre-tasks.yml 5 | - include: pcs.yml 6 | - include: vip.yml 7 | - include: postgresql_sync.yml 8 | - include: paf.yml 9 | 10 | - name: test constraints presence 11 | stat: path="/var/lib/pgsql/{{ postgres_ha_pg_version }}/.{{postgres_ha_cluster_name}}_constraints_processed" 12 | register: constraints_processed 13 | 14 | - include: constraints.yml 15 | when: not constraints_processed.stat.exists 16 | 17 | - include: finalize.yml 18 | 19 | - include: maintenance.yml 20 | -------------------------------------------------------------------------------- /tasks/maintenance.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | - name: create cluster maintenance directory 4 | file: 5 | path: "{{ postgres_ha_maint_scripts_path }}" 6 | state: directory 7 | owner: postgres 8 | group: postgres 9 | mode: 0755 10 | 11 | - name: create servers IP list 12 | set_fact: 13 | all_ips: "" 14 | 15 | - name: append servers IP list 16 | set_fact: 17 | all_ips: "{% if all_ips != '' %}{{ all_ips }} {% endif %}{{ hostvars[item].ansible_default_ipv4.address }}" 18 | with_items: "{{ ansible_play_batch }}" 19 | 20 | - name: generate cluster maintenance scripts 21 | template: src="{{ item }}" dest="{{ postgres_ha_maint_scripts_path }}/{{ item | basename | regex_replace('\.j2$', '') }}" 22 | args: 23 | owner: postgres 24 | group: postgres 25 | mode: 0750 26 | with_fileglob: "{{ role_path }}/templates/maint/*" 27 | -------------------------------------------------------------------------------- /tasks/paf.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml 
2 | 3 | - name: select proper PAF package (centos7) 4 | set_fact: 5 | paf_pkg: 'resource-agents-paf-{{ postgres_ha_paf_version }}-1.noarch.rpm' 6 | when: os_version == 'centos7' 7 | 8 | - name: select proper PAF package (centos6) 9 | set_fact: 10 | paf_pkg: 'resource-agents-paf-1.1.0-1.noarch.rpm' 11 | when: os_version == 'centos6' 12 | 13 | # this rpm is also available on GitHub, but when multiple servers start to download 14 | # the same file from GitHub simultaneously, GitHub will likely block the requests 15 | # and the role will fail; 16 | # that's why the rpm is embedded in the role 17 | 18 | - name: copy PAF rpm to hosts 19 | copy: src="{{ paf_pkg }}" dest="/tmp/{{ paf_pkg }}" 20 | 21 | - name: install PAF DB failover agent 22 | yum: 23 | name: "/tmp/{{ paf_pkg }}" 24 | state: present 25 | 26 | - name: apply PAF v2.2.0 fix for newest pacemaker 27 | copy: 28 | src: 'pgsqlms-2.2.0-fix-pg10' 29 | dest: '/usr/lib/ocf/resource.d/heartbeat/pgsqlms' 30 | args: 31 | owner: root 32 | group: root 33 | mode: 0555 34 | when: postgres_ha_paf_version == '2.2.0' and 35 | not postgres_ha_paf_geo_patch 36 | 37 | - name: apply geo-HA patches to DB failover agent 38 | copy: 39 | src: 'pgsqlms-{{ postgres_ha_paf_version }}-geo-patched' 40 | dest: '/usr/lib/ocf/resource.d/heartbeat/pgsqlms' 41 | args: 42 | owner: root 43 | group: root 44 | mode: 0555 45 | when: postgres_ha_paf_geo_patch 46 | 47 | - name: prepare DB recovery config 48 | template: src=recovery.conf.pcmk.j2 dest="{{ postgres_ha_pg_data }}/../recovery.conf.{{postgres_ha_cluster_name}}.pcmk" 49 | args: 50 | owner: postgres 51 | group: postgres 52 | mode: 0644 53 | 54 | - name: stop database for clustering 55 | service: name="{{ postgres_ha_pg_systemd_svcname }}" state=stopped enabled=false 56 | 57 | - name: create database cluster resource 58 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 59 | pcs_resource: command=create resource_id="{{ postgres_ha_cluster_pg_res_name }}" type=ocf:heartbeat:pgsqlms 60 | args: 61 | disabled: True 62 | options: 63 | bindir: "{{ postgres_ha_pg_bindir }}" 64 | pgdata: "{{ postgres_ha_pg_data }}" 65 | pgport: "{{ postgres_ha_pg_port }}" 66 | recovery_template: "{{ postgres_ha_pg_data }}/../recovery.conf.{{postgres_ha_cluster_name}}.pcmk" 67 | operations: 68 | - action: start 69 | options: 70 | timeout: 60s 71 | - action: stop 72 | options: 73 | timeout: 60s 74 | - action: promote 75 | options: 76 | timeout: 30s 77 | - action: demote 78 | options: 79 | timeout: 120s 80 | - action: notify 81 | options: 82 | timeout: 60s 83 | - action: monitor 84 | options: 85 | interval: "{{ postgres_ha_monitor_interval_pgmaster }}" 86 | timeout: 10s 87 | role: Master 88 | - action: monitor 89 | options: 90 | interval: "{{ postgres_ha_monitor_interval_pgslave }}" 91 | timeout: 10s 92 | role: Slave 93 | 94 | - name: create master DB resource 95 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 96 | pcs_resource: command=master resource_id="{{ postgres_ha_cluster_pg_HA_res_name }}" ms_name="{{ postgres_ha_cluster_pg_res_name }}" disabled=True 97 | args: 98 | options: 99 | master-max: 1 100 | master-node-max: 1 101 | clone-max: "{{ ansible_play_batch|length }}" 102 | clone-node-max: 1 103 | notify: true 104 | 105 | -------------------------------------------------------------------------------- /tasks/pcs.yml: -------------------------------------------------------------------------------- 1 | # vim: set
filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | - debug: msg='cluster_members={{ansible_play_batch}}' 4 | run_once: true 5 | 6 | - name: install cluster pkgs 7 | yum: 8 | name: pcs 9 | state: present 10 | 11 | - name: install additional cluster pkgs for centos 6 12 | yum: 13 | name: '{{ item }}' 14 | state: present 15 | when: os_version == 'centos6' 16 | with_items: 17 | - pacemaker 18 | - libselinux-python 19 | 20 | - name: "Build hosts file" 21 | lineinfile: dest=/etc/hosts regexp='.*{{ item }}$' line="{{ hostvars[item].ansible_default_ipv4.address }} {{ item }}" state=present 22 | when: hostvars[item].ansible_default_ipv4.address is defined 23 | with_items: "{{ ansible_play_batch }}" 24 | 25 | # For each host add hostnames for all postgres_ha_network_rings interfaces to /etc/hosts 26 | # example output: "10.10.90.200 myhost-ring1" 27 | - name: add additional network rings to hosts file 28 | lineinfile: dest=/etc/hosts regexp='.*{{ item[0] }}-{{ item[1] }}$' line="{{ hostvars[item[0]][['ansible_', postgres_ha_network_rings[item[1]]]|join]['ipv4']['address'] }} {{ item[0] }}-{{ item[1] }}" state=present 29 | with_nested: 30 | - "{{ ansible_play_batch }}" 31 | - "{{ postgres_ha_network_rings }}" 32 | when: postgres_ha_network_rings and 33 | hostvars[item[0]][['ansible_', postgres_ha_network_rings[item[1]]]|join] is defined 34 | 35 | # create hostname string for the "pcs cluster" command that also lists all ring hostnames (if they exist) 36 | # example: "node01,node01-ring1" 37 | # (the lowest element (e.g. "ring0") is omitted in the first name and the actual inventory_hostname is used as a default name) 38 | - name: generate pcs hostname string 39 | set_fact: 40 | pcs_hostname: "{{ inventory_hostname }}{% if postgres_ha_network_rings %}{% for ring in postgres_ha_network_rings|difference([postgres_ha_network_rings|min])|sort %},{{ inventory_hostname }}-{{ ring }}{% endfor %}{% endif %}" 41 | 42 | # output: "--addr0 net.work.ip.addr --addr1 other.net.ip.addr ..." 43 | - name: compute mcast addr settings 44 | set_fact: 45 | pcs_ring_addrs: "{% if postgres_ha_network_rings %}{% for ring in postgres_ha_network_rings|sort %}--addr{{ loop.index0 }} {{ hostvars[inventory_hostname][['ansible_', postgres_ha_network_rings[ring]]|join]['ipv4']['network'] }} {% endfor %}{% endif %}" 46 | when: postgres_ha_mcast_enable 47 | 48 | - name: enable GUI if required 49 | lineinfile: dest=/etc/sysconfig/pcsd regexp='^PCSD_DISABLE_GUI=' line="PCSD_DISABLE_GUI={% if postgres_ha_gui_enable %}false{% else %}true{% endif %}" state=present 50 | 51 | - name: service pcsd start 52 | service: name=pcsd state=started enabled=yes 53 | 54 | - name: setup hacluster password 55 | user: 56 | name: hacluster 57 | state: present 58 | update_password: always 59 | password: "{{ postgres_ha_cluster_ha_password_hash }}" 60 | 61 | - name: setup cluster auth 62 | shell: pcs cluster auth {{ ansible_play_batch | join(" ") }} -u hacluster -p "{{ postgres_ha_cluster_ha_password }}" 63 | 64 | # We create the cluster in two steps: 65 | # 1. create a one-node cluster 66 | # 2. join the other cluster nodes (the task below) 67 | # The reason is that we want to support adding new nodes by re-running the role.
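# For orientation, a hypothetical rendering of the command built by the next task
# (the cluster and host names are assumptions, for illustration only):
#   pcs cluster setup --name pgcluster "node01,node01-ring1"
# with postgres_ha_mcast_enable it additionally gets: --transport udp --addr0 <ring0 network> --addr1 <ring1 network>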
68 | - name: create cluster (centos7) 69 | shell: pcs cluster setup --name {{ postgres_ha_cluster_name }} "{{ pcs_hostname }}" {% if postgres_ha_pcs_advanced_params %}{% for param in postgres_ha_pcs_advanced_params|difference(['addr0', 'addr1', 'addr2', 'addr3', 'transport']) %}--{{ param }} {{ postgres_ha_pcs_advanced_params[param] }} {% endfor %}{% endif %} {% if postgres_ha_mcast_enable %}--transport udp {{ pcs_ring_addrs }}{% endif %} 70 | args: 71 | creates: /etc/corosync/corosync.conf 72 | when: os_version == 'centos7' and 73 | inventory_hostname == postgres_ha_cluster_master_host # run only on master node 74 | 75 | # the parameters 'addr0', 'addr1', 'addr2', 'addr3' and 'transport' from postgres_ha_pcs_advanced_params are ignored here, because the role generates them itself 76 | - name: create cluster (centos6) 77 | shell: pcs cluster setup --name {{ postgres_ha_cluster_name }} {{ pcs_hostname }} {% if postgres_ha_pcs_advanced_params %}{% for param in postgres_ha_pcs_advanced_params|difference(['addr0', 'addr1', 'addr2', 'addr3', 'transport']) %}--{{ param }} {{ postgres_ha_pcs_advanced_params[param] }} {% endfor %}{% endif %} {% if postgres_ha_mcast_enable %}--transport udp {{ pcs_ring_addrs }}{% else %}--transport udpu{% endif %} 78 | args: 79 | creates: /etc/cluster/cluster.conf 80 | when: os_version == 'centos6' and 81 | inventory_hostname == postgres_ha_cluster_master_host # run only on master node 82 | 83 | - name: join cluster nodes (centos7) 84 | shell: /bin/sh -c "if ! grep -q 'ring0_addr[:] *{{ item }}[\t ]*$' /etc/corosync/corosync.conf; then pcs cluster node add {{ hostvars[item]['pcs_hostname'] }}; fi" 85 | with_items: '{{ ansible_play_batch | difference([inventory_hostname]) }}' # all hosts except me 86 | when: os_version == 'centos7' and 87 | inventory_hostname == postgres_ha_cluster_master_host # run only on master node 88 | 89 | - name: join cluster nodes (centos6) 90 | shell: /bin/sh -c "if !
grep -q '= 10 13 | 14 | - name: determine the correct postgres package name (all systems) 15 | set_fact: 16 | pg_pkg_name: 'pgdg-centos{{ postgres_ha_pg_version | replace(".", "") }}-{{ postgres_ha_pg_version }}-{{ pg_pkg_vers_suffix }}.noarch.rpm' 17 | #when: ansible_distribution == 'CentOS' # this is default 18 | 19 | - name: determine the correct postgres package name (RHEL) 20 | set_fact: 21 | pg_pkg_name: 'pgdg-redhat{{ postgres_ha_pg_version | replace(".", "") }}-{{ postgres_ha_pg_version }}-{{ pg_pkg_vers_suffix }}.noarch.rpm' 22 | when: ansible_distribution == 'RedHat' # the ansible_distribution fact value for RHEL 23 | 24 | - name: 'import pg{{ postgres_ha_pg_version | replace(".", "") }} repo' 25 | yum: 26 | name: "{{ postgres_ha_repo_url }}" 27 | state: installed 28 | when: postgres_ha_import_repo 29 | 30 | - name: 'install pg{{ postgres_ha_pg_version | replace(".", "") }}' 31 | yum: 32 | name: 'postgresql{{ postgres_ha_pg_version | replace(".", "") }}-server, postgresql{{ postgres_ha_pg_version | replace(".", "") }}-contrib, python-psycopg2' 33 | state: installed 34 | 35 | - name: init DB dir on master if necessary (centos 7 and postgresql 9.6 or older) 36 | shell: '{{ postgres_ha_pg_bindir }}/postgresql{{ postgres_ha_pg_version | replace(".", "") }}-setup initdb' 37 | args: 38 | creates: "{{ postgres_ha_pg_data }}/PG_VERSION" 39 | # run only on one node 40 | when: inventory_hostname == postgres_ha_cluster_master_host 41 | and os_version == 'centos7' 42 | and postgres_ha_pg_version |int < 10 43 | 44 | - name: init DB dir on master if necessary (centos 7 and postgresql 10 or above) 45 | shell: '{{ postgres_ha_pg_bindir }}/postgresql-{{ postgres_ha_pg_version | replace(".", "") }}-setup initdb' 46 | args: 47 | creates: "{{ postgres_ha_pg_data }}/PG_VERSION" 48 | # run only on one node 49 | when: inventory_hostname == postgres_ha_cluster_master_host 50 | and os_version == 'centos7' 51 | and postgres_ha_pg_version |int >= 10 52 | 53 | - name: init DB dir on master if necessary (centos 6) 54 | shell: '/etc/init.d/postgresql-{{ postgres_ha_pg_version }} initdb' 55 | args: 56 | creates: "{{ postgres_ha_pg_data }}/PG_VERSION" 57 | # run only on one node 58 | when: inventory_hostname == postgres_ha_cluster_master_host 59 | and os_version == 'centos6' 60 | 61 | # if the file does not exist, DB sync is needed 62 | - name: check if DB was synchronized before 63 | stat: path="{{ postgres_ha_pg_data }}/.synchronized" 64 | register: db_prevsync_file 65 | 66 | #- name: push DB config (clustering) 67 | # template: src=postgresql.conf.j2 dest="{{ postgres_ha_pg_data }}/postgresql.conf" 68 | # when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 69 | # args: 70 | # owner: postgres 71 | # group: postgres 72 | # mode: 0600 73 | 74 | - name: alter clustering-related settings in postgresql.conf 75 | replace: 76 | dest="{{ postgres_ha_pg_data }}/postgresql.conf" 77 | regexp="^([# ]*{{ item.key }} *=.*$)" 78 | replace="{{ item.key }} = {{ item.value }}" 79 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on master node 80 | with_dict: "{{ postgres_ha_postgresql_conf_vars }}" 81 | notify: restart postgresql 82 | 83 | - meta: flush_handlers 84 | 85 | - name: alter DB ACL in pg_hba.conf 86 | lineinfile: dest="{{ postgres_ha_pg_data }}/pg_hba.conf" 87 | regexp='^host postgres .*{{ hostvars[item].ansible_default_ipv4.address }}/32' 88 | insertbefore='^host' 89 | line='host postgres {{ postgres_ha_pg_repl_user }} {{ hostvars[item].ansible_default_ipv4.address }}/32 md5' 90 | 
with_items: "{{ ansible_play_batch }}" 91 | when: inventory_hostname == postgres_ha_cluster_master_host or 92 | db_prevsync_file.stat.exists 93 | # run only on master node or on synchronized slave 94 | register: alter_pg_acl 95 | 96 | - name: alter DB replication ACL in pg_hba.conf on master 97 | lineinfile: dest="{{ postgres_ha_pg_data }}/pg_hba.conf" 98 | regexp='^host replication .*{{ hostvars[item].ansible_default_ipv4.address }}/32' 99 | insertbefore='^host' 100 | line='host replication {{ postgres_ha_pg_repl_user }} {{ hostvars[item].ansible_default_ipv4.address }}/32 {% if item == inventory_hostname %}reject{% else %}md5{% endif %}' 101 | with_items: "{{ ansible_play_batch }}" 102 | when: inventory_hostname == postgres_ha_cluster_master_host or 103 | db_prevsync_file.stat.exists 104 | # run only on master node or on synchronized slave 105 | register: alter_repl_acl 106 | 107 | #hostname:port:database:username:password 108 | - name: setup DB cluster auth (master IP) 109 | lineinfile: dest="~postgres/.pgpass" line="{{ postgres_ha_cluster_vip }}:{{ postgres_ha_pg_port }}:replication:postgres:{{ postgres_ha_pg_repl_pass }}" state=present 110 | args: 111 | owner: postgres 112 | group: postgres 113 | mode: 0600 114 | create: yes 115 | 116 | - name: setup .pgpass replication auth for master IP 117 | lineinfile: dest="~postgres/.pgpass" line="{{ postgres_ha_cluster_vip }}:{{ postgres_ha_pg_port }}:replication:{{ postgres_ha_pg_repl_user }}:{{ postgres_ha_pg_repl_pass }}" state=present 118 | args: 119 | owner: postgres 120 | group: postgres 121 | mode: 0600 122 | create: yes 123 | 124 | - name: setup .pgpass replication auth for other IPs 125 | lineinfile: dest="~postgres/.pgpass" line="{{ hostvars[item].ansible_default_ipv4.address }}:{{ postgres_ha_pg_port }}:replication:{{ postgres_ha_pg_repl_user }}:{{ postgres_ha_pg_repl_pass }}" state=present 126 | with_items: "{{ansible_play_batch}}" 127 | args: 128 | owner: postgres 129 | group: postgres 130 | mode: 0600 131 | create: yes 132 | 133 | - name: check if master host "{{ postgres_ha_cluster_master_host }}" is really a DB master 134 | shell: psql -Aqtc "SELECT pg_is_in_recovery()" 135 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 136 | become: yes 137 | become_user: postgres 138 | vars: 139 | ansible_ssh_pipelining: no 140 | register: is_slave 141 | failed_when: is_slave.stdout == 't' 142 | 143 | - name: mark master DB 144 | lineinfile: dest="{{ postgres_ha_pg_data }}/.synchronized" line="DO NOT REMOVE THIS FILE! Otherwise DB sync can go wrong." state=present 145 | args: 146 | owner: postgres 147 | group: postgres 148 | mode: 0600 149 | create: yes 150 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 151 | register: create_syncfile 152 | 153 | - name: check if DB is running (failure is OK) 154 | #shell: /bin/sh -c 'if {{ postgres_ha_pg_bindir }}/pg_ctl -D "{{ postgres_ha_pg_data }}" status &> /dev/null; then echo True; else echo False; fi' 155 | shell: "{{ postgres_ha_pg_bindir }}/pg_ctl -D {{ postgres_ha_pg_data }} status" 156 | register: db_running 157 | become: yes 158 | become_user: postgres 159 | ignore_errors: True 160 | 161 | # check if the DB is already clustered 162 | # If the resource constraint already exists, it means that DB clustering was fully applied before 163 | # and the database should run only from cluster resource. 
164 | 165 | - name: check if DB is running in cluster (failure is OK) 166 | #shell: /bin/sh -c 'if pcs resource show "{{ postgres_ha_cluster_pg_HA_res_name }}" &> /dev/null; then echo True; else echo False; fi' 167 | #shell: pcs resource show "{{ postgres_ha_cluster_pg_HA_res_name }}" 168 | shell: pcs constraint location show resources "{{ postgres_ha_cluster_pg_HA_res_name }}" | grep -q Enabled 169 | register: db_resource_exists 170 | ignore_errors: True 171 | 172 | - name: reload DB ACLs 173 | shell: "{{ postgres_ha_pg_bindir }}/pg_ctl -D {{ postgres_ha_pg_data }} reload" 174 | become: yes 175 | become_user: postgres 176 | vars: 177 | ansible_ssh_pipelining: no 178 | when: (alter_pg_acl.changed or alter_repl_acl.changed) and db_running is succeeded 179 | 180 | #- name: start master DB if necessary (without cluster) 181 | # service: name="{{ postgres_ha_pg_systemd_svcname }}" state=started enabled=yes 182 | # when: (inventory_hostname == postgres_ha_cluster_master_host) and 183 | # (db_resource_exists is failed) and 184 | # (db_running is failed) 185 | 186 | # if DB is clustered but not running, we have a problem; at least try cleaning the resource status 187 | - name: start master DB if necessary (in cluster) 188 | shell: pcs resource cleanup "{{ postgres_ha_cluster_pg_HA_res_name }}" && sleep 15 189 | when: (inventory_hostname == postgres_ha_cluster_master_host) and 190 | (db_resource_exists is succeeded) and 191 | (db_running is failed) 192 | 193 | - name: setup DB replication auth 194 | postgresql_user: 195 | name: "{{ postgres_ha_pg_repl_user }}" 196 | state: present 197 | port: "{{ postgres_ha_pg_port }}" 198 | password: "{{ postgres_ha_pg_repl_pass }}" 199 | role_attr_flags: SUPERUSER,LOGIN 200 | become: yes 201 | become_user: postgres 202 | vars: 203 | ansible_ssh_pipelining: no 204 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 205 | 206 | #- name: re-check DB datadir 207 | # find: paths="{{ postgres_ha_pg_data }}" 208 | # register: datadir_files 209 | 210 | # if the file does not exist, DB sync is needed 211 | - name: check if DB sync is required 212 | stat: path="{{ postgres_ha_pg_data }}/.synchronized" 213 | register: db_sync_file 214 | 215 | - name: stop slave DB 216 | service: name="{{ postgres_ha_pg_systemd_svcname }}" state=stopped enabled=false 217 | when: (not db_sync_file.stat.exists) and 218 | (db_resource_exists is failed) and 219 | (db_running is succeeded) 220 | 221 | - name: remove slave DB datadir before sync 222 | file: state=absent path="{{ postgres_ha_pg_data }}" 223 | #shell: rm -rf "{{ postgres_ha_pg_data }}" 224 | #args: 225 | # creates: "{{ postgres_ha_pg_data }}/.synchronized" # will not delete if the file exists (means that DB is already synced) 226 | when: not db_sync_file.stat.exists 227 | register: delete_datadir 228 | 229 | # replicate slave node (assumes the firewall is already set up) 230 | # run only on slave nodes and only when postgres datadir is empty 231 | - name: synchronize slave databases 232 | shell: '{{ postgres_ha_pg_bindir }}/pg_basebackup -h "{{ hostvars[postgres_ha_cluster_master_host].ansible_default_ipv4.address }}" -p {{ postgres_ha_pg_port }} -R -D "{{ postgres_ha_pg_data }}" -U "{{ postgres_ha_pg_repl_user }}" -v -P -X stream' 233 | become: yes 234 | become_user: postgres 235 | vars: 236 | ansible_ssh_pipelining: no 237 | when: delete_datadir.changed or not db_sync_file.stat.exists 238 | #when: (inventory_hostname != postgres_ha_cluster_master_host) and 239 | # (datadir_files.matched|int == 0) 240 | register: slave_resync
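# Descriptive note: pg_basebackup -R writes a recovery.conf on the slave with a
# primary_conninfo pointing at the master, so the fresh standby can start streaming
# immediately; once the PAF resource manages the node, recovery.conf is regenerated
# from the recovery_template prepared in paf.yml.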
241 | 242 | - name: forbid self-replication in pg_hba.conf on slaves 243 | lineinfile: dest="{{ postgres_ha_pg_data }}/pg_hba.conf" 244 | regexp='^host replication .*{{ hostvars[item].ansible_default_ipv4.address }}/32' 245 | insertbefore='^host' 246 | line='host replication {{ postgres_ha_pg_repl_user }} {{ hostvars[item].ansible_default_ipv4.address }}/32 {% if item == inventory_hostname %}reject{% else %}md5{% endif %}' 247 | with_items: "{{ ansible_play_batch }}" 248 | # run only on freshly synchronized slave 249 | when: inventory_hostname != postgres_ha_cluster_master_host and slave_resync.changed 250 | 251 | # Pause ansible execution to manually correct the postgres sync if necessary 252 | #- name: Exterminate mankind 253 | # pause: prompt='Please confirm you want to exterminate mankind! Press return to continue. Press Ctrl+c and then "a" to abort' 254 | 255 | - name: start slave DBs 256 | service: name="{{ postgres_ha_pg_systemd_svcname }}" state=started enabled=yes 257 | when: (inventory_hostname != postgres_ha_cluster_master_host) and 258 | (db_resource_exists is failed) #and delete_datadir.changed 259 | 260 | - name: check if slaves are connected 261 | shell: psql -Aqtc "SELECT count(*) FROM pg_stat_replication" 262 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 263 | become: yes 264 | become_user: postgres 265 | register: slavecount 266 | vars: 267 | ansible_ssh_pipelining: no 268 | until: (slavecount.stdout|string) == ((ansible_play_batch|length - 1)|string) 269 | retries: 15 270 | delay: 2 271 | -------------------------------------------------------------------------------- /tasks/pre-tasks.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - debug: msg='MASTER NODE SET TO {{ postgres_ha_cluster_master_host }}' 3 | run_once: true 4 | 5 | - name: verify postgres_ha_cluster_master_host 6 | fail: 7 | msg: "CRITICAL: defined master host ({{ postgres_ha_cluster_master_host }}) is not in host list ({{ ansible_play_batch }})" 8 | when: postgres_ha_cluster_master_host not in ansible_play_batch 9 | 10 | # Set default os_version. 11 | # Without os_version set, the role will fail. 12 | # This is a preparation for multi-OS support. 13 | - name: identify the OS (default) 14 | set_fact: 15 | os_version: 'centos7' 16 | 17 | - name: identify the OS (centos/rhel) 18 | set_fact: 19 | os_version: 'centos{{ ansible_distribution_major_version }}' 20 | when: ansible_distribution == 'CentOS' or ansible_distribution == 'RedHat' 21 | 22 | - debug: msg='The OS version is set to {{ os_version }}' 23 | run_once: true 24 | 25 | - name: verify PG cluster monitor intervals 26 | fail: 27 | msg: 'Parameters "postgres_ha_monitor_interval_pgmaster" and "postgres_ha_monitor_interval_pgslave" cannot have the same value!' 28 | when: postgres_ha_monitor_interval_pgmaster == postgres_ha_monitor_interval_pgslave 29 | 30 | - name: verify specific RRP setting for CentOS 6 31 | fail: 32 | msg: 'CentOS 6 requires at least 2 ring interfaces for RRP. Add another interface to the postgres_ha_network_rings config parameter or remove the current one.' 33 | when: os_version == 'centos6' and postgres_ha_mcast_enable and postgres_ha_network_rings and (postgres_ha_network_rings|length == 1)
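# A hypothetical example of the expected postgres_ha_network_rings format (a mapping
# of ring name to the interface whose ansible_<iface> fact is looked up; the values
# here are illustrative only):
# postgres_ha_network_rings:
#   ring1: eth1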
34 | 35 | # not necessary, cluster automatically selects the default interface 36 | #- name: set default network ring 37 | # set_fact: 38 | # network_rings: '{% if postgres_ha_network_rings %}{{ postgres_ha_network_rings }}{% else %}{{ { "ring0": ansible_default_ipv4.alias } }}{% endif %}' 39 | 40 | -------------------------------------------------------------------------------- /tasks/vip.yml: -------------------------------------------------------------------------------- 1 | # vim: set filetype=yaml expandtab tabstop=2 shiftwidth=2 softtabstop=2 background=dark : 2 | 3 | - name: create virtual IP resource 4 | when: inventory_hostname == postgres_ha_cluster_master_host # run only on one node 5 | pcs_resource: command=create resource_id="{{ postgres_ha_cluster_vip_res_name }}" type=ocf:heartbeat:IPaddr2 6 | args: 7 | options: 8 | ip: "{{ postgres_ha_cluster_vip }}" 9 | cidr_netmask: "{{ postgres_ha_cluster_vip_mask }}" 10 | operations: 11 | - action: monitor 12 | options: 13 | interval: 10s 14 | 15 | 16 | -------------------------------------------------------------------------------- /templates/maint/clone_clusterdb_from_master.sh.j2: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | if [ "$(whoami)" != "postgres" ]; then 4 | echo "Please run this script as user postgres!" 5 | exit 1 6 | fi 7 | 8 | CLUSTER_NAME="{{ postgres_ha_cluster_name }}" 9 | DBMASTER_IP="{{ postgres_ha_cluster_vip }}" 10 | DBPORT="{{ postgres_ha_pg_port }}" 11 | PGDATA="{{ postgres_ha_pg_data }}" 12 | PGBINDIR="{{ postgres_ha_pg_bindir }}" 13 | DBVERSION="{{ postgres_ha_pg_version }}" 14 | REPL_USER="{{ postgres_ha_pg_repl_user }}" 15 | CLUSTER_RES_NAME="{{ postgres_ha_cluster_pg_HA_res_name }}" 16 | RECOVERY_TEMPLATE="$(dirname "${PGDATA}")/recovery.conf.${CLUSTER_NAME}.pcmk" 17 | RECOVERY_DEST="${PGDATA}/recovery.conf" 18 | 19 | # usage: if check_reply_yes "Some question?"; then echo yes; else echo no; fi 20 | function check_reply_yes() 21 | { 22 | echo -n "*** $1 (Y/n)" 23 | read -r reply 24 | if [[ -z "$reply" ]] || [[ $reply == "y" ]] || [[ $reply == "Y" ]] 25 | then 26 | return 0 27 | else 28 | return 1 29 | fi 30 | } 31 | 32 | function check_retval() 33 | { 34 | retval=$? 35 | if [ $retval -ne 0 ] 36 | then 37 | echo 38 | echo "***** Error in module $1 *****" 39 | echo 40 | exit $retval 41 | fi 42 | } 43 | 44 | 45 | ####################################################################################### 46 | # PROGRAM START 47 | ####################################################################################### 48 | 49 | PGDATA="${PGDATA%/}" # remove trailing slash 50 | cat << EOF 51 | **************************************************************************************** 52 | This script will sync the postgresql datadir from the master. It can also be used to 53 | convert a failed master node to a slave and reconnect it to the cluster. 54 | Please make sure that the postgres database is not running on this node!
55 | Cluster command to shut down the database locally (for 60 minutes): 56 | pcs resource ban $CLUSTER_RES_NAME {{ inventory_hostname }} lifetime=PT60M 57 | And to bring it up before lifetime expiration: 58 | pcs resource clear $CLUSTER_RES_NAME {{ inventory_hostname }} 59 | 60 | Cluster parameters: 61 | CLUSTER NAME: $CLUSTER_NAME 62 | DB MASTER IP: $DBMASTER_IP 63 | DB PORT: $DBPORT 64 | PG DATADIR: $PGDATA 65 | 66 | EOF 67 | 68 | # check if the master IP is on this host (makes no sense to sync from myself) 69 | if /usr/sbin/ip addr show | grep -q "[ \t]$DBMASTER_IP/"; then 70 | echo "THIS IS THE MASTER NODE!!!" 71 | echo "Aborting action." 72 | exit 111 73 | fi 74 | 75 | # check if master DB is running 76 | if ! ${PGBINDIR}/pg_isready -qh "$DBMASTER_IP" -p "$DBPORT" -U "$REPL_USER"; then 77 | echo "The master database is not responding!" 78 | echo "Aborting action." 79 | exit 112 80 | fi 81 | 82 | # checks succeeded, we are ready to run the sync 83 | if ! check_reply_yes "Continue?"; then 84 | echo 85 | echo "Exiting without changes.." 86 | exit 0 87 | fi 88 | echo "****************************************************************************************" 89 | echo 90 | 91 | 92 | if ps auxww | grep -v grep | grep -Eq -- "-D +${PGDATA}( |$)"; then 93 | echo "**************************************************" 94 | echo "Postgresql server is still running. Stop it first!" 95 | echo "**************************************************" 96 | exit 1 97 | fi 98 | if ${PGBINDIR}/pg_ctl -D "$PGDATA" status > /dev/null; then 99 | echo "***********************************" 100 | echo "Postgresql server is still running!" 101 | echo "***********************************" 102 | 103 | echo ${PGBINDIR}/pg_ctl -D "$PGDATA" status 104 | ${PGBINDIR}/pg_ctl -D "$PGDATA" status 105 | echo 106 | echo Please stop the database and run this script again. 107 | exit 1 108 | fi 109 | 110 | OLD_PGDATA="${PGDATA}.backup-$(date +%Y%m%d%H%M)" 111 | if [ -d "${PGDATA}" ]; then 112 | if ! check_reply_yes "Remove old datadir? (n = rename old datadir)"; then 113 | echo "Moving old datadir.." 114 | echo mv "${PGDATA}" "${OLD_PGDATA}" 115 | mv "${PGDATA}" "${OLD_PGDATA}" 116 | check_retval rename_old_dir 117 | else 118 | echo "Removing old datadir.." 119 | echo rm -rf "${PGDATA}" 120 | rm -rf "${PGDATA}" 121 | check_retval delete_old_dir 122 | fi 123 | else 124 | echo "Old datadir ${PGDATA} does not exist. Continuing without backing it up." 125 | echo 126 | fi 127 | echo 128 | echo "Cloning the DB:" 129 | echo ${PGBINDIR}/pg_basebackup -h "$DBMASTER_IP" -D "$PGDATA" -X stream -P -p "$DBPORT" -U "$REPL_USER" 130 | ${PGBINDIR}/pg_basebackup -h "$DBMASTER_IP" -D "$PGDATA" -X stream -P -p "$DBPORT" -U "$REPL_USER" 131 | check_retval clone_DB 132 | echo 133 | echo Writing recovery.conf 134 | echo cp -f "${RECOVERY_TEMPLATE}" "${RECOVERY_DEST}" 135 | 136 | cp -f "${RECOVERY_TEMPLATE}" "${RECOVERY_DEST}" 137 | 138 | echo Altering pg_hba.conf to forbid self-replication 139 | # permit all hosts first 140 | sed -ri'' -e 's,^(host replication .*({{ all_ips.split(" ") | join("|") }})/32 *)(reject|md5),\1md5,g' "${PGDATA}/pg_hba.conf" 141 | # deny myself 142 | sed -ri'' -e 's,^(host replication .*({{ hostvars[inventory_hostname].ansible_default_ipv4.address }})/32 *)md5,\1reject,g' "${PGDATA}/pg_hba.conf" 143 | 144 | cat << EOF 145 | Slave database cloned successfully.
146 | 147 | You can start it now: 148 | pcs resource clear $CLUSTER_RES_NAME {{ inventory_hostname }} 149 | 150 | Or (in case of emergency) manually by command: 151 | ${PGBINDIR}/pg_ctl -D "$PGDATA" start 152 | 153 | Also check logs: 154 | tail -f ${PGDATA}/pg_log/\$(cd ${PGDATA}/pg_log; ls -1 | tail -1) 155 | or 156 | journalctl --follow 157 | 158 | EOF 159 | 160 | if [ -d "${OLD_PGDATA}" ]; then 161 | echo "Old datadir can be safely removed:" 162 | echo "rm -rf ${OLD_PGDATA}" 163 | echo 164 | fi 165 | echo May the Force be with you.. 166 | echo 167 | exit 0 168 | -------------------------------------------------------------------------------- /templates/pg_hba.conf.j2: -------------------------------------------------------------------------------- 1 | # TYPE DATABASE USER ADDRESS METHOD 2 | 3 | # Default: 4 | #####!!!!!######local all postgres ident map=superuser 5 | local all postgres trust 6 | # "local" is for Unix domain socket connections only 7 | local all all md5 8 | # replication ACLs 9 | {% for node in ansible_play_batch %} 10 | host replication {{ postgres_ha_pg_repl_user }} {{ hostvars[node].ansible_default_ipv4.address }}/32 md5 11 | host postgres {{ postgres_ha_pg_repl_user }} {{ hostvars[node].ansible_default_ipv4.address }}/32 md5 12 | {% endfor %} 13 | # IPv4 local connections 14 | host all all 127.0.0.1/32 md5 15 | # IPv6 local connections 16 | host all all ::1/128 md5 17 | -------------------------------------------------------------------------------- /templates/recovery.conf.pcmk.j2: -------------------------------------------------------------------------------- 1 | standby_mode = on 2 | primary_conninfo = 'port={{ postgres_ha_pg_port }} host={{ postgres_ha_cluster_vip }} user={{ postgres_ha_pg_repl_user }} application_name={{ inventory_hostname }}' 3 | recovery_target_timeline = 'latest' 4 | -------------------------------------------------------------------------------- /tests/inventory: -------------------------------------------------------------------------------- 1 | localhost -------------------------------------------------------------------------------- /tests/test.yml: -------------------------------------------------------------------------------- 1 | --- 2 | - hosts: localhost 3 | remote_user: root 4 | roles: 5 | - postgresql-ha -------------------------------------------------------------------------------- /vars/main.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # vars file for postgresql-ha 3 | --------------------------------------------------------------------------------