├── .gitignore
├── .gitreview
├── .zuul.yaml
├── LICENSE
├── README.rst
├── doc
    ├── requirements.txt
    └── source
    │   ├── common
    │       ├── app-support.rst
    │       ├── appendix.rst
    │       ├── conventions.rst
    │       └── glossary.rst
    │   ├── compute-node-ha.rst
    │   ├── conf.py
    │   ├── control-plane-stateful.rst
    │   ├── control-plane-stateless.rst
    │   ├── control-plane.rst
    │   ├── figures
    │       ├── Cluster-deployment-collapsed.png
    │       └── Cluster-deployment-segregated.png
    │   ├── ha-community.rst
    │   ├── index.rst
    │   ├── intro-ha-common-tech.rst
    │   ├── intro-ha-key-concepts.rst
    │   ├── intro-ha.rst
    │   ├── intro-os-ha-cluster.rst
    │   ├── intro-os-ha-memcached.rst
    │   ├── intro-os-ha-state.rst
    │   ├── intro-os-ha.rst
    │   ├── monitoring.rst
    │   ├── networking-ha-l3-agent.rst
    │   ├── networking-ha-neutron-l3-analysis.rst
    │   ├── networking-ha-neutron-server.rst
    │   ├── networking-ha.rst
    │   ├── overview.rst
    │   ├── ref-arch-examples.rst
    │   ├── storage-ha-backend.rst
    │   ├── storage-ha-block.rst
    │   ├── storage-ha-file-systems.rst
    │   ├── storage-ha-image.rst
    │   ├── storage-ha.rst
    │   └── testing.rst
├── setup.cfg
└── tox.ini


/.gitignore:
--------------------------------------------------------------------------------
 1 | .DS_Store
 2 | *.xpr
 3 | 
 4 | # Packages
 5 | .venv
 6 | *.egg
 7 | *.egg-info
 8 | 
 9 | # Testenvironment
10 | .tox
11 | 
12 | # Build directories
13 | doc/build
14 | 
15 | # Transifex Client Setting
16 | .tx
17 | 
18 | # Editors
19 | *~
20 | .*.swp
21 | .bak
22 | *.pyc
23 | 
24 | doc/source/.doctrees
25 | 


--------------------------------------------------------------------------------
/.gitreview:
--------------------------------------------------------------------------------
1 | [gerrit]
2 | host=review.opendev.org
3 | port=29418
4 | project=openstack/ha-guide.git
5 | 


--------------------------------------------------------------------------------
/.zuul.yaml:
--------------------------------------------------------------------------------
1 | - project:
2 |     templates:
3 |       - build-openstack-docs-pti
4 |     promote:
5 |       jobs:
6 |         - promote-openstack-tox-docs-direct
7 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 | 
  2 |                                  Apache License
  3 |                            Version 2.0, January 2004
  4 |                         http://www.apache.org/licenses/
  5 | 
  6 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  7 | 
  8 |    1. Definitions.
  9 | 
 10 |       "License" shall mean the terms and conditions for use, reproduction,
 11 |       and distribution as defined by Sections 1 through 9 of this document.
 12 | 
 13 |       "Licensor" shall mean the copyright owner or entity authorized by
 14 |       the copyright owner that is granting the License.
 15 | 
 16 |       "Legal Entity" shall mean the union of the acting entity and all
 17 |       other entities that control, are controlled by, or are under common
 18 |       control with that entity. For the purposes of this definition,
 19 |       "control" means (i) the power, direct or indirect, to cause the
 20 |       direction or management of such entity, whether by contract or
 21 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 22 |       outstanding shares, or (iii) beneficial ownership of such entity.
 23 | 
 24 |       "You" (or "Your") shall mean an individual or Legal Entity
 25 |       exercising permissions granted by this License.
 26 | 
 27 |       "Source" form shall mean the preferred form for making modifications,
 28 |       including but not limited to software source code, documentation
 29 |       source, and configuration files.
 30 | 
 31 |       "Object" form shall mean any form resulting from mechanical
 32 |       transformation or translation of a Source form, including but
 33 |       not limited to compiled object code, generated documentation,
 34 |       and conversions to other media types.
 35 | 
 36 |       "Work" shall mean the work of authorship, whether in Source or
 37 |       Object form, made available under the License, as indicated by a
 38 |       copyright notice that is included in or attached to the work
 39 |       (an example is provided in the Appendix below).
 40 | 
 41 |       "Derivative Works" shall mean any work, whether in Source or Object
 42 |       form, that is based on (or derived from) the Work and for which the
 43 |       editorial revisions, annotations, elaborations, or other modifications
 44 |       represent, as a whole, an original work of authorship. For the purposes
 45 |       of this License, Derivative Works shall not include works that remain
 46 |       separable from, or merely link (or bind by name) to the interfaces of,
 47 |       the Work and Derivative Works thereof.
 48 | 
 49 |       "Contribution" shall mean any work of authorship, including
 50 |       the original version of the Work and any modifications or additions
 51 |       to that Work or Derivative Works thereof, that is intentionally
 52 |       submitted to Licensor for inclusion in the Work by the copyright owner
 53 |       or by an individual or Legal Entity authorized to submit on behalf of
 54 |       the copyright owner. For the purposes of this definition, "submitted"
 55 |       means any form of electronic, verbal, or written communication sent
 56 |       to the Licensor or its representatives, including but not limited to
 57 |       communication on electronic mailing lists, source code control systems,
 58 |       and issue tracking systems that are managed by, or on behalf of, the
 59 |       Licensor for the purpose of discussing and improving the Work, but
 60 |       excluding communication that is conspicuously marked or otherwise
 61 |       designated in writing by the copyright owner as "Not a Contribution."
 62 | 
 63 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 64 |       on behalf of whom a Contribution has been received by Licensor and
 65 |       subsequently incorporated within the Work.
 66 | 
 67 |    2. Grant of Copyright License. Subject to the terms and conditions of
 68 |       this License, each Contributor hereby grants to You a perpetual,
 69 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 70 |       copyright license to reproduce, prepare Derivative Works of,
 71 |       publicly display, publicly perform, sublicense, and distribute the
 72 |       Work and such Derivative Works in Source or Object form.
 73 | 
 74 |    3. Grant of Patent License. Subject to the terms and conditions of
 75 |       this License, each Contributor hereby grants to You a perpetual,
 76 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 77 |       (except as stated in this section) patent license to make, have made,
 78 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 79 |       where such license applies only to those patent claims licensable
 80 |       by such Contributor that are necessarily infringed by their
 81 |       Contribution(s) alone or by combination of their Contribution(s)
 82 |       with the Work to which such Contribution(s) was submitted. If You
 83 |       institute patent litigation against any entity (including a
 84 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 85 |       or a Contribution incorporated within the Work constitutes direct
 86 |       or contributory patent infringement, then any patent licenses
 87 |       granted to You under this License for that Work shall terminate
 88 |       as of the date such litigation is filed.
 89 | 
 90 |    4. Redistribution. You may reproduce and distribute copies of the
 91 |       Work or Derivative Works thereof in any medium, with or without
 92 |       modifications, and in Source or Object form, provided that You
 93 |       meet the following conditions:
 94 | 
 95 |       (a) You must give any other recipients of the Work or
 96 |           Derivative Works a copy of this License; and
 97 | 
 98 |       (b) You must cause any modified files to carry prominent notices
 99 |           stating that You changed the files; and
100 | 
101 |       (c) You must retain, in the Source form of any Derivative Works
102 |           that You distribute, all copyright, patent, trademark, and
103 |           attribution notices from the Source form of the Work,
104 |           excluding those notices that do not pertain to any part of
105 |           the Derivative Works; and
106 | 
107 |       (d) If the Work includes a "NOTICE" text file as part of its
108 |           distribution, then any Derivative Works that You distribute must
109 |           include a readable copy of the attribution notices contained
110 |           within such NOTICE file, excluding those notices that do not
111 |           pertain to any part of the Derivative Works, in at least one
112 |           of the following places: within a NOTICE text file distributed
113 |           as part of the Derivative Works; within the Source form or
114 |           documentation, if provided along with the Derivative Works; or,
115 |           within a display generated by the Derivative Works, if and
116 |           wherever such third-party notices normally appear. The contents
117 |           of the NOTICE file are for informational purposes only and
118 |           do not modify the License. You may add Your own attribution
119 |           notices within Derivative Works that You distribute, alongside
120 |           or as an addendum to the NOTICE text from the Work, provided
121 |           that such additional attribution notices cannot be construed
122 |           as modifying the License.
123 | 
124 |       You may add Your own copyright statement to Your modifications and
125 |       may provide additional or different license terms and conditions
126 |       for use, reproduction, or distribution of Your modifications, or
127 |       for any such Derivative Works as a whole, provided Your use,
128 |       reproduction, and distribution of the Work otherwise complies with
129 |       the conditions stated in this License.
130 | 
131 |    5. Submission of Contributions. Unless You explicitly state otherwise,
132 |       any Contribution intentionally submitted for inclusion in the Work
133 |       by You to the Licensor shall be under the terms and conditions of
134 |       this License, without any additional terms or conditions.
135 |       Notwithstanding the above, nothing herein shall supersede or modify
136 |       the terms of any separate license agreement you may have executed
137 |       with Licensor regarding such Contributions.
138 | 
139 |    6. Trademarks. This License does not grant permission to use the trade
140 |       names, trademarks, service marks, or product names of the Licensor,
141 |       except as required for reasonable and customary use in describing the
142 |       origin of the Work and reproducing the content of the NOTICE file.
143 | 
144 |    7. Disclaimer of Warranty. Unless required by applicable law or
145 |       agreed to in writing, Licensor provides the Work (and each
146 |       Contributor provides its Contributions) on an "AS IS" BASIS,
147 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 |       implied, including, without limitation, any warranties or conditions
149 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 |       PARTICULAR PURPOSE. You are solely responsible for determining the
151 |       appropriateness of using or redistributing the Work and assume any
152 |       risks associated with Your exercise of permissions under this License.
153 | 
154 |    8. Limitation of Liability. In no event and under no legal theory,
155 |       whether in tort (including negligence), contract, or otherwise,
156 |       unless required by applicable law (such as deliberate and grossly
157 |       negligent acts) or agreed to in writing, shall any Contributor be
158 |       liable to You for damages, including any direct, indirect, special,
159 |       incidental, or consequential damages of any character arising as a
160 |       result of this License or out of the use or inability to use the
161 |       Work (including but not limited to damages for loss of goodwill,
162 |       work stoppage, computer failure or malfunction, or any and all
163 |       other commercial damages or losses), even if such Contributor
164 |       has been advised of the possibility of such damages.
165 | 
166 |    9. Accepting Warranty or Additional Liability. While redistributing
167 |       the Work or Derivative Works thereof, You may choose to offer,
168 |       and charge a fee for, acceptance of support, warranty, indemnity,
169 |       or other liability obligations and/or rights consistent with this
170 |       License. However, in accepting such obligations, You may act only
171 |       on Your own behalf and on Your sole responsibility, not on behalf
172 |       of any other Contributor, and only if You agree to indemnify,
173 |       defend, and hold each Contributor harmless for any liability
174 |       incurred by, or claims asserted against, such Contributor by reason
175 |       of your accepting any such warranty or additional liability.
176 | 
177 | 


--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
 1 | =================================
 2 | OpenStack High Availability Guide
 3 | =================================
 4 | 
 5 | This repository contains the source files for the OpenStack High Availability
 6 | Guide.
 7 | 
 8 | You can read this guide at `docs.openstack.org/ha-guide
 9 | <https://docs.openstack.org/ha-guide/>`_.
10 | 
11 | Prerequisites
12 | -------------
13 | 
14 | At a minimum, you will need git and the git-review tool installed in order to
15 | contribute documentation. You will also need a `Gerrit account
16 | <https://docs.openstack.org/infra/manual/developers.html#account-setup>`_ to
17 | submit the change.
18 | 
19 | Git is available for Linux, Mac, and Windows environments. Some platforms come
20 | with it preinstalled, but you can review the `installation instructions
21 | <https://git-scm.com/book/en/v2/Getting-Started-Installing-Git>`_ if you
22 | do not have it by default.
23 | 
24 | Once git is installed, you can follow the instructions for your platform to
25 | `install git-review <https://docs.opendev.org/opendev/git-review/latest/installation.html>`_.
26 | 
27 | The last step is to configure git with your name and email address used for
28 | your Gerrit account set up so it can link you patch to your user. Run the
29 | following to set these values:
30 | 
31 | .. code-block:: console
32 | 
33 |   git config --global user.name "First Last"
34 |   git config --global user.email "your_email@youremail.com"
35 | 
36 | 
37 | Submitting Updates
38 | ------------------
39 | 
40 | Proposing updates to the documentation is fairly straight forward once you've
41 | done it, but there are a few steps that can appear intimidating your first
42 | couple times through. Here is a suggested workflow to help you along the way.
43 | 
44 | .. code-block:: console
45 | 
46 |   git clone https://opendev.org/openstack/ha-guide
47 |   cd ha-guide
48 | 
49 |   # it is useful to make changes on a separate branch in case you need to make
50 |   # other changes
51 |   git checkout -b my-topic
52 | 
53 |   # edit your files
54 |   git add .
55 |   git commit # Add a descriptive commit message
56 | 
57 |   # submit your changes for review
58 |   git review
59 | 
60 | The changes will then be run through a few tests to make sure the docs build
61 | and it will be ready for reviews. Once reviewed, if no problems are found with
62 | the changes they will be merged to the repo and the changes will be published
63 | to the docs.openstack.org site.
64 | 
65 | Local Testing
66 | -------------
67 | 
68 | If you would like to build the docs locally to make sure there are no issues
69 | with the changes, and to view locally generated HTML files, you will need to do
70 | a couple extra steps.
71 | 
72 | The jobs are run using a tool called `tox`. You will need to install tox on
73 | your platform first following its `installation guide
74 | <https://tox.wiki/en/latest/installation.html>`_.
75 | 
76 | You can then run the following to perform a local build with some tests:
77 | 
78 | .. code-block:: console
79 | 
80 |   tox -e docs
81 | 
82 | If you have any questions, please reach out on the #openstack-operators IRC
83 | channel or through the openstack-ops mailing list.
84 | 


--------------------------------------------------------------------------------
/doc/requirements.txt:
--------------------------------------------------------------------------------
1 | # The order of packages is significant, because pip processes them in the order
2 | # of appearance. Changing the order has an impact on the overall integration
3 | # process, which may cause wedges in the gate later.
4 | openstackdocstheme>=3.5.0 # Apache-2.0
5 | sphinx>=8.1.3 # BSD
6 | doc8>=1.1.2 # Apache-2.0
7 | 


--------------------------------------------------------------------------------
/doc/source/common/app-support.rst:
--------------------------------------------------------------------------------
  1 | .. ## WARNING ##########################################################
  2 | .. This file is synced from openstack/openstack-manuals repository to
  3 | .. other related repositories. If you need to make changes to this file,
  4 | .. make the changes in openstack-manuals. After any change merged to,
  5 | .. openstack-manuals, automatically a patch for others will be proposed.
  6 | .. #####################################################################
  7 | 
  8 | =================
  9 | Community support
 10 | =================
 11 | 
 12 | The following resources are available to help you run and use OpenStack.
 13 | The OpenStack community constantly improves and adds to the main
 14 | features of OpenStack, but if you have any questions, do not hesitate to
 15 | ask. Use the following resources to get OpenStack support and
 16 | troubleshoot your installations.
 17 | 
 18 | Documentation
 19 | ~~~~~~~~~~~~~
 20 | 
 21 | For the available OpenStack documentation, see
 22 | `docs.openstack.org <https://docs.openstack.org>`_.
 23 | 
 24 | The following guides explain how to install a Proof-of-Concept OpenStack cloud
 25 | and its associated components:
 26 | 
 27 | * `2025.1 Epoxy Installation Guides <https://docs.openstack.org/2025.1/install/>`_
 28 | 
 29 | The following books explain how to configure and run an OpenStack cloud:
 30 | 
 31 | *  `Architecture Design Guide <https://docs.openstack.org/arch-design/>`_
 32 | 
 33 | *  `Administrator Guides <https://docs.openstack.org/2025.1/admin/>`_
 34 | 
 35 | *  `Configuration Guides <https://docs.openstack.org/2025.1/configuration/>`_
 36 | 
 37 | *  `Networking Guide <https://docs.openstack.org/neutron/2025.1/admin/>`_
 38 | 
 39 | *  `High Availability Guide <https://docs.openstack.org/ha-guide/>`_
 40 | 
 41 | *  `Security Guide <https://docs.openstack.org/security-guide/>`_
 42 | 
 43 | *  `Virtual Machine Image Guide <https://docs.openstack.org/image-guide/>`_
 44 | 
 45 | The following book explains how to use the command-line clients:
 46 | 
 47 | *  `API Bindings
 48 |    <https://docs.openstack.org/2025.1/language-bindings.html>`_
 49 | 
 50 | The following documentation provides reference and guidance information
 51 | for the OpenStack APIs:
 52 | 
 53 | *  `API Documentation <https://docs.openstack.org/api-quick-start/>`_
 54 | 
 55 | The following guide provides information on how to contribute to OpenStack
 56 | documentation:
 57 | 
 58 | *  `Documentation Contributor Guide <https://docs.openstack.org/doc-contrib-guide/>`_
 59 | 
 60 | The OpenStack wiki
 61 | ~~~~~~~~~~~~~~~~~~
 62 | 
 63 | The `OpenStack wiki <https://wiki.openstack.org/>`_ contains a broad
 64 | range of topics but some of the information can be difficult to find or
 65 | is a few pages deep. Fortunately, the wiki search feature enables you to
 66 | search by title or content. If you search for specific information, such
 67 | as about networking or OpenStack Compute, you can find a large amount
 68 | of relevant material. More is being added all the time, so be sure to
 69 | check back often. You can find the search box in the upper-right corner
 70 | of any OpenStack wiki page.
 71 | 
 72 | The Launchpad bugs area
 73 | ~~~~~~~~~~~~~~~~~~~~~~~
 74 | 
 75 | The OpenStack community values your set up and testing efforts and wants
 76 | your feedback. To log a bug, you must `sign up for a Launchpad account
 77 | <https://launchpad.net/+login>`_. You can view existing bugs and report bugs
 78 | in the Launchpad Bugs area. Use the search feature to determine whether
 79 | the bug has already been reported or already been fixed. If it still
 80 | seems like your bug is unreported, fill out a bug report.
 81 | 
 82 | Some tips:
 83 | 
 84 | *  Give a clear, concise summary.
 85 | 
 86 | *  Provide as much detail as possible in the description. Paste in your
 87 |    command output or stack traces, links to screen shots, and any other
 88 |    information which might be useful.
 89 | 
 90 | *  Be sure to include the software and package versions that you are
 91 |    using, especially if you are using a development branch, such as,
 92 |    ``"Kilo release" vs git commit bc79c3ecc55929bac585d04a03475b72e06a3208``.
 93 | 
 94 | *  Any deployment-specific information is helpful, such as whether you
 95 |    are using CentOS Stream 9 or are performing a multi-node installation.
 96 | 
 97 | The following Launchpad Bugs areas are available:
 98 | 
 99 | *  `Bugs: OpenStack Block Storage
100 |    (cinder) <https://bugs.launchpad.net/cinder>`_
101 | 
102 | *  `Bugs: OpenStack Compute (nova) <https://bugs.launchpad.net/nova>`_
103 | 
104 | *  `Bugs: OpenStack Dashboard
105 |    (horizon) <https://bugs.launchpad.net/horizon>`_
106 | 
107 | *  `Bugs: OpenStack Identity
108 |    (keystone) <https://bugs.launchpad.net/keystone>`_
109 | 
110 | *  `Bugs: OpenStack Image service
111 |    (glance) <https://bugs.launchpad.net/glance>`_
112 | 
113 | *  `Bugs: OpenStack Networking
114 |    (neutron) <https://bugs.launchpad.net/neutron>`_
115 | 
116 | *  `Bugs: OpenStack Object Storage
117 |    (swift) <https://bugs.launchpad.net/swift>`_
118 | 
119 | *  `Bugs: Bare metal service (ironic) <https://bugs.launchpad.net/ironic>`_
120 | 
121 | *  `Bugs: Clustering service (senlin) <https://bugs.launchpad.net/senlin>`_
122 | 
123 | *  `Bugs: Container Infrastructure Management service (magnum) <https://bugs.launchpad.net/magnum>`_
124 | 
125 | *  `Bugs: Database service (trove) <https://storyboard.openstack.org/#!/project_group/trove>`_
126 | 
127 | *  `Bugs: DNS service (designate) <https://bugs.launchpad.net/designate>`_
128 | 
129 | *  `Bugs: Key Manager Service (barbican) <https://bugs.launchpad.net/barbican>`_
130 | 
131 | *  `Bugs: Monitoring (monasca) <https://storyboard.openstack.org/#!/project_group/monasca>`_
132 | 
133 | *  `Bugs: Orchestration (heat) <https://bugs.launchpad.net/heat>`_
134 | 
135 | *  `Bugs: Rating (cloudkitty) <https://bugs.launchpad.net/cloudkitty>`_
136 | 
137 | *  `Bugs: Shared file systems (manila) <https://bugs.launchpad.net/manila>`_
138 | 
139 | *  `Bugs: Telemetry
140 |    (ceilometer) <https://bugs.launchpad.net/ceilometer>`_
141 | 
142 | *  `Bugs: Workflow service
143 |    (mistral) <https://bugs.launchpad.net/mistral>`_
144 | 
145 | *  `Bugs: Messaging service
146 |    (zaqar) <https://bugs.launchpad.net/zaqar>`_
147 | 
148 | *  `Bugs: Container service
149 |    (zun) <https://bugs.launchpad.net/zun>`_
150 | 
151 | *  `Bugs: OpenStack API Documentation
152 |    (developer.openstack.org) <https://bugs.launchpad.net/openstack-api-site>`_
153 | 
154 | *  `Bugs: OpenStack Documentation
155 |    (docs.openstack.org) <https://bugs.launchpad.net/openstack-manuals>`_
156 | 
157 | Documentation feedback
158 | ~~~~~~~~~~~~~~~~~~~~~~
159 | 
160 | To provide feedback on documentation, join our IRC channel ``#openstack-doc``
161 | on the OFTC IRC network, or `report a bug in Launchpad
162 | <https://bugs.launchpad.net/openstack/+filebug>`_ and choose the particular
163 | project that the documentation is a part of.
164 | 
165 | The OpenStack IRC channel
166 | ~~~~~~~~~~~~~~~~~~~~~~~~~
167 | 
168 | The OpenStack community lives in the #openstack IRC channel on the
169 | OFTC network. You can hang out, ask questions, or get immediate
170 | feedback for urgent and pressing issues. To install an IRC client or use
171 | a browser-based client, go to
172 | `https://webchat.oftc.net/ <https://webchat.oftc.net>`_. You can
173 | also use `Colloquy <https://colloquy.app/>`_ (Mac OS X),
174 | `mIRC <https://www.mirc.com/>`_ (Windows),
175 | or XChat (Linux). When you are in the IRC channel
176 | and want to share code or command output, the generally accepted method
177 | is to use a Paste Bin. The OpenStack project has one at `Paste
178 | <https://paste.opendev.org>`_. Just paste your longer amounts of text or
179 | logs in the web form and you get a URL that you can paste into the
180 | channel. The OpenStack IRC channel is ``#openstack`` on
181 | ``irc.oftc.net``. You can find a list of all OpenStack IRC channels on
182 | the `IRC page on the wiki <https://wiki.openstack.org/wiki/IRC>`_.
183 | 
184 | OpenStack mailing lists
185 | ~~~~~~~~~~~~~~~~~~~~~~~
186 | 
187 | A great way to get answers and insights is to post your question or
188 | problematic scenario to the OpenStack mailing list. You can learn from
189 | and help others who might have similar issues. To subscribe or view the
190 | archives, go to the `general OpenStack mailing list
191 | <https://lists.openstack.org/mailman3/lists/openstack-discuss.lists.openstack.org/>`_. If you are
192 | interested in the other mailing lists for specific projects or development,
193 | refer to `Mailing Lists <https://wiki.openstack.org/wiki/Mailing_Lists>`_.
194 | 
195 | OpenStack distribution packages
196 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
197 | 
198 | The following Linux distributions provide community-supported packages
199 | for OpenStack:
200 | 
201 | *  **CentOS Stream and Red Hat Enterprise Linux:**
202 |    https://www.rdoproject.org/
203 | 
204 | *  **Ubuntu:** https://wiki.ubuntu.com/OpenStack/CloudArchive
205 | 


--------------------------------------------------------------------------------
/doc/source/common/appendix.rst:
--------------------------------------------------------------------------------
1 | Appendix
2 | ~~~~~~~~
3 | 
4 | .. toctree::
5 |    :maxdepth: 1
6 | 
7 |    app-support.rst
8 |    glossary.rst
9 | 


--------------------------------------------------------------------------------
/doc/source/common/conventions.rst:
--------------------------------------------------------------------------------
 1 | .. ## WARNING ##########################################################
 2 | .. This file is synced from openstack/openstack-manuals repository to
 3 | .. other related repositories. If you need to make changes to this file,
 4 | .. make the changes in openstack-manuals. After any change merged to,
 5 | .. openstack-manuals, automatically a patch for others will be proposed.
 6 | .. #####################################################################
 7 | 
 8 | ===========
 9 | Conventions
10 | ===========
11 | 
12 | The OpenStack documentation uses several typesetting conventions.
13 | 
14 | Notices
15 | ~~~~~~~
16 | 
17 | Notices take these forms:
18 | 
19 | .. note:: A comment with additional information that explains a part of the
20 |           text.
21 | 
22 | .. important:: Something you must be aware of before proceeding.
23 | 
24 | .. tip:: An extra but helpful piece of practical advice.
25 | 
26 | .. caution:: Helpful information that prevents the user from making mistakes.
27 | 
28 | .. warning:: Critical information about the risk of data loss or security
29 |              issues.
30 | 
31 | Command prompts
32 | ~~~~~~~~~~~~~~~
33 | 
34 | .. code-block:: console
35 | 
36 |    $ command
37 | 
38 | Any user, including the ``root`` user, can run commands that are
39 | prefixed with the ``$`` prompt.
40 | 
41 | .. code-block:: console
42 | 
43 |    # command
44 | 
45 | The ``root`` user must run commands that are prefixed with the ``#``
46 | prompt. You can also prefix these commands with the :command:`sudo`
47 | command, if available, to run them.
48 | 


--------------------------------------------------------------------------------
/doc/source/compute-node-ha.rst:
--------------------------------------------------------------------------------
 1 | ============================
 2 | Configuring the compute node
 3 | ============================
 4 | 
 5 | The `Installation Guides
 6 | <https://docs.openstack.org/ocata/install/>`_
 7 | provide instructions for installing multiple compute nodes.
 8 | To make the compute nodes highly available, you must configure the
 9 | environment to include multiple instances of the API and other services.
10 | 
11 | Configuring high availability for instances
12 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
13 | 
14 | As of September 2016, the OpenStack High Availability community is
15 | designing and developing an official and unified way to provide high
16 | availability for instances. We are developing automatic
17 | recovery from failures of hardware or hypervisor-related software on
18 | the compute node, or other failures that could prevent instances from
19 | functioning correctly, such as, issues with a cinder volume I/O path.
20 | 
21 | More details are available in the `user story
22 | <http://specs.openstack.org/openstack/openstack-user-stories/user-stories/proposed/ha_vm.html>`_
23 | co-authored by OpenStack's HA community and `Product Working Group
24 | <https://wiki.openstack.org/wiki/ProductTeam>`_ (PWG), where this feature is
25 | identified as missing functionality in OpenStack, which
26 | should be addressed with high priority.
27 | 
28 | Existing solutions
29 | ~~~~~~~~~~~~~~~~~~
30 | 
31 | The architectural challenges of instance HA and several currently
32 | existing solutions were presented in `a talk at the Austin summit
33 | <https://www.openstack.org/videos/video/high-availability-for-pets-and-hypervisors-state-of-the-nation>`_,
34 | for which `slides are also available <http://aspiers.github.io/openstack-summit-2016-austin-compute-ha/>`_.
35 | 
36 | The code for three of these solutions can be found online at the following
37 | links:
38 | 
39 | * `a mistral-based auto-recovery workflow
40 |   <https://github.com/gryf/mistral-evacuate>`_, by Intel
41 | * `masakari <https://launchpad.net/masakari>`_, by NTT
42 | * `OCF RAs
43 |   <http://aspiers.github.io/openstack-summit-2016-austin-compute-ha/#/ocf-pros-cons>`_,
44 |   as used by Red Hat and SUSE
45 | 
46 | Current upstream work
47 | ~~~~~~~~~~~~~~~~~~~~~
48 | 
49 | Work is in progress on a unified approach, which combines the best
50 | aspects of existing upstream solutions. More details are available on
51 | `the HA VMs user story wiki
52 | <https://wiki.openstack.org/wiki/ProductTeam/User_Stories/HA_VMs>`_.
53 | 
54 | To get involved with this work, see the section on the
55 | :doc:`ha-community`.
56 | 


--------------------------------------------------------------------------------
/doc/source/conf.py:
--------------------------------------------------------------------------------
  1 | # Licensed under the Apache License, Version 2.0 (the "License");
  2 | # you may not use this file except in compliance with the License.
  3 | # You may obtain a copy of the License at
  4 | #
  5 | #    http://www.apache.org/licenses/LICENSE-2.0
  6 | #
  7 | # Unless required by applicable law or agreed to in writing, software
  8 | # distributed under the License is distributed on an "AS IS" BASIS,
  9 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 10 | # implied.
 11 | # See the License for the specific language governing permissions and
 12 | # limitations under the License.
 13 | 
 14 | # This file is execfile()d with the current directory set to its
 15 | # containing dir.
 16 | #
 17 | # Note that not all possible configuration values are present in this
 18 | # autogenerated file.
 19 | #
 20 | # All configuration values have a default; values that are commented out
 21 | # serve to show the default.
 22 | 
 23 | import os
 24 | # import sys
 25 | 
 26 | # If extensions (or modules to document with autodoc) are in another directory,
 27 | # add these directories to sys.path here. If the directory is relative to the
 28 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 29 | # sys.path.insert(0, os.path.abspath('.'))
 30 | 
 31 | # -- General configuration ------------------------------------------------
 32 | 
 33 | # If your documentation needs a minimal Sphinx version, state it here.
 34 | # needs_sphinx = '1.0'
 35 | 
 36 | # Add any Sphinx extension module names here, as strings. They can be
 37 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 38 | # ones.
 39 | extensions = ['openstackdocstheme']
 40 | 
 41 | # Add any paths that contain templates here, relative to this directory.
 42 | # templates_path = ['_templates']
 43 | 
 44 | # The suffix of source filenames.
 45 | source_suffix = '.rst'
 46 | 
 47 | # The encoding of source files.
 48 | # source_encoding = 'utf-8-sig'
 49 | 
 50 | # The master toctree document.
 51 | master_doc = 'index'
 52 | 
 53 | # General information about the project.
 54 | openstackdocs_repo_name = "openstack/ha-guide"
 55 | openstackdocs_use_storyboard = True
 56 | copyright = '2016-present, OpenStack contributors'
 57 | 
 58 | # The version info for the project you're documenting, acts as replacement for
 59 | # |version| and |release|, also used in various other places throughout the
 60 | # built documents.
 61 | #
 62 | # The short X.Y version.
 63 | version = ''
 64 | # The full version, including alpha/beta/rc tags.
 65 | release = ''
 66 | 
 67 | # The language for content autogenerated by Sphinx. Refer to documentation
 68 | # for a list of supported languages.
 69 | # language = None
 70 | 
 71 | # There are two options for replacing |today|: either, you set today to some
 72 | # non-false value, then it is used:
 73 | # today = ''
 74 | # Else, today_fmt is used as the format for a strftime call.
 75 | # today_fmt = '%B %d, %Y'
 76 | 
 77 | # List of patterns, relative to source directory, that match files and
 78 | # directories to ignore when looking for source files.
 79 | exclude_patterns = ['common/cli*', 'common/nova*',
 80 |                     'common/get-started*', 'common/dashboard*']
 81 | 
 82 | # The reST default role (used for this markup: `text`) to use for all
 83 | # documents.
 84 | # default_role = None
 85 | 
 86 | # If true, '()' will be appended to :func: etc. cross-reference text.
 87 | # add_function_parentheses = True
 88 | 
 89 | # If true, the current module name will be prepended to all description
 90 | # unit titles (such as .. function::).
 91 | # add_module_names = True
 92 | 
 93 | # If true, sectionauthor and moduleauthor directives will be shown in the
 94 | # output. They are ignored by default.
 95 | # show_authors = False
 96 | 
 97 | # The name of the Pygments (syntax highlighting) style to use.
 98 | pygments_style = 'sphinx'
 99 | 
100 | # A list of ignored prefixes for module index sorting.
101 | # modindex_common_prefix = []
102 | 
103 | # If true, keep warnings as "system message" paragraphs in the built documents.
104 | # keep_warnings = False
105 | 
106 | 
107 | # -- Options for HTML output ----------------------------------------------
108 | 
109 | # The theme to use for HTML and HTML Help pages.  See the documentation for
110 | # a list of builtin themes.
111 | html_theme = 'openstackdocs'
112 | 
113 | # Theme options are theme-specific and customize the look and feel of a theme
114 | # further.  For a list of options available for each theme, see the
115 | # documentation.
116 | html_theme_options = {
117 |     'display_badge': False
118 | }
119 | 
120 | # Add any paths that contain custom themes here, relative to this directory.
121 | # html_theme_path = [openstackdocstheme.get_html_theme_path()]
122 | 
123 | # The name for this set of Sphinx documents.  If None, it defaults to
124 | # "<project> v<release> documentation".
125 | # html_title = None
126 | 
127 | # A shorter title for the navigation bar.  Default is the same as html_title.
128 | # html_short_title = None
129 | 
130 | # The name of an image file (relative to this directory) to place at the top
131 | # of the sidebar.
132 | # html_logo = None
133 | 
134 | # The name of an image file (within the static path) to use as favicon of the
135 | # docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
136 | # pixels large.
137 | # html_favicon = None
138 | 
139 | # Add any paths that contain custom static files (such as style sheets) here,
140 | # relative to this directory. They are copied after the builtin static files,
141 | # so a file named "default.css" will overwrite the builtin "default.css".
142 | # html_static_path = []
143 | 
144 | # Add any extra paths that contain custom files (such as robots.txt or
145 | # .htaccess) here, relative to this directory. These files are copied
146 | # directly to the root of the documentation.
147 | # html_extra_path = []
148 | 
149 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
150 | # using the given strftime format.
151 | # So that we can enable "log-a-bug" links from each output HTML page, this
152 | # variable must be set to a format that includes year, month, day, hours and
153 | # minutes.
154 | # html_last_updated_fmt = '%Y-%m-%d %H:%M'
155 | 
156 | # If true, SmartyPants will be used to convert quotes and dashes to
157 | # typographically correct entities.
158 | # html_use_smartypants = True
159 | 
160 | # Custom sidebar templates, maps document names to template names.
161 | # html_sidebars = {}
162 | 
163 | # Additional templates that should be rendered to pages, maps page names to
164 | # template names.
165 | # html_additional_pages = {}
166 | 
167 | # If false, no module index is generated.
168 | # html_domain_indices = True
169 | 
170 | # If false, no index is generated.
171 | html_use_index = False
172 | 
173 | # If true, the index is split into individual pages for each letter.
174 | # html_split_index = False
175 | 
176 | # If true, links to the reST sources are added to the pages.
177 | html_show_sourcelink = False
178 | 
179 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
180 | # html_show_sphinx = True
181 | 
182 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
183 | # html_show_copyright = True
184 | 
185 | # If true, an OpenSearch description file will be output, and all pages will
186 | # contain a <link> tag referring to it.  The value of this option must be the
187 | # base URL from which the finished HTML is served.
188 | # html_use_opensearch = ''
189 | 
190 | # This is the file name suffix for HTML files (e.g. ".xhtml").
191 | # html_file_suffix = None
192 | 
193 | # Output file base name for HTML help builder.
194 | htmlhelp_basename = 'ha-guide'
195 | 
196 | # If true, publish source files
197 | html_copy_source = False
198 | 
199 | # -- Options for LaTeX output ---------------------------------------------
200 | 
201 | # Grouping the document tree into LaTeX files. List of tuples
202 | # (source start file, target name, title,
203 | #  author, documentclass [howto, manual, or own class]).
204 | latex_documents = [
205 |     ('index', 'HAGuide.tex', 'HA Guide',
206 |      'OpenStack contributors', 'manual'),
207 | ]
208 | 
209 | # The name of an image file (relative to this directory) to place at the top of
210 | # the title page.
211 | # latex_logo = None
212 | 
213 | # For "manual" documents, if this is true, then toplevel headings are parts,
214 | # not chapters.
215 | # latex_use_parts = False
216 | 
217 | # If true, show page references after internal links.
218 | # latex_show_pagerefs = False
219 | 
220 | # If true, show URL addresses after external links.
221 | # latex_show_urls = False
222 | 
223 | # Documents to append as an appendix to all manuals.
224 | # latex_appendices = []
225 | 
226 | # If false, no module index is generated.
227 | # latex_domain_indices = True
228 | 
229 | 
230 | # -- Options for manual page output ---------------------------------------
231 | 
232 | # One entry per manual page. List of tuples
233 | # (source start file, name, description, authors, manual section).
234 | man_pages = [
235 |     ('index', 'haguide', 'High Availability Guide',
236 |      ['OpenStack contributors'], 1)
237 | ]
238 | 
239 | # If true, show URL addresses after external links.
240 | # man_show_urls = False
241 | 
242 | 
243 | # -- Options for Texinfo output -------------------------------------------
244 | 
245 | # Grouping the document tree into Texinfo files. List of tuples
246 | # (source start file, target name, title, author,
247 | #  dir menu entry, description, category)
248 | texinfo_documents = [
249 |     ('index', 'HAGuide', 'High Availability Guide',
250 |      'OpenStack contributors', 'HAGuide',
251 |      'This guide shows OpenStack operators and deployers how to configure'
252 |      'OpenStack to be robust and fault-tolerant.', 'Miscellaneous'),
253 | ]
254 | 
255 | # Documents to append as an appendix to all manuals.
256 | # texinfo_appendices = []
257 | 
258 | # If false, no module index is generated.
259 | # texinfo_domain_indices = True
260 | 
261 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
262 | # texinfo_show_urls = 'footnote'
263 | 
264 | # If true, do not generate a @detailmenu in the "Top" node's menu.
265 | # texinfo_no_detailmenu = False
266 | 
267 | # -- Options for Internationalization output ------------------------------
268 | locale_dirs = ['locale/']
269 | 


--------------------------------------------------------------------------------
/doc/source/control-plane-stateful.rst:
--------------------------------------------------------------------------------
  1 | =================================
  2 | Configuring the stateful services
  3 | =================================
  4 | .. to do: scope how in depth we want these sections to be
  5 | 
  6 | Database for high availability
  7 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  8 | 
  9 | Galera
 10 | ------
 11 | 
 12 | The first step is to install the database that sits at the heart of the
 13 | cluster. To implement high availability, run an instance of the database on
 14 | each controller node and use Galera Cluster to provide replication between
 15 | them. Galera Cluster is a synchronous multi-master database cluster, based
 16 | on MySQL and the InnoDB storage engine. It is a high-availability service
 17 | that provides high system uptime, no data loss, and scalability for growth.
 18 | 
 19 | You can achieve high availability for the OpenStack database in many
 20 | different ways, depending on the type of database that you want to use.
 21 | There are three implementations of Galera Cluster available to you:
 22 | 
 23 | - `Galera Cluster for MySQL <http://galeracluster.com/>`_: The MySQL
 24 |   reference implementation from Codership, Oy.
 25 | - `MariaDB Galera Cluster <https://mariadb.org/>`_: The MariaDB
 26 |   implementation of Galera Cluster, which is commonly supported in
 27 |   environments based on Red Hat distributions.
 28 | - `Percona XtraDB Cluster <https://www.percona.com/>`_: The XtraDB
 29 |   implementation of Galera Cluster from Percona.
 30 | 
 31 | In addition to Galera Cluster, you can also achieve high availability
 32 | through other database options, such as PostgreSQL, which has its own
 33 | replication system.
 34 | 
 35 | Pacemaker active/passive with HAproxy
 36 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 37 | 
 38 | Replicated storage
 39 | ------------------
 40 | 
 41 | For example: DRBD
 42 | 
 43 | Shared storage
 44 | --------------
 45 | 
 46 | Messaging service for high availability
 47 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 48 | 
 49 | RabbitMQ
 50 | --------
 51 | 
 52 | An AMQP (Advanced Message Queuing Protocol) compliant message bus is
 53 | required for most OpenStack components in order to coordinate the
 54 | execution of jobs entered into the system.
 55 | 
 56 | The most popular AMQP implementation used in OpenStack installations
 57 | is RabbitMQ.
 58 | 
 59 | RabbitMQ nodes fail over on the application and the infrastructure layers.
 60 | 
 61 | The application layer is controlled by the ``oslo.messaging``
 62 | configuration options for multiple AMQP hosts. If the AMQP node fails,
 63 | the application reconnects to the next one configured within the
 64 | specified reconnect interval. The specified reconnect interval
 65 | constitutes its SLA.
 66 | 
 67 | On the infrastructure layer, the SLA is the time for which RabbitMQ
 68 | cluster reassembles. Several cases are possible. The Mnesia keeper
 69 | node is the master of the corresponding Pacemaker resource for
 70 | RabbitMQ. When it fails, the result is a full AMQP cluster downtime
 71 | interval. Normally, its SLA is no more than several minutes. Failure
 72 | of another node that is a slave of the corresponding Pacemaker
 73 | resource for RabbitMQ results in no AMQP cluster downtime at all.
 74 | 
 75 | .. until we've determined the content depth, I've transferred RabbitMQ
 76 |    configuration below from the old HA guide (darrenc)
 77 | 
 78 | Making the RabbitMQ service highly available involves the following steps:
 79 | 
 80 | - :ref:`Install RabbitMQ<rabbitmq-install>`
 81 | 
 82 | - :ref:`Configure RabbitMQ for HA queues<rabbitmq-configure>`
 83 | 
 84 | - :ref:`Configure OpenStack services to use RabbitMQ HA queues
 85 |   <rabbitmq-services>`
 86 | 
 87 | .. note::
 88 | 
 89 |    Access to RabbitMQ is not normally handled by HAProxy. Instead,
 90 |    consumers must be supplied with the full list of hosts running
 91 |    RabbitMQ with ``rabbit_hosts`` and turn on the ``rabbit_ha_queues``
 92 |    option. For more information, read the `core issue
 93 |    <http://people.redhat.com/jeckersb/private/vip-failover-tcp-persist.html>`_.
 94 |    For more detail, read the `history and solution
 95 |    <http://john.eckersberg.com/improving-ha-failures-with-tcp-timeouts.html>`_.
 96 | 
 97 | .. _rabbitmq-install:
 98 | 
 99 | Install RabbitMQ
100 | ^^^^^^^^^^^^^^^^
101 | 
102 | The commands for installing RabbitMQ are specific to the Linux distribution
103 | you are using.
104 | 
105 | For Ubuntu or Debian:
106 | 
107 | .. code-block: console
108 | 
109 |    # apt-get install rabbitmq-server
110 | 
111 | For RHEL, Fedora, or CentOS:
112 | 
113 | .. code-block: console
114 | 
115 |    # yum install rabbitmq-server
116 | 
117 | For openSUSE:
118 | 
119 | .. code-block: console
120 | 
121 |    # zypper install rabbitmq-server
122 | 
123 | For SLES 12:
124 | 
125 | .. code-block: console
126 | 
127 |    # zypper addrepo -f obs://Cloud:OpenStack:Kilo/SLE_12 Kilo
128 |    [Verify the fingerprint of the imported GPG key. See below.]
129 |    # zypper install rabbitmq-server
130 | 
131 | .. note::
132 | 
133 |    For SLES 12, the packages are signed by GPG key 893A90DAD85F9316.
134 |    You should verify the fingerprint of the imported GPG key before using it.
135 | 
136 |    .. code-block:: none
137 | 
138 |       Key ID: 893A90DAD85F9316
139 |       Key Name: Cloud:OpenStack OBS Project <Cloud:OpenStack@build.opensuse.org>
140 |       Key Fingerprint: 35B34E18ABC1076D66D5A86B893A90DAD85F9316
141 |       Key Created: Tue Oct  8 13:34:21 2013
142 |       Key Expires: Thu Dec 17 13:34:21 2015
143 | 
144 | For more information, see the official installation manual for the
145 | distribution:
146 | 
147 | - `Debian and Ubuntu <https://www.rabbitmq.com/install-debian.html>`_
148 | - `RPM based <https://www.rabbitmq.com/install-rpm.html>`_
149 |   (RHEL, Fedora, CentOS, openSUSE)
150 | 
151 | .. _rabbitmq-configure:
152 | 
153 | Configure RabbitMQ for HA queues
154 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
155 | 
156 | .. [TODO: This section should begin with a brief mention
157 | .. about what HA queues are and why they are valuable, etc]
158 | 
159 | .. [TODO: replace "currently" with specific release names]
160 | 
161 | .. [TODO: Does this list need to be updated? Perhaps we need a table
162 | .. that shows each component and the earliest release that allows it
163 | .. to work with HA queues.]
164 | 
165 | The following components/services can work with HA queues:
166 | 
167 | - OpenStack Compute
168 | - OpenStack Block Storage
169 | - OpenStack Networking
170 | - Telemetry
171 | 
172 | Consider that, while exchanges and bindings survive the loss of individual
173 | nodes, queues and their messages do not because a queue and its contents
174 | are located on one node. If we lose this node, we also lose the queue.
175 | 
176 | Mirrored queues in RabbitMQ improve the availability of service since
177 | it is resilient to failures.
178 | 
179 | Production servers should run (at least) three RabbitMQ servers for testing
180 | and demonstration purposes, however it is possible to run only two servers.
181 | In this section, we configure two nodes, called ``rabbit1`` and ``rabbit2``.
182 | To build a broker, ensure that all nodes have the same Erlang cookie file.
183 | 
184 | .. [TODO: Should the example instead use a minimum of three nodes?]
185 | 
186 | #. Stop RabbitMQ and copy the cookie from the first node to each of the
187 |    other node(s):
188 | 
189 |    .. code-block:: console
190 | 
191 |       # scp /var/lib/rabbitmq/.erlang.cookie root@NODE:/var/lib/rabbitmq/.erlang.cookie
192 | 
193 | #. On each target node, verify the correct owner,
194 |    group, and permissions of the file :file:`erlang.cookie`:
195 | 
196 |    .. code-block:: console
197 | 
198 |       # chown rabbitmq:rabbitmq /var/lib/rabbitmq/.erlang.cookie
199 |       # chmod 400 /var/lib/rabbitmq/.erlang.cookie
200 | 
201 | #. Start the message queue service on all nodes and configure it to start
202 |    when the system boots. On Ubuntu, it is configured by default.
203 | 
204 |    On CentOS, RHEL, openSUSE, and SLES:
205 | 
206 |    .. code-block:: console
207 | 
208 |       # systemctl enable rabbitmq-server.service
209 |       # systemctl start rabbitmq-server.service
210 | 
211 | #. Verify that the nodes are running:
212 | 
213 |    .. code-block:: console
214 | 
215 |       # rabbitmqctl cluster_status
216 |       Cluster status of node rabbit@NODE...
217 |       [{nodes,[{disc,[rabbit@NODE]}]},
218 |        {running_nodes,[rabbit@NODE]},
219 |        {partitions,[]}]
220 |       ...done.
221 | 
222 | #. Run the following commands on each node except the first one:
223 | 
224 |    .. code-block:: console
225 | 
226 |       # rabbitmqctl stop_app
227 |       Stopping node rabbit@NODE...
228 |       ...done.
229 |       # rabbitmqctl join_cluster --ram rabbit@rabbit1
230 |       # rabbitmqctl start_app
231 |       Starting node rabbit@NODE ...
232 |       ...done.
233 | 
234 | .. note::
235 | 
236 |    The default node type is a disc node. In this guide, nodes
237 |    join the cluster as RAM nodes.
238 | 
239 | #. Verify the cluster status:
240 | 
241 |    .. code-block:: console
242 | 
243 |       # rabbitmqctl cluster_status
244 |       Cluster status of node rabbit@NODE...
245 |       [{nodes,[{disc,[rabbit@rabbit1]},{ram,[rabbit@NODE]}]}, \
246 |           {running_nodes,[rabbit@NODE,rabbit@rabbit1]}]
247 | 
248 |    If the cluster is working, you can create usernames and passwords
249 |    for the queues.
250 | 
251 | #. To ensure that all queues except those with auto-generated names
252 |    are mirrored across all running nodes,
253 |    set the ``ha-mode`` policy key to all
254 |    by running the following command on one of the nodes:
255 | 
256 |    .. code-block:: console
257 | 
258 |       # rabbitmqctl set_policy ha-all '^(?!amq\.).*' '{"ha-mode": "all"}'
259 | 
260 | More information is available in the RabbitMQ documentation:
261 | 
262 | - `Highly Available Queues <https://www.rabbitmq.com/ha.html>`_
263 | - `Clustering Guide <https://www.rabbitmq.com/clustering.html>`_
264 | 
265 | .. note::
266 | 
267 |    As another option to make RabbitMQ highly available, RabbitMQ contains the
268 |    OCF scripts for the Pacemaker cluster resource agents since version 3.5.7.
269 |    It provides the active/active RabbitMQ cluster with mirrored queues.
270 |    For more information, see `Auto-configuration of a cluster with
271 |    a Pacemaker <https://www.rabbitmq.com/pacemaker.html>`_.
272 | 
273 | .. _rabbitmq-services:
274 | 
275 | Configure OpenStack services to use Rabbit HA queues
276 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
277 | 
278 | Configure the OpenStack components to use at least two RabbitMQ nodes.
279 | 
280 | Use these steps to configurate all services using RabbitMQ:
281 | 
282 | #. RabbitMQ HA cluster ``host:port`` pairs:
283 | 
284 |    .. code-block:: console
285 | 
286 |       rabbit_hosts=rabbit1:5672,rabbit2:5672,rabbit3:5672
287 | 
288 | #. Retry connecting with RabbitMQ:
289 | 
290 |    .. code-block:: console
291 | 
292 |       rabbit_retry_interval=1
293 | 
294 | #. How long to back-off for between retries when connecting to RabbitMQ:
295 | 
296 |    .. code-block:: console
297 | 
298 |       rabbit_retry_backoff=2
299 | 
300 | #. Maximum retries with trying to connect to RabbitMQ (infinite by default):
301 | 
302 |    .. code-block:: console
303 | 
304 |       rabbit_max_retries=0
305 | 
306 | #. Use durable queues in RabbitMQ:
307 | 
308 |    .. code-block:: console
309 | 
310 |       rabbit_durable_queues=true
311 | 
312 | #. Use HA queues in RabbitMQ (``x-ha-policy: all``):
313 | 
314 |    .. code-block:: console
315 | 
316 |       rabbit_ha_queues=true
317 | 
318 | .. note::
319 | 
320 |    If you change the configuration from an old set-up
321 |    that did not use HA queues, restart the service:
322 | 
323 |    .. code-block:: console
324 | 
325 |       # rabbitmqctl stop_app
326 |       # rabbitmqctl reset
327 |       # rabbitmqctl start_app
328 | 
329 | 
330 | 
331 | 
332 | 
333 | Pacemaker active/passive
334 | ------------------------
335 | 
336 | 
337 | 
338 | Mirrored queues
339 | ---------------
340 | 
341 | Qpid
342 | ----
343 | 


--------------------------------------------------------------------------------
/doc/source/control-plane-stateless.rst:
--------------------------------------------------------------------------------
  1 | ==============================
  2 | Configuring stateless services
  3 | ==============================
  4 | 
  5 | .. to do: scope what details we want on the following services
  6 | 
  7 | API services
  8 | ~~~~~~~~~~~~
  9 | 
 10 | Load-balancer
 11 | ~~~~~~~~~~~~~
 12 | 
 13 | HAProxy
 14 | -------
 15 | 
 16 | HAProxy provides a fast and reliable HTTP reverse proxy and load balancer
 17 | for TCP or HTTP applications. It is particularly suited for web crawling
 18 | under very high loads while needing persistence or Layer 7 processing.
 19 | It realistically supports tens of thousands of connections with recent
 20 | hardware.
 21 | 
 22 | Each instance of HAProxy configures its front end to accept connections only
 23 | to the virtual IP (VIP) address. The HAProxy back end (termination
 24 | point) is a list of all the IP addresses of instances for load balancing.
 25 | 
 26 | .. note::
 27 | 
 28 |    Ensure your HAProxy installation is not a single point of failure,
 29 |    it is advisable to have multiple HAProxy instances running.
 30 | 
 31 |    You can also ensure the availability by other means, using Keepalived
 32 |    or Pacemaker.
 33 | 
 34 | Alternatively, you can use a commercial load balancer, which is hardware
 35 | or software. We recommend a hardware load balancer as it generally has
 36 | good performance.
 37 | 
 38 | For detailed instructions about installing HAProxy on your nodes,
 39 | see the HAProxy `official documentation <http://www.haproxy.org/#docs>`_.
 40 | 
 41 | Configuring HAProxy
 42 | ^^^^^^^^^^^^^^^^^^^
 43 | 
 44 | #. Restart the HAProxy service.
 45 | 
 46 | #. Locate your HAProxy instance on each OpenStack controller in your
 47 |    environment. The following is an example ``/etc/haproxy/haproxy.cfg``
 48 |    configuration file. Configure your instance using the following
 49 |    configuration file, you will need a copy of it on each
 50 |    controller node.
 51 | 
 52 | 
 53 |    .. code-block:: none
 54 | 
 55 |         global
 56 |          chroot  /var/lib/haproxy
 57 |          daemon
 58 |          group  haproxy
 59 |          maxconn  4000
 60 |          pidfile  /var/run/haproxy.pid
 61 |          user  haproxy
 62 | 
 63 |        defaults
 64 |          log  global
 65 |          maxconn  4000
 66 |          option  redispatch
 67 |          retries  3
 68 |          timeout  http-request 10s
 69 |          timeout  queue 1m
 70 |          timeout  connect 10s
 71 |          timeout  client 1m
 72 |          timeout  server 1m
 73 |          timeout  check 10s
 74 | 
 75 |         listen dashboard_cluster
 76 |          bind <Virtual IP>:443
 77 |          balance  source
 78 |          option  tcpka
 79 |          option  httpchk
 80 |          option  tcplog
 81 |          server controller1 10.0.0.12:443 check inter 2000 rise 2 fall 5
 82 |          server controller2 10.0.0.13:443 check inter 2000 rise 2 fall 5
 83 |          server controller3 10.0.0.14:443 check inter 2000 rise 2 fall 5
 84 | 
 85 |         listen galera_cluster
 86 |          bind <Virtual IP>:3306
 87 |          balance  source
 88 |          option  mysql-check
 89 |          server controller1 10.0.0.12:3306 check port 9200 inter 2000 rise 2 fall 5
 90 |          server controller2 10.0.0.13:3306 backup check port 9200 inter 2000 rise 2 fall 5
 91 |          server controller3 10.0.0.14:3306 backup check port 9200 inter 2000 rise 2 fall 5
 92 | 
 93 |         listen glance_api_cluster
 94 |          bind <Virtual IP>:9292
 95 |          balance  source
 96 |          option  tcpka
 97 |          option  httpchk
 98 |          option  tcplog
 99 |          server controller1 10.0.0.12:9292 check inter 2000 rise 2 fall 5
100 |          server controller2 10.0.0.13:9292 check inter 2000 rise 2 fall 5
101 |          server controller3 10.0.0.14:9292 check inter 2000 rise 2 fall 5
102 | 
103 |         listen glance_registry_cluster
104 |          bind <Virtual IP>:9191
105 |          balance  source
106 |          option  tcpka
107 |          option  tcplog
108 |          server controller1 10.0.0.12:9191 check inter 2000 rise 2 fall 5
109 |          server controller2 10.0.0.13:9191 check inter 2000 rise 2 fall 5
110 |          server controller3 10.0.0.14:9191 check inter 2000 rise 2 fall 5
111 | 
112 |         listen keystone_admin_cluster
113 |          bind <Virtual IP>:35357
114 |          balance  source
115 |          option  tcpka
116 |          option  httpchk
117 |          option  tcplog
118 |          server controller1 10.0.0.12:35357 check inter 2000 rise 2 fall 5
119 |          server controller2 10.0.0.13:35357 check inter 2000 rise 2 fall 5
120 |          server controller3 10.0.0.14:35357 check inter 2000 rise 2 fall 5
121 | 
122 |         listen keystone_public_internal_cluster
123 |          bind <Virtual IP>:5000
124 |          balance  source
125 |          option  tcpka
126 |          option  httpchk
127 |          option  tcplog
128 |          server controller1 10.0.0.12:5000 check inter 2000 rise 2 fall 5
129 |          server controller2 10.0.0.13:5000 check inter 2000 rise 2 fall 5
130 |          server controller3 10.0.0.14:5000 check inter 2000 rise 2 fall 5
131 | 
132 |         listen nova_ec2_api_cluster
133 |          bind <Virtual IP>:8773
134 |          balance  source
135 |          option  tcpka
136 |          option  tcplog
137 |          server controller1 10.0.0.12:8773 check inter 2000 rise 2 fall 5
138 |          server controller2 10.0.0.13:8773 check inter 2000 rise 2 fall 5
139 |          server controller3 10.0.0.14:8773 check inter 2000 rise 2 fall 5
140 | 
141 |         listen nova_compute_api_cluster
142 |          bind <Virtual IP>:8774
143 |          balance  source
144 |          option  tcpka
145 |          option  httpchk
146 |          option  tcplog
147 |          server controller1 10.0.0.12:8774 check inter 2000 rise 2 fall 5
148 |          server controller2 10.0.0.13:8774 check inter 2000 rise 2 fall 5
149 |          server controller3 10.0.0.14:8774 check inter 2000 rise 2 fall 5
150 | 
151 |         listen nova_metadata_api_cluster
152 |          bind <Virtual IP>:8775
153 |          balance  source
154 |          option  tcpka
155 |          option  tcplog
156 |          server controller1 10.0.0.12:8775 check inter 2000 rise 2 fall 5
157 |          server controller2 10.0.0.13:8775 check inter 2000 rise 2 fall 5
158 |          server controller3 10.0.0.14:8775 check inter 2000 rise 2 fall 5
159 | 
160 |         listen cinder_api_cluster
161 |          bind <Virtual IP>:8776
162 |          balance  source
163 |          option  tcpka
164 |          option  httpchk
165 |          option  tcplog
166 |          server controller1 10.0.0.12:8776 check inter 2000 rise 2 fall 5
167 |          server controller2 10.0.0.13:8776 check inter 2000 rise 2 fall 5
168 |          server controller3 10.0.0.14:8776 check inter 2000 rise 2 fall 5
169 | 
170 |         listen ceilometer_api_cluster
171 |          bind <Virtual IP>:8777
172 |          balance  source
173 |          option  tcpka
174 |          option  tcplog
175 |          server controller1 10.0.0.12:8777 check inter 2000 rise 2 fall 5
176 |          server controller2 10.0.0.13:8777 check inter 2000 rise 2 fall 5
177 |          server controller3 10.0.0.14:8777 check inter 2000 rise 2 fall 5
178 | 
179 |         listen nova_vncproxy_cluster
180 |          bind <Virtual IP>:6080
181 |          balance  source
182 |          option  tcpka
183 |          option  tcplog
184 |          server controller1 10.0.0.12:6080 check inter 2000 rise 2 fall 5
185 |          server controller2 10.0.0.13:6080 check inter 2000 rise 2 fall 5
186 |          server controller3 10.0.0.14:6080 check inter 2000 rise 2 fall 5
187 | 
188 |         listen neutron_api_cluster
189 |          bind <Virtual IP>:9696
190 |          balance  source
191 |          option  tcpka
192 |          option  httpchk
193 |          option  tcplog
194 |          server controller1 10.0.0.12:9696 check inter 2000 rise 2 fall 5
195 |          server controller2 10.0.0.13:9696 check inter 2000 rise 2 fall 5
196 |          server controller3 10.0.0.14:9696 check inter 2000 rise 2 fall 5
197 | 
198 |         listen swift_proxy_cluster
199 |          bind <Virtual IP>:8080
200 |          balance  source
201 |          option  tcplog
202 |          option  tcpka
203 |          server controller1 10.0.0.12:8080 check inter 2000 rise 2 fall 5
204 |          server controller2 10.0.0.13:8080 check inter 2000 rise 2 fall 5
205 |          server controller3 10.0.0.14:8080 check inter 2000 rise 2 fall 5
206 | 
207 |    .. note::
208 | 
209 |       The Galera cluster configuration directive ``backup`` indicates
210 |       that two of the three controllers are standby nodes.
211 |       This ensures that only one node services write requests
212 |       because OpenStack support for multi-node writes is not yet production-ready.
213 | 
214 |    .. note::
215 | 
216 |       The Telemetry API service configuration does not have the ``option httpchk``
217 |       directive as it cannot process this check properly.
218 | 
219 | .. TODO: explain why the Telemetry API is so special
220 | 
221 | #. Configure the kernel parameter to allow non-local IP binding. This allows
222 |    running HAProxy instances to bind to a VIP for failover. Add following line
223 |    to ``/etc/sysctl.conf``:
224 | 
225 |    .. code-block:: none
226 | 
227 |       net.ipv4.ip_nonlocal_bind = 1
228 | 
229 | #. Restart the host or, to make changes work immediately, invoke:
230 | 
231 |    .. code-block:: console
232 | 
233 |       $ sysctl -p
234 | 
235 | #. Add HAProxy to the cluster and ensure the VIPs can only run on machines
236 |    where HAProxy is active:
237 | 
238 |    ``pcs``
239 | 
240 |    .. code-block:: console
241 | 
242 |       $ pcs resource create lb-haproxy systemd:haproxy --clone
243 |       $ pcs constraint order start vip then lb-haproxy-clone kind=Optional
244 |       $ pcs constraint colocation add lb-haproxy-clone with vip
245 | 
246 |    ``crmsh``
247 | 
248 |    .. code-block:: console
249 | 
250 |       $ crm cib new conf-haproxy
251 |       $ crm configure primitive haproxy lsb:haproxy op monitor interval="1s"
252 |       $ crm configure clone haproxy-clone haproxy
253 |       $ crm configure colocation vip-with-haproxy inf: vip haproxy-clone
254 |       $ crm configure order haproxy-after-vip mandatory: vip haproxy-clone
255 | 
256 | 
257 | Pacemaker versus systemd
258 | ------------------------
259 | 
260 | Memcached
261 | ---------
262 | 
263 | Memcached is a general-purpose distributed memory caching system. It
264 | is used to speed up dynamic database-driven websites by caching data
265 | and objects in RAM to reduce the number of times an external data
266 | source must be read.
267 | 
268 | Memcached is a memory cache demon that can be used by most OpenStack
269 | services to store ephemeral data, such as tokens.
270 | 
271 | Access to Memcached is not handled by HAProxy because replicated
272 | access is currently in an experimental state. Instead, OpenStack
273 | services must be supplied with the full list of hosts running
274 | Memcached.
275 | 
276 | The Memcached client implements hashing to balance objects among the
277 | instances. Failure of an instance impacts only a percentage of the
278 | objects and the client automatically removes it from the list of
279 | instances. The SLA is several minutes.
280 | 
281 | 
282 | Highly available API services
283 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
284 | 
285 | Identity API
286 | ------------
287 | 
288 | Ensure you have read the
289 | `OpenStack Identity service getting started documentation
290 | <https://docs.openstack.org/admin-guide/common/get-started-identity.html>`_.
291 | 
292 | .. to do: reference controller-ha-identity and see if section involving
293 |    adding to pacemaker is in scope
294 | 
295 | 
296 | Add OpenStack Identity resource to Pacemaker
297 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
298 | 
299 | The following section(s) detail how to add the Identity service
300 | to Pacemaker on SUSE and Red Hat.
301 | 
302 | SUSE
303 | ----
304 | 
305 | SUSE Enterprise Linux and SUSE-based distributions, such as openSUSE,
306 | use a set of OCF agents for controlling OpenStack services.
307 | 
308 | #. Run the following commands to download the OpenStack Identity resource
309 |    to Pacemaker:
310 | 
311 |    .. code-block:: console
312 | 
313 |       # cd /usr/lib/ocf/resource.d
314 |       # mkdir openstack
315 |       # cd openstack
316 |       # wget https://opendev.org/x/openstack-resource-agents/raw/branch/master/ocf/keystone
317 |       # chmod a+rx *
318 | 
319 | #. Add the Pacemaker configuration for the OpenStack Identity resource
320 |    by running the following command to connect to the Pacemaker cluster:
321 | 
322 |    .. code-block:: console
323 | 
324 |       # crm configure
325 | 
326 | #. Add the following cluster resources:
327 | 
328 |    .. code-block:: console
329 | 
330 |       clone p_keystone ocf:openstack:keystone \
331 |       params config="/etc/keystone/keystone.conf" os_password="secretsecret" os_username="admin" os_tenant_name="admin" os_auth_url="http://10.0.0.11:5000/v2.0/" \
332 |       op monitor interval="30s" timeout="30s"
333 | 
334 |    .. note::
335 | 
336 |       This configuration creates ``p_keystone``,
337 |       a resource for managing the OpenStack Identity service.
338 | 
339 | #. Commit your configuration changes from the :command:`crm configure` menu
340 |    with the following command:
341 | 
342 |    .. code-block:: console
343 | 
344 |       # commit
345 | 
346 |    The :command:`crm configure` supports batch input. You may have to copy and
347 |    paste the above lines into your live Pacemaker configuration, and then make
348 |    changes as required.
349 | 
350 |    For example, you may enter ``edit p_ip_keystone`` from the
351 |    :command:`crm configure` menu and edit the resource to match your preferred
352 |    virtual IP address.
353 | 
354 |    Pacemaker now starts the OpenStack Identity service and its dependent
355 |    resources on all of your nodes.
356 | 
357 | Red Hat
358 | --------
359 | 
360 | For Red Hat Enterprise Linux and Red Hat-based Linux distributions,
361 | the following process uses Systemd unit files.
362 | 
363 | .. code-block:: console
364 | 
365 |    # pcs resource create openstack-keystone systemd:openstack-keystone --clone interleave=true
366 | 
367 | .. _identity-config-identity:
368 | 
369 | Configure OpenStack Identity service
370 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
371 | 
372 | #. Edit the :file:`keystone.conf` file
373 |    to change the values of the :manpage:`bind(2)` parameters:
374 | 
375 |    .. code-block:: ini
376 | 
377 |       bind_host = 10.0.0.12
378 |       public_bind_host = 10.0.0.12
379 |       admin_bind_host = 10.0.0.12
380 | 
381 |    The ``admin_bind_host`` parameter
382 |    lets you use a private network for admin access.
383 | 
384 | #. To be sure that all data is highly available,
385 |    ensure that everything is stored in the MySQL database
386 |    (which is also highly available):
387 | 
388 |    .. code-block:: ini
389 | 
390 |       [catalog]
391 |       driver = keystone.catalog.backends.sql.Catalog
392 |       # ...
393 |       [identity]
394 |       driver = keystone.identity.backends.sql.Identity
395 |       # ...
396 | 
397 | #. If the Identity service will be sending ceilometer notifications
398 |    and your message bus is configured for high availability, you will
399 |    need to ensure that the Identity service is correctly configured to
400 |    use it.
401 | 
402 | .. _identity-services-config:
403 | 
404 | Configure OpenStack services to use the highly available OpenStack Identity
405 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
406 | 
407 | Your OpenStack services now point their OpenStack Identity configuration
408 | to the highly available virtual cluster IP address.
409 | 
410 | #. For OpenStack Compute service, (if your OpenStack Identity service
411 |    IP address is 10.0.0.11) use the following configuration in the
412 |    :file:`api-paste.ini` file:
413 | 
414 |   .. code-block:: ini
415 | 
416 |      auth_host = 10.0.0.11
417 | 
418 | #. Create the OpenStack Identity Endpoint with this IP address.
419 | 
420 |    .. note::
421 | 
422 |       If you are using both private and public IP addresses,
423 |       create two virtual IP addresses and define the endpoint. For
424 |       example:
425 | 
426 |    .. code-block:: console
427 | 
428 |       $ openstack endpoint create --region $KEYSTONE_REGION \
429 |       $service-type public http://PUBLIC_VIP:5000/v2.0
430 |       $ openstack endpoint create --region $KEYSTONE_REGION \
431 |       $service-type admin http://10.0.0.11:35357/v2.0
432 |       $ openstack endpoint create --region $KEYSTONE_REGION \
433 |       $service-type internal http://10.0.0.11:5000/v2.0
434 | 
435 | #. If you are using Dashboard (horizon), edit the :file:`local_settings.py`
436 |    file to include the following:
437 | 
438 |       .. code-block:: ini
439 | 
440 |          OPENSTACK_HOST = 10.0.0.11
441 | 
442 | 
443 | Telemetry API
444 | -------------
445 | 
446 | The Telemetry polling agent can be configured to partition its polling
447 | workload between multiple agents. This enables high availability (HA).
448 | 
449 | Both the central and the compute agent can run in an HA deployment.
450 | This means that multiple instances of these services can run in
451 | parallel with workload partitioning among these running instances.
452 | 
453 | The `Tooz <https://pypi.org/project/tooz>`_ library provides
454 | the coordination within the groups of service instances.
455 | It provides an API above several back ends that can be used for building
456 | distributed applications.
457 | 
458 | Tooz supports
459 | `various drivers <https://docs.openstack.org/tooz/latest/user/drivers.html>`_
460 | including the following back end solutions:
461 | 
462 | * `Zookeeper <http://zookeeper.apache.org/>`_:
463 |     Recommended solution by the Tooz project.
464 | 
465 | * `Redis <http://redis.io/>`_:
466 |     Recommended solution by the Tooz project.
467 | 
468 | * `Memcached <http://memcached.org/>`_:
469 |     Recommended for testing.
470 | 
471 | You must configure a supported Tooz driver for the HA deployment of
472 | the Telemetry services.
473 | 
474 | For information about the required configuration options
475 | to set in the :file:`ceilometer.conf`, see the `coordination section
476 | <https://docs.openstack.org/ocata/config-reference/telemetry.html>`_
477 | in the OpenStack Configuration Reference.
478 | 
479 | .. note::
480 | 
481 |    Only one instance for the central and compute agent service(s) is able
482 |    to run and function correctly if the ``backend_url`` option is not set.
483 | 
484 | The availability check of the instances is provided by heartbeat messages.
485 | When the connection with an instance is lost, the workload will be
486 | reassigned within the remaining instances in the next polling cycle.
487 | 
488 | .. note::
489 | 
490 |    Memcached uses a timeout value, which should always be set to
491 |    a value that is higher than the heartbeat value set for Telemetry.
492 | 
493 | For backward compatibility and supporting existing deployments, the central
494 | agent configuration supports using different configuration files. This is for
495 | groups of service instances that are running in parallel.
496 | For enabling this configuration, set a value for the
497 | ``partitioning_group_prefix`` option in the
498 | `polling section <https://docs.openstack.org/ocata/config-reference/telemetry/telemetry-config-options.html>`_
499 | in the OpenStack Configuration Reference.
500 | 
501 | .. warning::
502 | 
503 |    For each sub-group of the central agent pool with the same
504 |    ``partitioning_group_prefix``, a disjoint subset of meters must be polled
505 |    to avoid samples being missing or duplicated. The list of meters to poll
506 |    can be set in the :file:`/etc/ceilometer/pipeline.yaml` configuration file.
507 |    For more information about pipelines see the `Data processing and pipelines
508 |    <https://docs.openstack.org/admin-guide/telemetry-data-pipelines.html>`_
509 |    section.
510 | 
511 | To enable the compute agent to run multiple instances simultaneously with
512 | workload partitioning, the ``workload_partitioning`` option must be set to
513 | ``True`` under the `compute section <https://docs.openstack.org/ocata/config-reference/telemetry.html>`_
514 | in the :file:`ceilometer.conf` configuration file.
515 | 
516 | 
517 | .. To Do: Cover any other projects here with API services which require specific
518 |    HA details.
519 | 


--------------------------------------------------------------------------------
/doc/source/control-plane.rst:
--------------------------------------------------------------------------------
 1 | ===========================
 2 | Configuring a control plane
 3 | ===========================
 4 | 
 5 | .. toctree::
 6 |    :maxdepth: 2
 7 | 
 8 |    control-plane-stateless.rst
 9 |    control-plane-stateful.rst
10 | 


--------------------------------------------------------------------------------
/doc/source/figures/Cluster-deployment-collapsed.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstack/ha-guide/7154c29fb7c810496f4913c9d3ffa738f56d3afe/doc/source/figures/Cluster-deployment-collapsed.png


--------------------------------------------------------------------------------
/doc/source/figures/Cluster-deployment-segregated.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/openstack/ha-guide/7154c29fb7c810496f4913c9d3ffa738f56d3afe/doc/source/figures/Cluster-deployment-segregated.png


--------------------------------------------------------------------------------
/doc/source/ha-community.rst:
--------------------------------------------------------------------------------
 1 | ============
 2 | HA community
 3 | ============
 4 | 
 5 | The OpenStack HA community holds `weekly IRC meetings
 6 | <https://wiki.openstack.org/wiki/Meetings/HATeamMeeting>`_ to discuss
 7 | a range of topics relating to HA in OpenStack. Everyone interested is
 8 | encouraged to attend. The `logs of all previous meetings
 9 | <http://eavesdrop.openstack.org/meetings/ha/>`_ are available to read.
10 | 
11 | You can contact the HA community directly in `the #openstack-ha
12 | channel on Freenode IRC <https://wiki.openstack.org/wiki/IRC>`_, or by
13 | sending mail to the `openstack-dev
14 | <https://wiki.openstack.org/wiki/Mailing_Lists#Future_Development>`_
15 | mailing list with the ``[HA]`` prefix in the ``Subject`` header.
16 | 


--------------------------------------------------------------------------------
/doc/source/index.rst:
--------------------------------------------------------------------------------
 1 | =================================
 2 | OpenStack High Availability Guide
 3 | =================================
 4 | 
 5 | Abstract
 6 | ~~~~~~~~
 7 | 
 8 | This guide describes how to install and configure OpenStack for high
 9 | availability. It supplements the Installation Guides
10 | and assumes that you are familiar with the material in those guides.
11 | 
12 | .. warning::
13 | 
14 |    This guide is a work-in-progress and changing rapidly while we
15 |    continue to test and enhance the guidance.  There are open TODO
16 |    items throughout the guide which will be tracked on
17 |    `the ha-guide Storyboard site
18 |    <https://storyboard.openstack.org/#!/project/openstack/ha-guide>`_.
19 |    There is also a `bug list corresponding to the old version of the
20 |    guide
21 |    <https://bugs.launchpad.net/openstack-manuals?field.tag=ha-guide>`_
22 |    which need to be triaged, as some of those bugs may still be
23 |    relevant in which case they need to be ported over to Storyboard.
24 |    Please help where you are able.
25 | 
26 | .. toctree::
27 |    :maxdepth: 1
28 | 
29 |    common/conventions.rst
30 |    overview.rst
31 |    intro-ha.rst
32 |    intro-os-ha.rst
33 |    control-plane.rst
34 |    networking-ha.rst
35 |    storage-ha.rst
36 |    compute-node-ha.rst
37 |    monitoring.rst
38 |    testing.rst
39 |    ref-arch-examples.rst
40 |    ha-community.rst
41 |    common/appendix.rst
42 | 


--------------------------------------------------------------------------------
/doc/source/intro-ha-common-tech.rst:
--------------------------------------------------------------------------------
  1 | ========================
  2 | Commonly used technology
  3 | ========================
  4 | High availability can be achieved only on system level, while both hardware and
  5 | software components can contribute to the system level availability.
  6 | This document lists the most common hardware and software technologies
  7 | that can be used to build a highly available system.
  8 | 
  9 | Hardware
 10 | ~~~~~~~~
 11 | Using different technologies to enable high availability on the hardware
 12 | level provides a good basis to build a high available system. The next chapters
 13 | discuss the most common technologies used in this field.
 14 | 
 15 | Redundant switches
 16 | ------------------
 17 | Network switches are single point of failures as networking is critical to
 18 | operate all other basic domains of the infrastructure, like compute and
 19 | storage. Network switches need to be able to forward the network traffic
 20 | and be able to forward the traffic to a working next hop.
 21 | For these reasons consider the following two factors when making a network
 22 | switch redundant:
 23 | 
 24 | #. The network switch itself should synchronize its internal state to a
 25 |    redundant switch either in active/active or active/passive way.
 26 | 
 27 | #. The network topology should be designed in a way that the network router can
 28 |    use at least two paths in every critical direction.
 29 | 
 30 | Bonded interfaces
 31 | -----------------
 32 | Bonded interfaces are two independent physical network interfaces handled as
 33 | one interface in active/passive or in active/active redundancy mode. In
 34 | active/passive mode, if an error happens in the active network interface or in
 35 | the remote end of the interface, the interfaces are switched over. In
 36 | active/active mode, when an error happens in an interface or in the remote end
 37 | of an interface, then the interface is marked as unavailable and ceases to be
 38 | used.
 39 | 
 40 | Load balancers
 41 | --------------
 42 | Physical load balancers are special routers which direct the traffic in
 43 | different directions based on a set of rules. Load balancers can be in
 44 | redundant mode similarly to the physical switches.
 45 | Load balancers are also important for distributing the traffic to the different
 46 | active/active components of the system.
 47 | 
 48 | Storage
 49 | -------
 50 | Physical storage high availability can be achieved with different scopes:
 51 | 
 52 | #. High availability within a hardware unit with redundant disks (mostly
 53 |    organized into different RAID configurations), redundant control components,
 54 |    redundant I/O interfaces and redundant power supply.
 55 | 
 56 | #. System level high availability with redundant hardware units with data
 57 |    replication.
 58 | 
 59 | Software
 60 | ~~~~~~~~
 61 | 
 62 | HAproxy
 63 | -------
 64 | 
 65 | HAProxy provides a fast and reliable HTTP reverse proxy and load balancer
 66 | for TCP or HTTP applications. It is particularly suited for web crawling
 67 | under very high loads while needing persistence or Layer 7 processing.
 68 | It realistically supports tens of thousands of connections with recent
 69 | hardware.
 70 | 
 71 | .. note::
 72 | 
 73 |    Ensure your HAProxy installation is not a single point of failure,
 74 |    it is advisable to have multiple HAProxy instances running.
 75 | 
 76 |    You can also ensure the availability by other means, using Keepalived
 77 |    or Pacemaker.
 78 | 
 79 | Alternatively, you can use a commercial load balancer, which is hardware
 80 | or software. We recommend a hardware load balancer as it generally has
 81 | good performance.
 82 | 
 83 | For detailed instructions about installing HAProxy on your nodes,
 84 | see the HAProxy `official documentation <http://www.haproxy.org/#docs>`_.
 85 | 
 86 | keepalived
 87 | ----------
 88 | 
 89 | `keepalived <http://www.keepalived.org/>`_ is a routing software that
 90 | provides facilities for load balancing and high-availability to Linux
 91 | system and Linux based infrastructures.
 92 | 
 93 | Keepalived implements a set of checkers to dynamically and
 94 | adaptively maintain and manage loadbalanced server pool according
 95 | their health.
 96 | 
 97 | The keepalived daemon can be used to monitor services or systems and
 98 | to automatically failover to a standby if problems occur.
 99 | 
100 | Pacemaker
101 | ---------
102 | 
103 | `Pacemaker <http://clusterlabs.org/>`_ cluster stack is a state-of-the-art
104 | high availability and load balancing stack for the Linux platform.
105 | Pacemaker is used to make OpenStack infrastructure highly available.
106 | 
107 | Pacemaker relies on the
108 | `Corosync <http://corosync.github.io/corosync/>`_ messaging layer
109 | for reliable cluster communications. Corosync implements the Totem single-ring
110 | ordering and membership protocol. It also provides UDP and InfiniBand based
111 | messaging, quorum, and cluster membership to Pacemaker.
112 | 
113 | Pacemaker does not inherently understand the applications it manages.
114 | Instead, it relies on resource agents (RAs) that are scripts that encapsulate
115 | the knowledge of how to start, stop, and check the health of each application
116 | managed by the cluster.
117 | 
118 | These agents must conform to one of the `OCF <https://github.com/ClusterLabs/
119 | OCF-spec/blob/master/ra/resource-agent-api.md>`_,
120 | `SysV Init <http://refspecs.linux-foundation.org/LSB_3.0.0/LSB-Core-generic/
121 | LSB-Core-generic/iniscrptact.html>`_, Upstart, or Systemd standards.
122 | 
123 | Pacemaker ships with a large set of OCF agents (such as those managing
124 | MySQL databases, virtual IP addresses, and RabbitMQ), but can also use
125 | any agents already installed on your system and can be extended with
126 | your own (see the
127 | `developer guide <http://www.linux-ha.org/doc/dev-guides/ra-dev-guide.html>`_).
128 | 


--------------------------------------------------------------------------------
/doc/source/intro-ha-key-concepts.rst:
--------------------------------------------------------------------------------
  1 | ============
  2 | Key concepts
  3 | ============
  4 | 
  5 | Redundancy and failover
  6 | ~~~~~~~~~~~~~~~~~~~~~~~
  7 | 
  8 | High availability is implemented with redundant hardware
  9 | running redundant instances of each service.
 10 | If one piece of hardware running one instance of a service fails,
 11 | the system can then failover to use another instance of a service
 12 | that is running on hardware that did not fail.
 13 | 
 14 | A crucial aspect of high availability
 15 | is the elimination of single points of failure (SPOFs).
 16 | A SPOF is an individual piece of equipment or software
 17 | that causes system downtime or data loss if it fails.
 18 | In order to eliminate SPOFs, check that mechanisms exist for redundancy of:
 19 | 
 20 | - Network components, such as switches and routers
 21 | 
 22 | - Applications and automatic service migration
 23 | 
 24 | - Storage components
 25 | 
 26 | - Facility services such as power, air conditioning, and fire protection
 27 | 
 28 | In the event that a component fails and a back-up system must take on
 29 | its load, most high availability systems will replace the failed
 30 | component as quickly as possible to maintain necessary redundancy. This
 31 | way time spent in a degraded protection state is minimized.
 32 | 
 33 | Most high availability systems fail in the event of multiple
 34 | independent (non-consequential) failures. In this case, most
 35 | implementations favor protecting data over maintaining availability.
 36 | 
 37 | High availability systems typically achieve an uptime percentage of
 38 | 99.99% or more, which roughly equates to less than an hour of
 39 | cumulative downtime per year. In order to achieve this, high
 40 | availability systems should keep recovery times after a failure to
 41 | about one to two minutes, sometimes significantly less.
 42 | 
 43 | OpenStack currently meets such availability requirements for its own
 44 | infrastructure services, meaning that an uptime of 99.99% is feasible
 45 | for the OpenStack infrastructure proper. However, OpenStack does not
 46 | guarantee 99.99% availability for individual guest instances.
 47 | 
 48 | This document discusses some common methods of implementing highly
 49 | available systems, with an emphasis on the core OpenStack services and
 50 | other open source services that are closely aligned with OpenStack.
 51 | 
 52 | You will need to address high availability concerns for any applications
 53 | software that you run on your OpenStack environment. The important thing is
 54 | to make sure that your services are redundant and available.
 55 | How you achieve that is up to you.
 56 | 
 57 | Active/passive versus active/active
 58 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 59 | 
 60 | Stateful services can be configured as active/passive or active/active,
 61 | which are defined as follows:
 62 | 
 63 | :term:`active/passive configuration`
 64 |   Maintains a redundant instance
 65 |   that can be brought online when the active service fails.
 66 |   For example, OpenStack writes to the main database
 67 |   while maintaining a disaster recovery database that can be brought online
 68 |   if the main database fails.
 69 | 
 70 |   A typical active/passive installation for a stateful service maintains
 71 |   a replacement resource that can be brought online when required.
 72 |   Requests are handled using a :term:`virtual IP address (VIP)` that
 73 |   facilitates returning to service with minimal reconfiguration.
 74 |   A separate application (such as Pacemaker or Corosync) monitors
 75 |   these services, bringing the backup online as necessary.
 76 | 
 77 | :term:`active/active configuration`
 78 |   Each service also has a backup but manages both the main and
 79 |   redundant systems concurrently.
 80 |   This way, if there is a failure, the user is unlikely to notice.
 81 |   The backup system is already online and takes on increased load
 82 |   while the main system is fixed and brought back online.
 83 | 
 84 |   Typically, an active/active installation for a stateless service
 85 |   maintains a redundant instance, and requests are load balanced using
 86 |   a virtual IP address and a load balancer such as HAProxy.
 87 | 
 88 |   A typical active/active installation for a stateful service includes
 89 |   redundant services, with all instances having an identical state. In
 90 |   other words, updates to one instance of a database update all other
 91 |   instances. This way a request to one instance is the same as a
 92 |   request to any other. A load balancer manages the traffic to these
 93 |   systems, ensuring that operational systems always handle the
 94 |   request.
 95 | 
 96 | Clusters and quorums
 97 | ~~~~~~~~~~~~~~~~~~~~
 98 | 
 99 | The quorum specifies the minimal number of nodes
100 | that must be functional in a cluster of redundant nodes
101 | in order for the cluster to remain functional.
102 | When one node fails and failover transfers control to other nodes,
103 | the system must ensure that data and processes remain sane.
104 | To determine this, the contents of the remaining nodes are compared
105 | and, if there are discrepancies, a majority rules algorithm is implemented.
106 | 
107 | For this reason, each cluster in a high availability environment should
108 | have an odd number of nodes and the quorum is defined as more than a half
109 | of the nodes.
110 | If multiple nodes fail so that the cluster size falls below the quorum
111 | value, the cluster itself fails.
112 | 
113 | For example, in a seven-node cluster, the quorum should be set to
114 | ``floor(7/2) + 1 == 4``. If quorum is four and four nodes fail simultaneously,
115 | the cluster itself would fail, whereas it would continue to function, if
116 | no more than three nodes fail. If split to partitions of three and four nodes
117 | respectively, the quorum of four nodes would continue to operate the majority
118 | partition and stop or fence the minority one (depending on the
119 | no-quorum-policy cluster configuration).
120 | 
121 | And the quorum could also have been set to three, just as a configuration
122 | example.
123 | 
124 | .. note::
125 | 
126 |   We do not recommend setting the quorum to a value less than ``floor(n/2) + 1``
127 |   as it would likely cause a split-brain in a face of network partitions.
128 | 
129 | When four nodes fail simultaneously, the cluster would continue to function as
130 | well. But if split to partitions of three and four nodes respectively, the
131 | quorum of three would have made both sides to attempt to fence the other and
132 | host resources. Without fencing enabled, it would go straight to running
133 | two copies of each resource.
134 | 
135 | This is why setting the quorum to a value less than ``floor(n/2) + 1`` is
136 | dangerous. However it may be required for some specific cases, such as a
137 | temporary measure at a point it is known with 100% certainty that the other
138 | nodes are down.
139 | 
140 | When configuring an OpenStack environment for study or demonstration purposes,
141 | it is possible to turn off the quorum checking. Production systems should
142 | always run with quorum enabled.
143 | 
144 | Load balancing
145 | ~~~~~~~~~~~~~~
146 | 
147 | .. to do: definition and description of need within HA
148 | 


--------------------------------------------------------------------------------
/doc/source/intro-ha.rst:
--------------------------------------------------------------------------------
 1 | =================================
 2 | Introduction to high availability
 3 | =================================
 4 | 
 5 | High availability systems seek to minimize the following issues:
 6 | 
 7 | #. System downtime: Occurs when a user-facing service is unavailable
 8 |    beyond a specified maximum amount of time.
 9 | 
10 | #. Data loss: Accidental deletion or destruction of data.
11 | 
12 | Most high availability systems guarantee protection against system downtime
13 | and data loss only in the event of a single failure.
14 | However, they are also expected to protect against cascading failures,
15 | where a single failure deteriorates into a series of consequential failures.
16 | Many service providers guarantee a :term:`Service Level Agreement (SLA)`
17 | including uptime percentage of computing service, which is calculated based
18 | on the available time and system downtime excluding planned outage time.
19 | 
20 | .. toctree::
21 |    :maxdepth: 2
22 | 
23 |    intro-ha-key-concepts.rst
24 |    intro-ha-common-tech.rst
25 | 


--------------------------------------------------------------------------------
/doc/source/intro-os-ha-cluster.rst:
--------------------------------------------------------------------------------
 1 | ================
 2 | Cluster managers
 3 | ================
 4 | 
 5 | At its core, a cluster is a distributed finite state machine capable
 6 | of co-ordinating the startup and recovery of inter-related services
 7 | across a set of machines.
 8 | 
 9 | Even a distributed or replicated application that is able to survive failures
10 | on one or more machines can benefit from a cluster manager because a cluster
11 | manager has the following capabilities:
12 | 
13 | #. Awareness of other applications in the stack
14 | 
15 |    While SYS-V init replacements like systemd can provide
16 |    deterministic recovery of a complex stack of services, the
17 |    recovery is limited to one machine and lacks the context of what
18 |    is happening on other machines. This context is crucial to
19 |    determine the difference between a local failure, and clean startup
20 |    and recovery after a total site failure.
21 | 
22 | #. Awareness of instances on other machines
23 | 
24 |    Services like RabbitMQ and Galera have complicated boot-up
25 |    sequences that require co-ordination, and often serialization, of
26 |    startup operations across all machines in the cluster. This is
27 |    especially true after a site-wide failure or shutdown where you must
28 |    first determine the last machine to be active.
29 | 
30 | #. A shared implementation and calculation of `quorum
31 |    <https://en.wikipedia.org/wiki/Quorum_(Distributed_Systems)>`_
32 | 
33 |    It is very important that all members of the system share the same
34 |    view of who their peers are and whether or not they are in the
35 |    majority. Failure to do this leads very quickly to an internal
36 |    `split-brain <https://en.wikipedia.org/wiki/Split-brain_(computing)>`_
37 |    state. This is where different parts of the system are pulling in
38 |    different and incompatible directions.
39 | 
40 | #. Data integrity through fencing (a non-responsive process does not
41 |    imply it is not doing anything)
42 | 
43 |    A single application does not have sufficient context to know the
44 |    difference between failure of a machine and failure of the
45 |    application on a machine. The usual practice is to assume the
46 |    machine is dead and continue working, however this is highly risky. A
47 |    rogue process or machine could still be responding to requests and
48 |    generally causing havoc. The safer approach is to make use of
49 |    remotely accessible power switches and/or network switches and SAN
50 |    controllers to fence (isolate) the machine before continuing.
51 | 
52 | #. Automated recovery of failed instances
53 | 
54 |    While the application can still run after the failure of several
55 |    instances, it may not have sufficient capacity to serve the
56 |    required volume of requests. A cluster can automatically recover
57 |    failed instances to prevent additional load induced failures.
58 | 
59 | Pacemaker
60 | ~~~~~~~~~
61 | .. to do: description and point to ref arch example using pacemaker
62 | 
63 | `Pacemaker <http://clusterlabs.org>`_.
64 | 
65 | Systemd
66 | ~~~~~~~
67 | .. to do: description and point to ref arch example using Systemd and link
68 | 


--------------------------------------------------------------------------------
/doc/source/intro-os-ha-memcached.rst:
--------------------------------------------------------------------------------
 1 | =========
 2 | Memcached
 3 | =========
 4 | 
 5 | Most OpenStack services can use Memcached to store ephemeral data such as
 6 | tokens. Although Memcached does not support typical forms of redundancy such
 7 | as clustering, OpenStack services can use almost any number of instances
 8 | by configuring multiple hostnames or IP addresses.
 9 | 
10 | The Memcached client implements hashing to balance objects among the instances.
11 | Failure of an instance only impacts a percentage of the objects,
12 | and the client automatically removes it from the list of instances.
13 | 
14 | Installation
15 | ~~~~~~~~~~~~
16 | 
17 | To install and configure Memcached, read the
18 | `official documentation <https://github.com/Memcached/Memcached/wiki#getting-started>`_.
19 | 
20 | Memory caching is managed by `oslo.cache
21 | <http://specs.openstack.org/openstack/oslo-specs/specs/kilo/oslo-cache-using-dogpile.html>`_.
22 | This ensures consistency across all projects when using multiple Memcached
23 | servers. The following is an example configuration with three hosts:
24 | 
25 | .. code-block:: ini
26 | 
27 |   Memcached_servers = controller1:11211,controller2:11211,controller3:11211
28 | 
29 | By default, ``controller1`` handles the caching service. If the host goes down,
30 | ``controller2`` or ``controller3`` will complete the service.
31 | 
32 | For more information about Memcached installation, see the
33 | *Environment -> Memcached* section in the
34 | `Installation Guides <https://docs.openstack.org/ocata/install/>`_
35 | depending on your distribution.
36 | 


--------------------------------------------------------------------------------
/doc/source/intro-os-ha-state.rst:
--------------------------------------------------------------------------------
 1 | ==================================
 2 | Stateless versus stateful services
 3 | ==================================
 4 | 
 5 | OpenStack components can be divided into three categories:
 6 | 
 7 | - OpenStack APIs: APIs that are HTTP(s) stateless services written in python,
 8 |   easy to duplicate and mostly easy to load balance.
 9 | 
10 | - The SQL relational database server provides stateful type consumed by other
11 |   components. Supported databases are MySQL, MariaDB, and PostgreSQL.
12 |   Making the SQL database redundant is complex.
13 | 
14 | - :term:`Advanced Message Queuing Protocol (AMQP)` provides OpenStack
15 |   internal stateful communication service.
16 | 
17 | .. to do: Ensure the difference between stateless and stateful services
18 | .. is clear
19 | 
20 | Stateless services
21 | ~~~~~~~~~~~~~~~~~~
22 | 
23 | A service that provides a response after your request and then
24 | requires no further attention. To make a stateless service highly
25 | available, you need to provide redundant instances and load balance them.
26 | 
27 | Stateless OpenStack services
28 | ----------------------------
29 | 
30 | OpenStack services that are stateless include ``nova-api``,
31 | ``nova-conductor``, ``glance-api``, ``keystone-api``, ``neutron-api``,
32 | and ``nova-scheduler``.
33 | 
34 | Stateful services
35 | ~~~~~~~~~~~~~~~~~
36 | 
37 | A service where subsequent requests to the service
38 | depend on the results of the first request.
39 | Stateful services are more difficult to manage because a single
40 | action typically involves more than one request. Providing
41 | additional instances and load balancing does not solve the problem.
42 | For example, if the horizon user interface reset itself every time
43 | you went to a new page, it would not be very useful.
44 | OpenStack services that are stateful include the OpenStack database
45 | and message queue.
46 | Making stateful services highly available can depend on whether you choose
47 | an active/passive or active/active configuration.
48 | 
49 | Stateful OpenStack services
50 | ----------------------------
51 | 
52 | .. to do: create list of stateful services
53 | 


--------------------------------------------------------------------------------
/doc/source/intro-os-ha.rst:
--------------------------------------------------------------------------------
 1 | ================================================
 2 | Introduction to high availability with OpenStack
 3 | ================================================
 4 | 
 5 | .. to do: description of section & improvement of title (intro to OS HA)
 6 | 
 7 | .. toctree::
 8 |    :maxdepth: 2
 9 | 
10 |    intro-os-ha-state.rst
11 |    intro-os-ha-cluster.rst
12 |    intro-os-ha-memcached.rst
13 | 


--------------------------------------------------------------------------------
/doc/source/monitoring.rst:
--------------------------------------------------------------------------------
1 | ==========
2 | Monitoring
3 | ==========
4 | 
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/doc/source/networking-ha-l3-agent.rst:
--------------------------------------------------------------------------------
 1 | ========
 2 | L3 Agent
 3 | ========
 4 | .. TODO: Introduce L3 agent
 5 | 
 6 | HA Routers
 7 | ~~~~~~~~~~
 8 | .. TODO: content for HA routers
 9 | 
10 | Networking DHCP agent
11 | ~~~~~~~~~~~~~~~~~~~~~
12 | The OpenStack Networking (neutron) service has a scheduler that lets you run
13 | multiple agents across nodes. The DHCP agent can be natively highly available.
14 | 
15 | To configure the number of DHCP agents per network, modify the
16 | ``dhcp_agents_per_network`` parameter in the :file:`/etc/neutron/neutron.conf`
17 | file. By default this is set to 1. To achieve high availability, assign more
18 | than one DHCP agent per network. For more information, see
19 | `High-availability for DHCP
20 | <https://docs.openstack.org/newton/networking-guide/config-dhcp-ha.html>`_.
21 | 


--------------------------------------------------------------------------------
/doc/source/networking-ha-neutron-l3-analysis.rst:
--------------------------------------------------------------------------------
1 | ==========
2 | Neutron L3
3 | ==========
4 | 
5 | .. TODO: create and import Neutron L3 analysis
6 |    Introduce the Networking (neutron) service L3 agent
7 | 


--------------------------------------------------------------------------------
/doc/source/networking-ha-neutron-server.rst:
--------------------------------------------------------------------------------
1 | =========================
2 | Neutron Networking server
3 | =========================
4 | 
5 | .. TODO: Create content similar to other API sections
6 | 


--------------------------------------------------------------------------------
/doc/source/networking-ha.rst:
--------------------------------------------------------------------------------
 1 | ===================================
 2 | Configuring the networking services
 3 | ===================================
 4 | 
 5 | Configure networking on each node. See the basic information about
 6 | configuring networking in the Networking service section of the
 7 | `Install Guides <https://docs.openstack.org/ocata/install/>`_,
 8 | depending on your distribution.
 9 | 
10 | OpenStack network nodes contain:
11 | 
12 | - Networking DHCP agent
13 | - Neutron L3 agent
14 | - Networking L2 agent
15 | 
16 | .. note::
17 | 
18 |    The L2 agent cannot be distributed and highly available. Instead, it
19 |    must be installed on each data forwarding node to control the virtual
20 |    network driver such as Open vSwitch or Linux Bridge. One L2 agent runs
21 |    per node and controls its virtual interfaces.
22 | 
23 | .. toctree::
24 |    :maxdepth: 2
25 | 
26 |    networking-ha-neutron-server.rst
27 |    networking-ha-neutron-l3-analysis.rst
28 |    networking-ha-l3-agent.rst
29 | 
30 | 


--------------------------------------------------------------------------------
/doc/source/overview.rst:
--------------------------------------------------------------------------------
 1 | ========
 2 | Overview
 3 | ========
 4 | 
 5 | This guide can be split into two parts:
 6 | 
 7 | #. High level architecture
 8 | #. Reference architecture examples, monitoring, and testing
 9 | 
10 | .. warning::
11 |    We recommend using this guide for assistance when considering your HA cloud.
12 |    We do not recommend using this guide for manually building your HA cloud.
13 |    We recommend starting with a pre-validated solution and adjusting to your
14 |    needs.
15 | 
16 | High availability is not for every user. It presents some challenges.
17 | High availability may be too complex for databases or
18 | systems with large amounts of data. Replication can slow large systems
19 | down. Different setups have different prerequisites. Read the guidelines
20 | for each setup.
21 | 
22 | .. important::
23 | 
24 |    High availability is turned off as the default in OpenStack setups.
25 | 


--------------------------------------------------------------------------------
/doc/source/ref-arch-examples.rst:
--------------------------------------------------------------------------------
1 | ======================
2 | Reference Architecture
3 | ======================
4 | 


--------------------------------------------------------------------------------
/doc/source/storage-ha-backend.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | .. _storage-ha-backend:
 3 | 
 4 | ================
 5 | Storage back end
 6 | ================
 7 | 
 8 | An OpenStack environment includes multiple data pools for the VMs:
 9 | 
10 | - Ephemeral storage is allocated for an instance and is deleted when the
11 |   instance is deleted. The Compute service manages ephemeral storage and
12 |   by default, Compute stores ephemeral drives as files on local disks on the
13 |   compute node. As an alternative, you can use Ceph RBD as the storage back
14 |   end for ephemeral storage.
15 | 
16 | - Persistent storage exists outside all instances. Two types of persistent
17 |   storage are provided:
18 | 
19 |   - The Block Storage service (cinder) that can use LVM or Ceph RBD as the
20 |     storage back end.
21 |   - The Image service (glance) that can use the Object Storage service (swift)
22 |     or Ceph RBD as the storage back end.
23 | 
24 | For more information about configuring storage back ends for
25 | the different storage options, see `Manage volumes
26 | <https://docs.openstack.org/admin-guide/blockstorage-manage-volumes.html>`_
27 | in the OpenStack Administrator Guide.
28 | 
29 | This section discusses ways to protect against data loss in your OpenStack
30 | environment.
31 | 
32 | RAID drives
33 | -----------
34 | 
35 | Configuring RAID on the hard drives that implement storage protects your data
36 | against a hard drive failure. If the node itself fails, data may be lost.
37 | In particular, all volumes stored on an LVM node can be lost.
38 | 
39 | Ceph
40 | ----
41 | 
42 | `Ceph RBD <https://ceph.io>`_ is an innately high availability storage back
43 | end. It creates a storage cluster with multiple nodes that communicate with
44 | each other to replicate and redistribute data dynamically.
45 | A Ceph RBD storage cluster provides a single shared set of storage nodes that
46 | can handle all classes of persistent and ephemeral data (glance, cinder, and
47 | nova) that are required for OpenStack instances.
48 | 
49 | Ceph RBD provides object replication capabilities by storing Block Storage
50 | volumes as Ceph RBD objects. Ceph RBD ensures that each replica of an object
51 | is stored on a different node. This means that your volumes are protected
52 | against hard drive and node failures, or even the failure of the data center
53 | itself.
54 | 
55 | When Ceph RBD is used for ephemeral volumes as well as block and image storage,
56 | it supports `live migration
57 | <https://docs.openstack.org/admin-guide/compute-live-migration-usage.html>`_
58 | of VMs with ephemeral drives. LVM only supports live migration of
59 | volume-backed VMs.
60 | 


--------------------------------------------------------------------------------
/doc/source/storage-ha-block.rst:
--------------------------------------------------------------------------------
  1 | ==================================
  2 | Highly available Block Storage API
  3 | ==================================
  4 | 
  5 | Cinder provides Block-Storage-as-a-Service suitable for performance
  6 | sensitive scenarios such as databases, expandable file systems, or
  7 | providing a server with access to raw block level storage.
  8 | 
  9 | Persistent block storage can survive instance termination and can also
 10 | be moved across instances like any external storage device. Cinder
 11 | also has volume snapshots capability for backing up the volumes.
 12 | 
 13 | Making the Block Storage API service highly available in
 14 | active/passive mode involves:
 15 | 
 16 | - :ref:`ha-blockstorage-pacemaker`
 17 | - :ref:`ha-blockstorage-configure`
 18 | - :ref:`ha-blockstorage-services`
 19 | 
 20 | In theory, you can run the Block Storage service as active/active.
 21 | However, because of sufficient concerns, we recommend running
 22 | the volume component as active/passive only.
 23 | 
 24 | You can read more about these concerns on the
 25 | `Red Hat Bugzilla <https://bugzilla.redhat.com/show_bug.cgi?id=1193229>`_
 26 | and there is a
 27 | `psuedo roadmap <https://etherpad.openstack.org/p/cinder-kilo-stabilisation-work>`_
 28 | for addressing them upstream.
 29 | 
 30 | .. _ha-blockstorage-pacemaker:
 31 | 
 32 | Add Block Storage API resource to Pacemaker
 33 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 34 | 
 35 | On RHEL-based systems, create resources for cinder's systemd agents and create
 36 | constraints to enforce startup/shutdown ordering:
 37 | 
 38 | .. code-block:: console
 39 | 
 40 |   pcs resource create openstack-cinder-api systemd:openstack-cinder-api --clone interleave=true
 41 |   pcs resource create openstack-cinder-scheduler systemd:openstack-cinder-scheduler --clone interleave=true
 42 |   pcs resource create openstack-cinder-volume systemd:openstack-cinder-volume
 43 | 
 44 |   pcs constraint order start openstack-cinder-api-clone then openstack-cinder-scheduler-clone
 45 |   pcs constraint colocation add openstack-cinder-scheduler-clone with openstack-cinder-api-clone
 46 |   pcs constraint order start openstack-cinder-scheduler-clone then openstack-cinder-volume
 47 |   pcs constraint colocation add openstack-cinder-volume with openstack-cinder-scheduler-clone
 48 | 
 49 | 
 50 | If the Block Storage service runs on the same nodes as the other services,
 51 | then it is advisable to also include:
 52 | 
 53 | .. code-block:: console
 54 | 
 55 |    pcs constraint order start openstack-keystone-clone then openstack-cinder-api-clone
 56 | 
 57 | Alternatively, instead of using systemd agents, download and
 58 | install the OCF resource agent:
 59 | 
 60 | .. code-block:: console
 61 | 
 62 |    # cd /usr/lib/ocf/resource.d/openstack
 63 |    # wget https://opendev.org/x/openstack-resource-agents/raw/branch/master/ocf/cinder-api
 64 |    # chmod a+rx *
 65 | 
 66 | You can now add the Pacemaker configuration for Block Storage API resource.
 67 | Connect to the Pacemaker cluster with the :command:`crm configure` command
 68 | and add the following cluster resources:
 69 | 
 70 | .. code-block:: none
 71 | 
 72 |    primitive p_cinder-api ocf:openstack:cinder-api \
 73 |       params config="/etc/cinder/cinder.conf" \
 74 |       os_password="secretsecret" \
 75 |       os_username="admin" \
 76 |       os_tenant_name="admin" \
 77 |       keystone_get_token_url="http://10.0.0.11:5000/v2.0/tokens" \
 78 |       op monitor interval="30s" timeout="30s"
 79 | 
 80 | This configuration creates ``p_cinder-api``, a resource for managing the
 81 | Block Storage API service.
 82 | 
 83 | The command :command:`crm configure` supports batch input, copy and paste the
 84 | lines above into your live Pacemaker configuration and then make changes as
 85 | required. For example, you may enter ``edit p_ip_cinder-api`` from the
 86 | :command:`crm configure` menu and edit the resource to match your preferred
 87 | virtual IP address.
 88 | 
 89 | Once completed, commit your configuration changes by entering :command:`commit`
 90 | from the :command:`crm configure` menu. Pacemaker then starts the Block Storage
 91 | API service and its dependent resources on one of your nodes.
 92 | 
 93 | .. _ha-blockstorage-configure:
 94 | 
 95 | Configure Block Storage API service
 96 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 97 | 
 98 | Edit the ``/etc/cinder/cinder.conf`` file. For example, on a RHEL-based system:
 99 | 
100 | .. code-block:: ini
101 |    :linenos:
102 | 
103 |    [DEFAULT]
104 |    # This is the name which we should advertise ourselves as and for
105 |    # A/P installations it should be the same everywhere
106 |    host = cinder-cluster-1
107 | 
108 |    # Listen on the Block Storage VIP
109 |    osapi_volume_listen = 10.0.0.11
110 | 
111 |    auth_strategy = keystone
112 |    control_exchange = cinder
113 | 
114 |    volume_driver = cinder.volume.drivers.nfs.NfsDriver
115 |    nfs_shares_config = /etc/cinder/nfs_exports
116 |    nfs_sparsed_volumes = true
117 |    nfs_mount_options = v3
118 | 
119 |    [database]
120 |    connection = mysql+pymysql://cinder:CINDER_DBPASS@10.0.0.11/cinder
121 |    max_retries = -1
122 | 
123 |    [keystone_authtoken]
124 |    # 10.0.0.11 is the Keystone VIP
125 |    identity_uri = http://10.0.0.11:35357/
126 |    www_authenticate_uri = http://10.0.0.11:5000/
127 |    admin_tenant_name = service
128 |    admin_user = cinder
129 |    admin_password = CINDER_PASS
130 | 
131 |    [oslo_messaging_rabbit]
132 |    # Explicitly list the rabbit hosts as it doesn't play well with HAProxy
133 |    rabbit_hosts = 10.0.0.12,10.0.0.13,10.0.0.14
134 |    # As a consequence, we also need HA queues
135 |    rabbit_ha_queues = True
136 |    heartbeat_timeout_threshold = 60
137 |    heartbeat_rate = 2
138 | 
139 | Replace ``CINDER_DBPASS`` with the password you chose for the Block Storage
140 | database. Replace ``CINDER_PASS`` with the password you chose for the
141 | ``cinder`` user in the Identity service.
142 | 
143 | This example assumes that you are using NFS for the physical storage, which
144 | will almost never be true in a production installation.
145 | 
146 | If you are using the Block Storage service OCF agent, some settings will
147 | be filled in for you, resulting in a shorter configuration file:
148 | 
149 | .. code-block:: ini
150 |    :linenos:
151 | 
152 |    # We have to use MySQL connection to store data:
153 |    connection = mysql+pymysql://cinder:CINDER_DBPASS@10.0.0.11/cinder
154 |    # Alternatively, you can switch to pymysql,
155 |    # a new Python 3 compatible library and use
156 |    # sql_connection = mysql+pymysql://cinder:CINDER_DBPASS@10.0.0.11/cinder
157 |    # and be ready when everything moves to Python 3.
158 |    # Ref: https://wiki.openstack.org/wiki/PyMySQL_evaluation
159 | 
160 |    # We bind Block Storage API to the VIP:
161 |    osapi_volume_listen = 10.0.0.11
162 | 
163 |    # We send notifications to High Available RabbitMQ:
164 |    notifier_strategy = rabbit
165 |    rabbit_host = 10.0.0.11
166 | 
167 | Replace ``CINDER_DBPASS`` with the password you chose for the Block Storage
168 | database.
169 | 
170 | .. _ha-blockstorage-services:
171 | 
172 | Configure OpenStack services to use the highly available Block Storage API
173 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
174 | 
175 | Your OpenStack services must now point their Block Storage API configuration
176 | to the highly available, virtual cluster IP address rather than a Block Storage
177 | API server’s physical IP address as you would for a non-HA environment.
178 | 
179 | Create the Block Storage API endpoint with this IP.
180 | 
181 | If you are using both private and public IP addresses, create two virtual IPs
182 | and define your endpoint. For example:
183 | 
184 | .. code-block:: console
185 | 
186 |    $ openstack endpoint create --region $KEYSTONE_REGION \
187 |      volumev2 public http://PUBLIC_VIP:8776/v2/%\(project_id\)s
188 |    $ openstack endpoint create --region $KEYSTONE_REGION \
189 |      volumev2 admin http://10.0.0.11:8776/v2/%\(project_id\)s
190 |    $ openstack endpoint create --region $KEYSTONE_REGION \
191 |      volumev2 internal http://10.0.0.11:8776/v2/%\(project_id\)s
192 | 
193 | Use Cinder volume backup and restore service
194 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
195 | 
196 | Cinder provides a feature to backup and restore volumes and snapshots.
197 | The first backup of a volume must be handled as a full backup.
198 | Subsequent backups may be either full or incremental backups from the
199 | last full backup.  See also `the Cinder Block Storage Administration
200 | Guide's section on backing up and restoring volumes and snapshots
201 | <https://docs.openstack.org/cinder/latest/admin/blockstorage-volume-backups.html>`_.
202 | 


--------------------------------------------------------------------------------
/doc/source/storage-ha-file-systems.rst:
--------------------------------------------------------------------------------
  1 | ========================================
  2 | Highly available Shared File Systems API
  3 | ========================================
  4 | 
  5 | Making the Shared File Systems (manila) API service highly available
  6 | in active/passive mode involves:
  7 | 
  8 | - :ref:`ha-sharedfilesystems-configure`
  9 | - :ref:`ha-sharedfilesystems-services`
 10 | - :ref:`ha-sharedfilesystems-pacemaker`
 11 | 
 12 | .. _ha-sharedfilesystems-configure:
 13 | 
 14 | Configure Shared File Systems API service
 15 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 16 | 
 17 | Edit the :file:`/etc/manila/manila.conf` file:
 18 | 
 19 | .. code-block:: ini
 20 |    :linenos:
 21 | 
 22 |    # We have to use MySQL connection to store data:
 23 |    sql_connection = mysql+pymysql://manila:password@10.0.0.11/manila?charset=utf8
 24 | 
 25 |    # We bind Shared File Systems API to the VIP:
 26 |    osapi_volume_listen = 10.0.0.11
 27 | 
 28 |    # We send notifications to High Available RabbitMQ:
 29 |    notifier_strategy = rabbit
 30 |    rabbit_host = 10.0.0.11
 31 | 
 32 | 
 33 | .. _ha-sharedfilesystems-services:
 34 | 
 35 | Configure OpenStack services to use Shared File Systems API
 36 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 37 | 
 38 | Your OpenStack services must now point their Shared File Systems API
 39 | configuration to the highly available, virtual cluster IP address rather than
 40 | a Shared File Systems API server’s physical IP address as you would
 41 | for a non-HA environment.
 42 | 
 43 | You must create the Shared File Systems API endpoint with this IP.
 44 | 
 45 | If you are using both private and public IP addresses, you should create two
 46 | virtual IPs and define your endpoints like this:
 47 | 
 48 | .. code-block:: console
 49 | 
 50 |    $ openstack endpoint create --region RegionOne \
 51 |      sharev2 public 'http://PUBLIC_VIP:8786/v2/%(tenant_id)s'
 52 | 
 53 |    $ openstack endpoint create --region RegionOne \
 54 |      sharev2 internal 'http://10.0.0.11:8786/v2/%(tenant_id)s'
 55 | 
 56 |    $ openstack endpoint create --region RegionOne \
 57 |      sharev2 admin 'http://10.0.0.11:8786/v2/%(tenant_id)s'
 58 | 
 59 | .. _ha-sharedfilesystems-pacemaker:
 60 | 
 61 | Add Shared File Systems API resource to Pacemaker
 62 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 63 | 
 64 | #. Download the resource agent to your system:
 65 | 
 66 |    .. code-block:: console
 67 | 
 68 |       # cd /usr/lib/ocf/resource.d/openstack
 69 |       # wget https://opendev.org/x/openstack-resource-agents/raw/branch/master/ocf/manila-api
 70 |       # chmod a+rx *
 71 | 
 72 | #. Add the Pacemaker configuration for the Shared File Systems
 73 |    API resource. Connect to the Pacemaker cluster with the following
 74 |    command:
 75 | 
 76 |    .. code-block:: console
 77 | 
 78 |       # crm configure
 79 | 
 80 |    .. note::
 81 | 
 82 |       The :command:`crm configure` supports batch input. Copy and paste
 83 |       the lines in the next step into your live Pacemaker configuration and then
 84 |       make changes as required.
 85 | 
 86 |       For example, you may enter ``edit p_ip_manila-api`` from the
 87 |       :command:`crm configure` menu and edit the resource to match your preferred
 88 |       virtual IP address.
 89 | 
 90 | #. Add the following cluster resources:
 91 | 
 92 |    .. code-block:: none
 93 | 
 94 |       primitive p_manila-api ocf:openstack:manila-api \
 95 |         params config="/etc/manila/manila.conf" \
 96 |         os_password="secretsecret" \
 97 |         os_username="admin" \
 98 |         os_tenant_name="admin" \
 99 |         keystone_get_token_url="http://10.0.0.11:5000/v2.0/tokens" \
100 |         op monitor interval="30s" timeout="30s"
101 | 
102 |    This configuration creates ``p_manila-api``, a resource for managing the
103 |    Shared File Systems API service.
104 | 
105 | #. Commit your configuration changes by entering the following command
106 |    from the :command:`crm configure` menu:
107 | 
108 |    .. code-block:: console
109 | 
110 |       # commit
111 | 
112 | Pacemaker now starts the Shared File Systems API service and its
113 | dependent resources on one of your nodes.
114 | 
115 | 


--------------------------------------------------------------------------------
/doc/source/storage-ha-image.rst:
--------------------------------------------------------------------------------
  1 | ==========================
  2 | Highly available Image API
  3 | ==========================
  4 | 
  5 | The OpenStack Image service offers a service for discovering, registering, and
  6 | retrieving virtual machine images. To make the OpenStack Image API service
  7 | highly available in active/passive mode, you must:
  8 | 
  9 | - :ref:`glance-api-pacemaker`
 10 | - :ref:`glance-api-configure`
 11 | - :ref:`glance-services`
 12 | 
 13 | Prerequisites
 14 | ~~~~~~~~~~~~~
 15 | 
 16 | Before beginning, ensure that you are familiar with the
 17 | documentation for installing the OpenStack Image API service.
 18 | See the *Image service* section in the
 19 | `Installation Guides <https://docs.openstack.org/ocata/install>`_,
 20 | depending on your distribution.
 21 | 
 22 | .. _glance-api-pacemaker:
 23 | 
 24 | Add OpenStack Image API resource to Pacemaker
 25 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 26 | 
 27 | #. Download the resource agent to your system:
 28 | 
 29 |    .. code-block:: console
 30 | 
 31 |       # cd /usr/lib/ocf/resource.d/openstack
 32 |       # wget https://opendev.org/x/openstack-resource-agents/raw/branch/master/ocf/glance-api
 33 |       # chmod a+rx *
 34 | 
 35 | #. Add the Pacemaker configuration for the OpenStack Image API resource.
 36 |    Use the following command to connect to the Pacemaker cluster:
 37 | 
 38 |    .. code-block:: console
 39 | 
 40 |       crm configure
 41 | 
 42 |    .. note::
 43 | 
 44 |       The :command:`crm configure` command supports batch input. Copy and paste
 45 |       the lines in the next step into your live Pacemaker configuration and
 46 |       then make changes as required.
 47 | 
 48 |       For example, you may enter ``edit p_ip_glance-api`` from the
 49 |       :command:`crm configure` menu and edit the resource to match your
 50 |       preferred virtual IP address.
 51 | 
 52 | #. Add the following cluster resources:
 53 | 
 54 |    .. code-block:: console
 55 | 
 56 |       primitive p_glance-api ocf:openstack:glance-api \
 57 |         params config="/etc/glance/glance-api.conf" \
 58 |         os_password="secretsecret" \
 59 |         os_username="admin" os_tenant_name="admin" \
 60 |         os_auth_url="http://10.0.0.11:5000/v2.0/" \
 61 |         op monitor interval="30s" timeout="30s"
 62 | 
 63 |    This configuration creates ``p_glance-api``, a resource for managing the
 64 |    OpenStack Image API service.
 65 | 
 66 | #. Commit your configuration changes by entering the following command from
 67 |    the :command:`crm configure` menu:
 68 | 
 69 |    .. code-block:: console
 70 | 
 71 |       commit
 72 | 
 73 | Pacemaker then starts the OpenStack Image API service and its dependent
 74 | resources on one of your nodes.
 75 | 
 76 | .. _glance-api-configure:
 77 | 
 78 | Configure OpenStack Image service API
 79 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 80 | 
 81 | Edit the :file:`/etc/glance/glance-api.conf` file
 82 | to configure the OpenStack Image service:
 83 | 
 84 | .. code-block:: ini
 85 | 
 86 |    # We have to use MySQL connection to store data:
 87 |    sql_connection=mysql://glance:password@10.0.0.11/glance
 88 |    # Alternatively, you can switch to pymysql,
 89 |    # a new Python 3 compatible library and use
 90 |    # sql_connection=mysql+pymysql://glance:password@10.0.0.11/glance
 91 |    # and be ready when everything moves to Python 3.
 92 |    # Ref: https://wiki.openstack.org/wiki/PyMySQL_evaluation
 93 | 
 94 |    # We bind OpenStack Image API to the VIP:
 95 |    bind_host = 10.0.0.11
 96 | 
 97 |    # Connect to OpenStack Image registry service:
 98 |    registry_host = 10.0.0.11
 99 | 
100 |    # We send notifications to High Available RabbitMQ:
101 |    notifier_strategy = rabbit
102 |    rabbit_host = 10.0.0.11
103 | 
104 | [TODO: need more discussion of these parameters]
105 | 
106 | .. _glance-services:
107 | 
108 | Configure OpenStack services to use the highly available OpenStack Image API
109 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
110 | 
111 | Your OpenStack services must now point their OpenStack Image API configuration
112 | to the highly available, virtual cluster IP address instead of pointing to the
113 | physical IP address of an OpenStack Image API server as you would in a non-HA
114 | cluster.
115 | 
116 | For example, if your OpenStack Image API service IP address is 10.0.0.11
117 | (as in the configuration explained here), you would use the following
118 | configuration in your :file:`nova.conf` file:
119 | 
120 | .. code-block:: ini
121 | 
122 |    [glance]
123 |    # ...
124 |    api_servers = 10.0.0.11
125 |    # ...
126 | 
127 | 
128 | You must also create the OpenStack Image API endpoint with this IP address.
129 | If you are using both private and public IP addresses, create two virtual IP
130 | addresses and define your endpoint. For example:
131 | 
132 | .. code-block:: console
133 | 
134 |    $ openstack endpoint create --region $KEYSTONE_REGION \
135 |      image public http://PUBLIC_VIP:9292
136 | 
137 |    $ openstack endpoint create --region $KEYSTONE_REGION \
138 |      image admin http://10.0.0.11:9292
139 | 
140 |    $ openstack endpoint create --region $KEYSTONE_REGION \
141 |      image internal http://10.0.0.11:9292
142 | 


--------------------------------------------------------------------------------
/doc/source/storage-ha.rst:
--------------------------------------------------------------------------------
 1 | ===================
 2 | Configuring storage
 3 | ===================
 4 | 
 5 | .. toctree::
 6 |    :maxdepth: 2
 7 | 
 8 |    storage-ha-image.rst
 9 |    storage-ha-block.rst
10 |    storage-ha-file-systems.rst
11 |    storage-ha-backend.rst
12 | 
13 | Making the Block Storage (cinder) API service highly available in
14 | active/active mode involves:
15 | 
16 | * Configuring Block Storage to listen on the VIP address
17 | 
18 | * Managing the Block Storage API daemon with the Pacemaker cluster manager
19 | 
20 | * Configuring OpenStack services to use this IP address
21 | 
22 | .. To Do: HA without Pacemaker
23 | 


--------------------------------------------------------------------------------
/doc/source/testing.rst:
--------------------------------------------------------------------------------
1 | =======
2 | Testing
3 | =======
4 | 
5 | 
6 | 
7 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | name = openstackhaguide
 3 | summary = OpenStack High Availability Guide
 4 | author = OpenStack
 5 | author_email = openstack-discuss@lists.openstack.org
 6 | home_page = https://docs.openstack.org/ha-guide/
 7 | classifier =
 8 |     Environment :: OpenStack
 9 |     Intended Audience :: Information Technology
10 |     Intended Audience :: System Administrators
11 |     License :: OSI Approved :: Apache Software License
12 |     Operating System :: POSIX :: Linux
13 |     Topic :: Documentation
14 | 


--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
 1 | [tox]
 2 | minversion = 4.4
 3 | skipsdist = True
 4 | envlist = docs
 5 | 
 6 | [testenv:docs]
 7 | deps =
 8 |   -r{toxinidir}/doc/requirements.txt
 9 | commands =
10 |   doc8 doc/source -e txt -e rst
11 |   sphinx-build -E -W -b html doc/source doc/build/html
12 | 
13 | [testenv:pdf-docs]
14 | deps = {[testenv:docs]deps}
15 | allowlist_externals =
16 |   make
17 | commands =
18 |   sphinx-build -j auto -W -b latex doc/source doc/build/pdf
19 |   make -C doc/build/pdf
20 | 
21 | [doc8]
22 | # Settings for doc8:
23 | # Ignore target directories and autogenerated files
24 | ignore-path = doc/*/target,doc/*/build*
25 | # File extensions to use
26 | extensions = .rst,.txt
27 | # Maximal line length should be 79 but we have some overlong lines.
28 | # Let's not get far more in.
29 | max-line-length = 79
30 | # Disable some doc8 checks:
31 | # D000: Check RST validity (cannot handle the "linenos" directive)
32 | ignore = D000
33 | 


--------------------------------------------------------------------------------