├── .gitignore ├── .travis.yml ├── LICENSE ├── README.md ├── ansible.cfg ├── config.yml ├── doc ├── README.rst └── generate_autodoc_index.sh ├── drill.sh ├── files └── get-random-osd.py ├── gremlin.png ├── inventory ├── group_vars │ └── all ├── hosts └── structure ├── playbooks ├── case.yml ├── cases │ ├── compute │ │ └── 001.yml │ ├── control │ │ └── 001.yml │ ├── network │ │ └── 001.yml │ ├── storage │ │ └── 001.yml │ └── system │ │ └── 001.yml ├── common │ ├── ask.yml │ ├── create_auth.yml │ ├── next.yml │ ├── port.yml │ ├── remove_auth.yml │ └── service.yml ├── compute │ ├── service.yml │ └── system.yml ├── control │ ├── cinder │ │ └── service.yml │ ├── drill_api.yml │ ├── drill_db.yml │ ├── drill_hy.yml │ ├── drill_lb.yml │ ├── drill_mc.yml │ ├── drill_mq.yml │ ├── glance │ │ └── service.yml │ ├── haproxy │ │ └── service.yml │ ├── keystone │ │ └── service.yml │ ├── memcache │ │ └── service.yml │ ├── mysql │ │ ├── service.yml │ │ ├── stress.yml │ │ └── system.yml │ ├── neutron │ │ └── service.yml │ ├── nova │ │ └── service.yml │ └── rabbitmq │ │ ├── service.yml │ │ ├── stress.yml │ │ └── system.yml ├── drill.yml ├── drill_compute.yml ├── drill_control.yml ├── drill_network.yml ├── drill_storage.yml ├── network │ ├── service.yml │ └── system.yml ├── storage │ ├── drill_mon.yml │ ├── drill_osd.yml │ ├── drill_rgw.yml │ ├── mon │ │ ├── damage_mon.yml │ │ └── kill_mon.yml │ ├── osd │ │ ├── del_osd_partition.yml │ │ └── kill_osd.yml │ └── rgw │ │ └── kill_rgw.yml └── system │ ├── base.yml │ ├── cpu_load.yml │ ├── disk_load.yml │ ├── mem_load.yml │ ├── nic.yml │ ├── nic_delay.yml │ ├── nic_down.yml │ └── nic_loss.yml └── roles ├── common ├── defaults │ └── main.yml └── tasks │ ├── port_add.yml │ ├── port_del.yml │ ├── start_service.yml │ └── stop_service.yml ├── compute ├── README.md ├── defaults │ └── main.yml ├── meta │ └── main.yml └── service │ └── main.yml ├── control ├── README.md ├── defaults │ └── main.yml ├── files │ └── stress_mq.py ├── meta │ └── main.yml 
└── tasks │ ├── purge_queue.yml │ ├── stress_db.yml │ └── stress_mq.yml ├── network ├── README.md ├── defaults │ └── main.yml ├── meta │ └── main.yml └── service │ └── tasks │ └── main.yml ├── provision ├── README.md ├── defaults │ └── main.yml ├── local │ └── tasks │ │ └── main.yml ├── meta │ └── main.yml ├── os_auth │ ├── defaults │ │ └── main.yml │ └── tasks │ │ ├── create_auth.yml │ │ └── remove_auth.yml ├── os_stack │ ├── defaults │ │ └── main.yml │ └── tasks │ │ ├── create_stack.yml │ │ └── remove_stack.yml ├── teardown │ ├── meta │ │ └── main.yml │ └── tasks │ │ └── main.yml └── user │ ├── meta │ └── main.yml │ └── tasks │ └── main.yml ├── storage ├── README.md ├── defaults │ └── main.yml ├── meta │ └── main.yml └── tasks │ ├── damage_mon.yml │ ├── del_osd_partition.yml │ ├── kill_mon.yml │ ├── kill_osd.yml │ ├── kill_rgw.yml │ ├── recover_damage_mon.yml │ ├── recover_osd_partition.yml │ ├── start_mon.yml │ ├── start_osd.yml │ ├── start_rgw.yml │ └── stop_mon.yml └── system ├── defaults └── main.yml ├── meta └── main.yml └── tasks ├── clear_tc.yml ├── cpu_load.yml ├── disk_load.yml ├── mem_load.yml ├── nic_delay.yml ├── nic_down.yml ├── nic_down_async.yml └── nic_loss.yml /.gitignore: -------------------------------------------------------------------------------- 1 | .gremlin 2 | inventory/hosts 3 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | language: python 3 | python: "2.7" 4 | 5 | install: 6 | - sudo pip install ansible 7 | 8 | script: 9 | - ./drill.sh -p playbooks/drill.yml -i inventory/structure -t all --syntax-check 10 | 11 | notifications: 12 | email: false 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | 
http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Gremlin 2 | 3 | [![Build Status](https://travis-ci.org/unitedstack/gremlin.svg?branch=master)](https://travis-ci.org/unitedstack/gremlin) 4 | 5 | OpenStack reliability verification and fault drill system. 6 | 7 | ![](./gremlin.png) 8 | 9 | ## Background 10 | 11 | IaaS is the cornerstone of building IT systems, and the stability and reliability of 12 | the IaaS system are critical for customers. But how do we evaluate its stability and 13 | reliability after an IaaS system has been deployed? This needs to be actually 14 | VERIFIED. And how do we quickly locate the fault when the system fails? 15 | This needs an actual FAULT DRILL. When our customers know when, how and why 16 | the system will fail, and know how to handle this situation, it will be 17 | very helpful to grow their confidence in their system. 
18 | 19 | So, we designed the OpenStack Reliability Verification and Fault Drill program, 20 | it will do reliability verification from multiple dimensions of cloud platform, 21 | and will introduce man-made failures by using some operation tools, thus we can 22 | carry on fault drill along with monitoring system and logging system. 23 | 24 | ## Principle 25 | 26 | The program should follow the principles below: 27 | 28 | 1. All faults introduced should be alerted by monitoring system. 29 | 2. All faults introduced can do fallback. 30 | 3. All faults introduced should do cleanup when a fault drill is done. 31 | 32 | 33 | ## Design 34 | 35 | To cover more fault drill cases, the design will be formed from two aspects: 36 | 37 | 1. Horizontally, from node role, such as controller, network, compute, storage 38 | 2. Vertically, from system level, service level, and physical level 39 | 40 | A broad set of fault drill cases can be designed by combining these two dimensions. 41 | The test cases of system level and service level can be automated, but part of physical 42 | level test cases should be operated by a human. 43 | 44 | 45 | ## Usage 46 | 47 | There are two modes when running gremlin: 48 | 49 | * auto: All test cases will run automatically. It will introduce a fault and recover 50 | this fault automatically. The default mode is auto. 51 | * manual: Will run in interactive mode; when every test case is done, it will prompt 52 | to ask whether to execute the next one. And after introducing a fault, it will 53 | ask whether to recover this fault automatically. 54 | 55 | Before running gremlin, ensure the host running gremlin can ssh to the target hosts 56 | without password. 57 | 58 | Now, follow the steps below to get started: 59 | 60 | 1. Get the code 61 | 62 | ``` 63 | git clone https://github.com/unitedstack/gremlin.git 64 | ``` 65 | 66 | 2. Install dependencies 67 | 68 | ``` 69 | ./drill.sh --install-deps 70 | ``` 71 | 72 | 3. 
Define your inventory 73 | 74 | You should define your inventory according your environments. Modify the 75 | inventory/hosts file. 76 | 77 | 4. Define your configuration 78 | 79 | Edit the config.yml to fit your environments. 80 | 81 | 5. Run your test cases 82 | 83 | 5.1 Run all test cases automatically: 84 | 85 | ``` 86 | ./drill.sh -t all 87 | ``` 88 | 89 | 5.2 Run all test cases manually: 90 | 91 | ``` 92 | ./drill.sh -t all --mode manual 93 | ``` 94 | 95 | 5.3 Run some specified test cases manually 96 | 97 | ``` 98 | ./drill.sh -t mon-pre,mon-down --mode manual 99 | ``` 100 | 101 | 6. To get more help info 102 | 103 | ``` 104 | ./drill.sh -h 105 | ``` 106 | 107 | ## More 108 | 109 | * Documentation: https://docs.openstack.org/gremlin 110 | -------------------------------------------------------------------------------- /ansible.cfg: -------------------------------------------------------------------------------- 1 | # config file for ansible -- https://ansible.com/ 2 | # =============================================== 3 | 4 | # nearly all parameters can be overridden in ansible-playbook 5 | # or with command line flags. ansible will read ANSIBLE_CONFIG, 6 | # ansible.cfg in the current working directory, .ansible.cfg in 7 | # the home directory or /etc/ansible/ansible.cfg, whichever it 8 | # finds first 9 | 10 | [defaults] 11 | 12 | # some basic default values... 13 | 14 | inventory = inventory/ 15 | #library = /usr/share/my_modules/ 16 | #module_utils = /usr/share/my_module_utils/ 17 | #remote_tmp = ~/.ansible/tmp 18 | #local_tmp = ~/.ansible/tmp 19 | #forks = 5 20 | #poll_interval = 15 21 | #sudo_user = root 22 | #ask_sudo_pass = True 23 | #ask_pass = True 24 | #transport = smart 25 | #remote_port = 22 26 | #module_lang = C 27 | #module_set_locale = False 28 | 29 | # plays will gather facts by default, which contain information about 30 | # the remote system. 
31 | # 32 | # smart - gather by default, but don't regather if already gathered 33 | # implicit - gather by default, turn off with gather_facts: False 34 | # explicit - do not gather by default, must say gather_facts: True 35 | gathering = smart 36 | 37 | # This only affects the gathering done by a play's gather_facts directive, 38 | # by default gathering retrieves all facts subsets 39 | # all - gather all subsets 40 | # network - gather min and network facts 41 | # hardware - gather hardware facts (longest facts to retrieve) 42 | # virtual - gather min and virtual facts 43 | # facter - import facts from facter 44 | # ohai - import facts from ohai 45 | # You can combine them using comma (ex: network,virtual) 46 | # You can negate them using ! (ex: !hardware,!facter,!ohai) 47 | # A minimal set of facts is always gathered. 48 | #gather_subset = all 49 | 50 | # some hardware related facts are collected 51 | # with a maximum timeout of 10 seconds. This 52 | # option lets you increase or decrease that 53 | # timeout to something more suitable for the 54 | # environment. 55 | # gather_timeout = 10 56 | 57 | # additional paths to search for roles in, colon separated 58 | roles_path = roles 59 | 60 | # uncomment this to disable SSH key host checking 61 | host_key_checking = False 62 | 63 | # change the default callback, you can only have one 'stdout' type enabled at a time. 64 | #stdout_callback = skippy 65 | 66 | 67 | ## Ansible ships with some plugins that require whitelisting, 68 | ## this is done to avoid running all of a type by default. 69 | ## These setting lists those that you want enabled for your system. 70 | ## Custom plugins should not need this unless plugin author specifies it. 71 | 72 | # enable callback plugins, they can output to stdout but cannot be 'stdout' type. 
73 | #callback_whitelist = timer, mail 74 | 75 | # enable inventory plugins, default: 'host_list', 'script', 'yaml', 'ini' 76 | #inventory_enabled = host_list, aws, openstack, docker 77 | 78 | # Determine whether includes in tasks and handlers are "static" by 79 | # default. As of 2.0, includes are dynamic by default. Setting these 80 | # values to True will make includes behave more like they did in the 81 | # 1.x versions. 82 | #task_includes_static = True 83 | #handler_includes_static = True 84 | 85 | # Controls if a missing handler for a notification event is an error or a warning 86 | #error_on_missing_handler = True 87 | 88 | # change this for alternative sudo implementations 89 | #sudo_exe = sudo 90 | 91 | # What flags to pass to sudo 92 | # WARNING: leaving out the defaults might create unexpected behaviours 93 | #sudo_flags = -H -S -n 94 | 95 | # SSH timeout 96 | #timeout = 10 97 | 98 | # default user to use for playbooks if user is not specified 99 | # (/usr/bin/ansible will use current user as default) 100 | #remote_user = root 101 | 102 | # logging is off by default unless this path is defined 103 | # if so defined, consider logrotate 104 | # log_path = drill.log 105 | 106 | # default module name for /usr/bin/ansible 107 | #module_name = command 108 | 109 | # use this shell for commands executed under sudo 110 | # you may need to change this to bin/bash in rare instances 111 | # if sudo is constrained 112 | #executable = /bin/sh 113 | 114 | # if inventory variables overlap, does the higher precedence one win 115 | # or are hash values merged together? The default is 'replace' but 116 | # this can also be set to 'merge'. 117 | #hash_behaviour = replace 118 | 119 | # by default, variables from roles will be visible in the global variable 120 | # scope. 
To prevent this, the following option can be enabled, and only 121 | # tasks and handlers within the role will see the variables there 122 | #private_role_vars = yes 123 | 124 | # list any Jinja2 extensions to enable here: 125 | #jinja2_extensions = jinja2.ext.do,jinja2.ext.i18n 126 | 127 | # if set, always use this private key file for authentication, same as 128 | # if passing --private-key to ansible or ansible-playbook 129 | #private_key_file = /path/to/file 130 | 131 | # If set, configures the path to the Vault password file as an alternative to 132 | # specifying --vault-password-file on the command line. 133 | #vault_password_file = /path/to/vault_password_file 134 | 135 | # format of string {{ ansible_managed }} available within Jinja2 136 | # templates indicates to users editing templates files will be replaced. 137 | # replacing {file}, {host} and {uid} and strftime codes with proper values. 138 | #ansible_managed = Ansible managed: {file} modified on %Y-%m-%d %H:%M:%S by {uid} on {host} 139 | # {file}, {host}, {uid}, and the timestamp can all interfere with idempotence 140 | # in some situations so the default is a static string: 141 | #ansible_managed = Ansible managed 142 | 143 | # by default, ansible-playbook will display "Skipping [host]" if it determines a task 144 | # should not be run on a host. Set this to "False" if you don't want to see these "Skipping" 145 | # messages. NOTE: the task header will still be shown regardless of whether or not the 146 | # task is skipped. 147 | display_skipped_hosts = False 148 | 149 | # by default, if a task in a playbook does not include a name: field then 150 | # ansible-playbook will construct a header that includes the task's action but 151 | # not the task's args. This is a security feature because ansible cannot know 152 | # if the *module* considers an argument to be no_log at the time that the 153 | # header is printed. 
If your environment doesn't have a problem securing 154 | # stdout from ansible-playbook (or you have manually specified no_log in your 155 | # playbook on all of the tasks where you have secret information) then you can 156 | # safely set this to True to get more informative messages. 157 | display_args_to_stdout = False 158 | 159 | # by default (as of 1.3), Ansible will raise errors when attempting to dereference 160 | # Jinja2 variables that are not set in templates or action lines. Uncomment this line 161 | # to revert the behavior to pre-1.3. 162 | #error_on_undefined_vars = False 163 | 164 | # by default (as of 1.6), Ansible may display warnings based on the configuration of the 165 | # system running ansible itself. This may include warnings about 3rd party packages or 166 | # other conditions that should be resolved if possible. 167 | # to disable these warnings, set the following value to False: 168 | #system_warnings = True 169 | 170 | # by default (as of 1.4), Ansible may display deprecation warnings for language 171 | # features that should no longer be used and will be removed in future versions. 172 | # to disable these warnings, set the following value to False: 173 | #deprecation_warnings = True 174 | 175 | # (as of 1.8), Ansible can optionally warn when usage of the shell and 176 | # command module appear to be simplified by using a default Ansible module 177 | # instead. These warnings can be silenced by adjusting the following 178 | # setting or adding warn=yes or warn=no to the end of the command line 179 | # parameter string. This will for example suggest using the git module 180 | # instead of shelling out to the git command. 
181 | command_warnings = False 182 | 183 | 184 | # set plugin path directories here, separate with colons 185 | #action_plugins = /usr/share/ansible/plugins/action 186 | #cache_plugins = /usr/share/ansible/plugins/cache 187 | callback_plugins = callback_plugins 188 | #connection_plugins = /usr/share/ansible/plugins/connection 189 | #lookup_plugins = /usr/share/ansible/plugins/lookup 190 | #inventory_plugins = /usr/share/ansible/plugins/inventory 191 | #vars_plugins = /usr/share/ansible/plugins/vars 192 | #filter_plugins = /usr/share/ansible/plugins/filter 193 | #test_plugins = /usr/share/ansible/plugins/test 194 | #terminal_plugins = /usr/share/ansible/plugins/terminal 195 | #strategy_plugins = /usr/share/ansible/plugins/strategy 196 | 197 | 198 | # by default, ansible will use the 'linear' strategy but you may want to try 199 | # another one 200 | #strategy = free 201 | 202 | # by default callbacks are not loaded for /bin/ansible, enable this if you 203 | # want, for example, a notification or logging callback to also apply to 204 | # /bin/ansible runs 205 | #bin_ansible_callbacks = False 206 | 207 | 208 | # don't like cows? that's unfortunate. 209 | # set to 1 if you don't want cowsay support or export ANSIBLE_NOCOWS=1 210 | #nocows = 1 211 | 212 | # set which cowsay stencil you'd like to use by default. When set to 'random', 213 | # a random stencil will be selected for each task. The selection will be filtered 214 | # against the `cow_whitelist` option below. 215 | #cow_selection = default 216 | #cow_selection = random 217 | 218 | # when using the 'random' option for cowsay, stencils will be restricted to this list. 219 | # it should be formatted as a comma-separated list with no spaces between names. 220 | # NOTE: line continuations here are for formatting purposes only, as the INI parser 221 | # in python does not support them. 
222 | #cow_whitelist=bud-frogs,bunny,cheese,daemon,default,dragon,elephant-in-snake,elephant,eyes,\ 223 | # hellokitty,kitty,luke-koala,meow,milk,moofasa,moose,ren,sheep,small,stegosaurus,\ 224 | # stimpy,supermilker,three-eyes,turkey,turtle,tux,udder,vader-koala,vader,www 225 | 226 | # don't like colors either? 227 | # set to 1 if you don't want colors, or export ANSIBLE_NOCOLOR=1 228 | #nocolor = 1 229 | 230 | # if set to a persistent type (not 'memory', for example 'redis') fact values 231 | # from previous runs in Ansible will be stored. This may be useful when 232 | # wanting to use, for example, IP information from one group of servers 233 | # without having to talk to them in the same playbook run to get their 234 | # current IP information. 235 | #fact_caching = memory 236 | 237 | 238 | # retry files 239 | # When a playbook fails by default a .retry file will be created in ~/ 240 | # You can disable this feature by setting retry_files_enabled to False 241 | # and you can change the location of the files by setting retry_files_save_path 242 | 243 | retry_files_enabled = False 244 | #retry_files_save_path = ~/.ansible-retry 245 | 246 | # squash actions 247 | # Ansible can optimise actions that call modules with list parameters 248 | # when looping. Instead of calling the module once per with_ item, the 249 | # module is called once with all items at once. Currently this only works 250 | # under limited circumstances, and only with parameters named 'name'. 251 | #squash_actions = apk,apt,dnf,homebrew,pacman,pkgng,yum,zypper 252 | 253 | # prevents logging of task data, off by default 254 | #no_log = False 255 | 256 | # prevents logging of tasks, but only on the targets, data is still logged on the master/controller 257 | #no_target_syslog = False 258 | 259 | # controls whether Ansible will raise an error or warning if a task has no 260 | # choice but to create world readable temporary files to execute a module on 261 | # the remote machine. 
This option is False by default for security. Users may 262 | # turn this on to have behaviour more like Ansible prior to 2.1.x. See 263 | # https://docs.ansible.com/ansible/become.html#becoming-an-unprivileged-user 264 | # for more secure ways to fix this than enabling this option. 265 | #allow_world_readable_tmpfiles = False 266 | 267 | # controls the compression level of variables sent to 268 | # worker processes. At the default of 0, no compression 269 | # is used. This value must be an integer from 0 to 9. 270 | #var_compression_level = 9 271 | 272 | # controls what compression method is used for new-style ansible modules when 273 | # they are sent to the remote system. The compression types depend on having 274 | # support compiled into both the controller's python and the client's python. 275 | # The names should match with the python Zipfile compression types: 276 | # * ZIP_STORED (no compression. available everywhere) 277 | # * ZIP_DEFLATED (uses zlib, the default) 278 | # These values may be set per host via the ansible_module_compression inventory 279 | # variable 280 | #module_compression = 'ZIP_DEFLATED' 281 | 282 | # This controls the cutoff point (in bytes) on --diff for files 283 | # set to 0 for unlimited (RAM may suffer!). 284 | #max_diff_size = 1048576 285 | 286 | # This controls how ansible handles multiple --tags and --skip-tags arguments 287 | # on the CLI. If this is True then multiple arguments are merged together. If 288 | # it is False, then the last specified argument is used and the others are ignored. 289 | # This option will be removed in 2.8. 
290 | #merge_multiple_cli_flags = True 291 | 292 | # Controls showing custom stats at the end, off by default 293 | #show_custom_stats = True 294 | 295 | # Controls which files to ignore when using a directory as inventory with 296 | # possibly multiple sources (both static and dynamic) 297 | #inventory_ignore_extensions = ~, .orig, .bak, .ini, .cfg, .retry, .pyc, .pyo 298 | 299 | # This family of modules use an alternative execution path optimized for network appliances 300 | # only update this setting if you know how this works, otherwise it can break module execution 301 | #network_group_modules=['eos', 'nxos', 'ios', 'iosxr', 'junos', 'vyos'] 302 | 303 | # This keeps facts from polluting the main namespace as variables. 304 | # Setting to True keeps them under the ansible_facts namespace, the default is False 305 | #restrict_facts_namespace: True 306 | 307 | # When enabled, this option allows lookups (via variables like {{lookup('foo')}} or when used as 308 | # a loop with `with_foo`) to return data that is not marked "unsafe". This means the data may contain 309 | # jinja2 templating language which will be run through the templating engine. 310 | # ENABLING THIS COULD BE A SECURITY RISK 311 | #allow_unsafe_lookups = False 312 | 313 | # set default errors for all plays 314 | #any_errors_fatal = False 315 | 316 | [privilege_escalation] 317 | #become=True 318 | #become_method=sudo 319 | #become_user=root 320 | #become_ask_pass=False 321 | 322 | [paramiko_connection] 323 | 324 | # uncomment this line to cause the paramiko connection plugin to not record new host 325 | # keys encountered. Increases performance on new host additions. Setting works independently of the 326 | # host key checking setting above. 327 | #record_host_keys=False 328 | 329 | # by default, Ansible requests a pseudo-terminal for commands executed under sudo. Uncomment this 330 | # line to disable this behaviour. 
331 | #pty=False 332 | 333 | # paramiko will default to looking for SSH keys initially when trying to 334 | # authenticate to remote devices. This is a problem for some network devices 335 | # that close the connection after a key failure. Uncomment this line to 336 | # disable the Paramiko look for keys function 337 | #look_for_keys = False 338 | 339 | # When using persistent connections with Paramiko, the connection runs in a 340 | # background process. If the host doesn't already have a valid SSH key, by 341 | # default Ansible will prompt to add the host key. This will cause connections 342 | # running in background processes to fail. Uncomment this line to have 343 | # Paramiko automatically add host keys. 344 | #host_key_auto_add = True 345 | 346 | [ssh_connection] 347 | 348 | # ssh arguments to use 349 | # Leaving off ControlPersist will result in poor performance, so use 350 | # paramiko on older platforms rather than removing it, -C controls compression use 351 | #ssh_args = -C -o ControlMaster=auto -o ControlPersist=60s 352 | 353 | # The base directory for the ControlPath sockets. 354 | # This is the "%(directory)s" in the control_path option 355 | # 356 | # Example: 357 | # control_path_dir = /tmp/.ansible/cp 358 | #control_path_dir = ~/.ansible/cp 359 | 360 | # The path to use for the ControlPath sockets. This defaults to a hashed string of the hostname, 361 | # port and username (empty string in the config). The hash mitigates a common problem users 362 | # found with long hostames and the conventional %(directory)s/ansible-ssh-%%h-%%p-%%r format. 363 | # In those cases, a "too long for Unix domain socket" ssh error would occur. 364 | # 365 | # Example: 366 | # control_path = %(directory)s/%%h-%%r 367 | #control_path = 368 | 369 | # Enabling pipelining reduces the number of SSH operations required to 370 | # execute a module on the remote server. 
This can result in a significant 371 | # performance improvement when enabled, however when using "sudo:" you must 372 | # first disable 'requiretty' in /etc/sudoers 373 | # 374 | # By default, this option is disabled to preserve compatibility with 375 | # sudoers configurations that have requiretty (the default on many distros). 376 | # 377 | #pipelining = False 378 | 379 | # Control the mechanism for transferring files (old) 380 | # * smart = try sftp and then try scp [default] 381 | # * True = use scp only 382 | # * False = use sftp only 383 | #scp_if_ssh = smart 384 | 385 | # Control the mechanism for transferring files (new) 386 | # If set, this will override the scp_if_ssh option 387 | # * sftp = use sftp to transfer files 388 | # * scp = use scp to transfer files 389 | # * piped = use 'dd' over SSH to transfer files 390 | # * smart = try sftp, scp, and piped, in that order [default] 391 | #transfer_method = smart 392 | 393 | # if False, sftp will not use batch mode to transfer files. This may cause some 394 | # types of file transfer failures impossible to catch however, and should 395 | # only be disabled if your sftp version has problems with batch mode 396 | #sftp_batch_mode = False 397 | 398 | [persistent_connection] 399 | 400 | # Configures the persistent connection timeout value in seconds. This value is 401 | # how long the persistent connection will remain idle before it is destroyed. 402 | # If the connection doesn't receive a request before the timeout value 403 | # expires, the connection is shutdown. The default value is 30 seconds. 404 | #connect_timeout = 30 405 | 406 | # Configures the persistent connection retry timeout. This value configures the 407 | # the retry timeout that ansible-connection will wait to connect 408 | # to the local domain socket. This value must be larger than the 409 | # ssh timeout (timeout) and less than persistent connection idle timeout (connect_timeout). 410 | # The default value is 15 seconds. 
411 | #connect_retry_timeout = 15 412 | 413 | # The command timeout value defines the amount of time to wait for a command 414 | # or RPC call before timing out. The value for the command timeout must 415 | # be less than the value of the persistent connection idle timeout (connect_timeout) 416 | # The default value is 10 second. 417 | #command_timeout = 10 418 | 419 | [accelerate] 420 | #accelerate_port = 5099 421 | #accelerate_timeout = 30 422 | #accelerate_connect_timeout = 5.0 423 | 424 | # The daemon timeout is measured in minutes. This time is measured 425 | # from the last activity to the accelerate daemon. 426 | #accelerate_daemon_timeout = 30 427 | 428 | # If set to yes, accelerate_multi_key will allow multiple 429 | # private keys to be uploaded to it, though each user must 430 | # have access to the system via SSH to add a new key. The default 431 | # is "no". 432 | #accelerate_multi_key = yes 433 | 434 | [selinux] 435 | # file systems that require special treatment when dealing with security context 436 | # the default behaviour that copies the existing context or uses the user default 437 | # needs to be changed to use the file system dependent context. 438 | #special_context_filesystems=nfs,vboxsf,fuse,ramfs,9p 439 | 440 | # Set this to yes to allow libvirt_lxc connections to work without SELinux. 
441 | #libvirt_lxc_noseclabel = yes 442 | 443 | [colors] 444 | #highlight = white 445 | #verbose = blue 446 | #warn = bright purple 447 | #error = red 448 | #debug = dark gray 449 | #deprecate = purple 450 | #skip = cyan 451 | #unreachable = red 452 | #ok = green 453 | #changed = yellow 454 | #diff_add = green 455 | #diff_remove = red 456 | #diff_lines = cyan 457 | 458 | 459 | [diff] 460 | # Always print diff when running ( same as always running with -D/--diff ) 461 | # always = no 462 | 463 | # Set how many context lines to show in diff 464 | # context = 3 465 | -------------------------------------------------------------------------------- /config.yml: -------------------------------------------------------------------------------- 1 | manage_packages: true 2 | mgmt_nic_name: "eth0" 3 | tenant_nic_name: "eth0" 4 | provider_nic_name: "eth0" 5 | external_nic_name: "eth0" 6 | storage_nic_name: "eth0" 7 | storage_mgmt_nic_name: "eth0" 8 | 9 | stress_mysql_time: 300 10 | stress_mysql_host: localhost 11 | sysbench_threads: 1000 12 | sysbench_user: sysbench 13 | sysbench_password: sysbench 14 | sysbench_database: sysbench 15 | 16 | physical_network_bridge: ovsbr3 17 | physical_network_bridge_port: eth0 18 | 19 | external_network_bridge: br-ex 20 | external_network_bridge_port: eth0 21 | 22 | rabbit_host: localhost 23 | rabbit_username: guest 24 | rabbit_password: guest 25 | 26 | # Auth for OpenStack 27 | os_auth_url: 'http://127.0.0.1:5000/v3' 28 | os_project_domain_name: Default 29 | os_user_domain_name: Default 30 | os_admin_project: admin 31 | os_admin_username: admin 32 | os_admin_password: admin 33 | os_gremlin_role: member 34 | -------------------------------------------------------------------------------- /doc/README.rst: -------------------------------------------------------------------------------- 1 | ================= 2 | Building the docs 3 | ================= 4 | 5 | Dependencies 6 | ============ 7 | 8 | Sphinx_ 9 | You'll need sphinx (the python one) 
and if you are 10 | using the virtualenv you'll need to install it in the virtualenv 11 | specifically so that it can load the cinder modules. 12 | 13 | :: 14 | 15 | pip install Sphinx 16 | 17 | Graphviz_ 18 | Some of the diagrams are generated using the ``dot`` language 19 | from Graphviz. 20 | 21 | :: 22 | 23 | sudo apt-get install graphviz 24 | 25 | .. _Sphinx: http://sphinx.pocoo.org 26 | 27 | .. _Graphviz: http://www.graphviz.org/ 28 | 29 | 30 | Use `make` 31 | ========== 32 | 33 | Just type make:: 34 | 35 | % make 36 | 37 | Look in the Makefile for more targets. 38 | 39 | 40 | Manually 41 | ======== 42 | 43 | 1. Generate the code.rst file so that Sphinx will pull in our docstrings:: 44 | 45 | % ./generate_autodoc_index.sh > source/code.rst 46 | 47 | 2. Run `sphinx_build`:: 48 | 49 | % sphinx-build -b html source build/html 50 | 51 | 52 | The docs have been built 53 | ======================== 54 | 55 | Check out the `build` directory to find them. Yay! 56 | -------------------------------------------------------------------------------- /doc/generate_autodoc_index.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | SOURCEDIR=doc/source/api 4 | 5 | if [ ! -d ${SOURCEDIR} ] ; then 6 | mkdir -p ${SOURCEDIR} 7 | fi 8 | 9 | for x in `./doc/find_autodoc_modules.sh`; 10 | do 11 | echo "Generating ${SOURCEDIR}/${x}.rst" 12 | echo "${SOURCEDIR}/${x}.rst" >> .autogenerated 13 | heading="The :mod:\`${x}\` Module" 14 | # Figure out how long the heading is 15 | # and make sure to emit that many '=' under 16 | # it to avoid heading format errors 17 | # in Sphinx. 18 | heading_len=$(echo "$heading" | wc -c) 19 | underline=$(head -c $heading_len < /dev/zero | tr '\0' '=') 20 | ( cat < ${SOURCEDIR}/${x}.rst 30 | 31 | done 32 | 33 | if [ ! 
-f ${SOURCEDIR}/autoindex.rst ] ; then 34 | 35 | cat > ${SOURCEDIR}/autoindex.rst <> ${SOURCEDIR}/autoindex.rst 43 | done 44 | 45 | echo ${SOURCEDIR}/autoindex.rst >> .autogenerated 46 | fi 47 | -------------------------------------------------------------------------------- /drill.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # With LANG set to everything else than C completely undercipherable errors 4 | # like "file not found" and decoding errors will start to appear during scripts 5 | # or even ansible modules 6 | LANG=C 7 | 8 | GREM_DIR=$(dirname $( readlink -f "${BASH_SOURCE[0]}" )) 9 | DEFAULT_OPT_TAGS="untagged" 10 | 11 | 12 | : ${OPT_TAGS:=$DEFAULT_OPT_TAGS} 13 | : ${OPT_PLAYBOOK:=$GREM_DIR/playbooks/drill.yml} 14 | : ${OPT_WORKDIR:=$GREM_DIR/.gremlin} 15 | : ${OPT_CONFIG:=$GREM_DIR/config.yml} 16 | : ${OPT_MODE:=auto} 17 | 18 | 19 | install_deps () { 20 | sudo yum -y install epel-release 21 | sudo yum clean all 22 | sudo yum makecache 23 | sudo yum -y install ansible git 24 | } 25 | 26 | usage () { 27 | echo "Usage: $0 --install-deps" 28 | echo " install quickstart package dependencies and exit" 29 | echo "" 30 | echo "Usage: $0 [options]" 31 | echo "" 32 | echo "Basic options:" 33 | echo " -p, --playbook " 34 | echo " playbook to run(default=$OPT_PLAYBOOK)" 35 | echo " -i, --inventory " 36 | echo " specify inventory host path" 37 | echo " (default=./inventory/hosts) or comma separated host list" 38 | echo " -c, --config " 39 | echo " specify the config file that contains the node" 40 | echo " configuration, can be used only once" 41 | echo " (default=$OPT_CONFIG)" 42 | echo " -m, --mode " 43 | echo " specify mode to run, there are two modes: manual, auto" 44 | echo " (default=$OPT_MODE)" 45 | echo " -S, --step" 46 | echo " execute playbooks or tasks step by step" 47 | echo " --syntax-check" 48 | echo " perform a syntax check on the playbook, but do not" 49 | echo " execute it" 50 | echo "" 51 
| echo "Advanced options:" 52 | echo " -v, --ansible-debug" 53 | echo " invoke ansible-playbook with -vvvv" 54 | echo " -e, --extra-vars =" 55 | echo " additional ansible variables, can be used multiple times" 56 | echo " -t, --tags [,,...]" 57 | echo " only run plays and tasks tagged with these values," 58 | echo " specify 'all' to run everything" 59 | echo " (default=$OPT_TAGS)" 60 | echo " -s, --skip-tags [,,...]" 61 | echo " only run plays and tasks whose tags do" 62 | echo " not match these values" 63 | echo " -w, --working-dir " 64 | echo " directory where the inventory, config files, etc." 65 | echo " are created (default=$OPT_WORKDIR)" 66 | echo " -h, --help print this help and exit" 67 | } 68 | 69 | OPT_VARS=() 70 | 71 | while [ "x$1" != "x" ]; do 72 | case "$1" in 73 | --install-deps) 74 | OPT_INSTALL_DEPS=1 75 | ;; 76 | --inventory|-i) 77 | OPT_INVENTORY=$2 78 | shift 79 | ;; 80 | --playbook|-p) 81 | OPT_PLAYBOOK=$2 82 | shift 83 | ;; 84 | --extra-vars|-e) 85 | OPT_VARS+=("-e") 86 | OPT_VARS+=("$2") 87 | shift 88 | ;; 89 | --config|-c) 90 | OPT_CONFIG=$2 91 | shift 92 | ;; 93 | --mode|-m) 94 | OPT_MODE=$2 95 | shift 96 | ;; 97 | --step|-s) 98 | OPT_STEP=1 99 | ;; 100 | --syntax-check) 101 | OPT_SYNTAX_CHECK=1 102 | ;; 103 | --ansible-debug|-v) 104 | OPT_DEBUG_ANSIBLE=1 105 | ;; 106 | --tags|-t) 107 | OPT_TAGS=$2 108 | shift 109 | ;; 110 | --skip-tags|-S) 111 | OPT_SKIP_TAGS=$2 112 | shift 113 | ;; 114 | --working-dir|-w) 115 | OPT_WORKDIR=$(realpath $2) 116 | shift 117 | ;; 118 | --help|-h) 119 | usage 120 | exit 121 | ;; 122 | --) 123 | shift 124 | break 125 | ;; 126 | *) 127 | break 128 | ;; 129 | esac 130 | shift 131 | done 132 | 133 | 134 | if [ "$OPT_INSTALL_DEPS" = 1 ]; then 135 | echo "NOTICE: installing dependencies" 136 | install_deps 137 | exit $? 
138 | fi 139 | 140 | 141 | if [ "$#" -gt 2 ]; then 142 | usage >&2 143 | exit 2 144 | fi 145 | 146 | 147 | set -ex 148 | 149 | export ANSIBLE_CONFIG=$GREM_DIR/ansible.cfg 150 | export ANSIBLE_INVENTORY=$GREM_DIR/inventory/hosts 151 | 152 | if [ "$OPT_DEBUG_ANSIBLE" = 1 ]; then 153 | VERBOSITY=vvvv 154 | else 155 | VERBOSITY=vv 156 | fi 157 | 158 | ansible-playbook -$VERBOSITY $OPT_PLAYBOOK \ 159 | -e @$OPT_CONFIG \ 160 | -e local_working_dir=$OPT_WORKDIR \ 161 | -e mode=$OPT_MODE \ 162 | ${OPT_VARS[@]} \ 163 | ${OPT_INVENTORY:+-i $OPT_INVENTORY} \ 164 | ${OPT_TAGS:+-t $OPT_TAGS} \ 165 | ${OPT_SKIP_TAGS:+--skip-tags $OPT_SKIP_TAGS} \ 166 | ${OPT_STEP:+--step} \ 167 | ${OPT_SYNTAX_CHECK:+--syntax-check} 168 | 169 | set +x 170 | -------------------------------------------------------------------------------- /files/get-random-osd.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | 5 | import subprocess 6 | import json 7 | import socket 8 | import random 9 | import time 10 | import argparse 11 | 12 | bucket_id_map = {} 13 | host_osd_map = {} 14 | osd_host_map = {} 15 | pg_map = {} 16 | 17 | def init_osd_map(): 18 | 19 | CMD = ['ceph', 'osd', 'crush', 'dump', '--format', 'json'] 20 | 21 | ceph_osd_crush_dump_json = subprocess.check_output(CMD) 22 | ceph_osd_crush_dump = json.loads(ceph_osd_crush_dump_json) 23 | 24 | global bucket_id_map 25 | for bucket in ceph_osd_crush_dump['buckets']: 26 | bucket_id_map[bucket['id']] = dict({'type': bucket['type_name'], 27 | 'name': bucket['name'], 28 | 'items': [ item['id'] for item in bucket['items']]}) 29 | 30 | host_id_list = [ item for item in bucket_id_map.keys() 31 | if bucket_id_map[item]['type'] == 'host'] 32 | global host_osd_map 33 | for item in host_id_list: 34 | host_name = bucket_id_map[item]['name'] 35 | host_ip = socket.gethostbyname(host_name) 36 | host_osds = bucket_id_map[item]['items'] 37 |
host_osd_map[host_name] = dict({'mgmt': host_ip, 38 | 'id': item, 39 | 'osds': host_osds}) 40 | 41 | global osd_host_map 42 | for host in host_osd_map.keys(): 43 | for osd in host_osd_map[host]['osds']: 44 | osd_name = ceph_osd_crush_dump['devices'][osd]['name'] 45 | osd_mgmt = host_osd_map[host]['mgmt'] 46 | osd_host_map[osd] = dict({'host': host, 47 | 'name': osd_name, 48 | 'mgmt': osd_mgmt}) 49 | 50 | def init_pg_map(): 51 | 52 | CMD = ['ceph', 'pg', 'dump', '--format', 'json'] 53 | 54 | ceph_pg_dump_json = subprocess.check_output(CMD) 55 | ceph_pg_dump = json.loads(ceph_pg_dump_json) 56 | 57 | global pg_map 58 | for item in ceph_pg_dump['pg_stats']: 59 | pg_id = item['pgid'] 60 | pg_map[pg_id] = item['acting'] 61 | 62 | def get_random_osd(map_data, num=1): 63 | 64 | random_osd = {} 65 | 66 | random.seed(time.time()) 67 | try: 68 | random_osd_list = random.sample(map_data.keys(), num) 69 | except ValueError: 70 | print("The total osds is {TOTAL}".format(TOTAL=len(map_data.keys()))) 71 | random_osd_list = map_data.keys() 72 | random_osd['size'] = len(random_osd_list) 73 | random_osd['items'] = {} 74 | 75 | for osd in random_osd_list: 76 | random_osd['items'][osd] = map_data[osd] 77 | 78 | return random_osd 79 | 80 | def get_random_osd_from_pg(pg_map, osd_map): 81 | 82 | random_osd = {} 83 | 84 | random_pg_id = random.choice(pg_map.keys()) 85 | random_osd['pgid'] = random_pg_id 86 | 87 | random_osd_list = pg_map[random_pg_id] 88 | random_osd['size'] = len(random_osd_list) 89 | random_osd['items'] = {} 90 | 91 | for osd in random_osd_list: 92 | random_osd['items'][osd] = osd_map[osd] 93 | 94 | return random_osd 95 | 96 | def reformat_mgmt_osd_map(osd_map): 97 | 98 | mgmt_osd_map = {} 99 | 100 | for osd in osd_map: 101 | mgmt_addr = osd_map[osd]['mgmt'] 102 | if mgmt_addr not in mgmt_osd_map.keys(): 103 | mgmt_osd_map[mgmt_addr] = [] 104 | mgmt_osd_map[mgmt_addr].append(osd) 105 | 106 | return mgmt_osd_map 107 | 108 | def output_data(data, output_format='plain', 
filename=None): 109 | 110 | if filename: 111 | if output_format == 'plain': 112 | file_handler = open(filename, 'w') 113 | file_handler.write(data) 114 | elif output_format == 'json': 115 | file_handler = open(filename + '.json', 'w') 116 | file_handler.write(json.dumps(data)) 117 | elif output_format == 'json-pretty': 118 | file_handler = open(filename + '.json', 'w') 119 | file_handler.write(json.dumps(data, indent=2)) 120 | else: 121 | print("Unsupport this {FORMAT} format".format(FORMAT=output_format)) 122 | file_handler.close() 123 | else: 124 | if output_format == 'plain': 125 | print(data) 126 | elif output_format == 'json': 127 | print(json.dumps(data)) 128 | elif output_format == 'json-pretty': 129 | print(json.dumps(data, indent=2)) 130 | else: 131 | print("Unsupport this {FORMAT} format!".format(FORMAT=output_format)) 132 | 133 | def init_argument(parser): 134 | 135 | parser.add_argument('-n', '--number', nargs=1, type=int) 136 | parser.add_argument('-p', '--percentage', nargs=1, type=int) 137 | parser.add_argument('--list-host-map', action='store_true') 138 | parser.add_argument('--list-osd-map', action='store_true') 139 | parser.add_argument('--list-pg-map', action='store_true') 140 | parser.add_argument('-F', '--format', nargs=1) 141 | parser.add_argument('-f', '--file', nargs=1) 142 | parser.add_argument('--get-random-osd', action='store_true') 143 | parser.add_argument('--get-pg-osd', action='store_true') 144 | parser.add_argument('--mgmt-osd', action='store_true') 145 | 146 | args = parser.parse_args() 147 | 148 | return args 149 | 150 | def take_action(args): 151 | 152 | if isinstance(args.format, list): 153 | output_format = args.format[0] 154 | else: 155 | output_format = 'plain' 156 | 157 | if isinstance(args.file, list): 158 | output_filename = args.file[0] 159 | else: 160 | output_filename = None 161 | 162 | if isinstance(args.percentage, list): 163 | percentage = args.percentage[0] 164 | osd_number = int(len(osd_host_map.keys()) * 
(percentage / 100.0)) 165 | elif isinstance(args.number, list): 166 | osd_number = args.number[0] 167 | else: 168 | osd_number = 1 169 | 170 | if args.mgmt_osd: 171 | mgmt_osd_enabled = True 172 | else: 173 | mgmt_osd_enabled = False 174 | 175 | if args.list_host_map: 176 | output_data(host_osd_map, output_format, output_filename) 177 | 178 | if args.list_osd_map: 179 | if mgmt_osd_enabled: 180 | output_map = reformat_mgmt_osd_map(osd_host_map) 181 | else: 182 | output_map = osd_host_map 183 | output_data(output_map, output_format, output_filename) 184 | 185 | if args.list_pg_map: 186 | output_data(pg_map, output_format, output_filename) 187 | 188 | if args.get_random_osd: 189 | random_osd = get_random_osd(osd_host_map, osd_number) 190 | if mgmt_osd_enabled: 191 | output_map_temp = reformat_mgmt_osd_map(random_osd['items']) 192 | output_map = dict({'size': random_osd['size'], 193 | 'items': output_map_temp}) 194 | else: 195 | output_map = random_osd 196 | output_data(output_map, output_format, output_filename) 197 | 198 | if args.get_pg_osd: 199 | random_osd = get_random_osd_from_pg(pg_map, osd_host_map) 200 | if mgmt_osd_enabled: 201 | osd_list = random_osd['items'].keys() 202 | output_list = [] 203 | for num in range(1,random_osd['size'] + 1): 204 | random_osd_temp = {} 205 | for osd in osd_list[:num]: 206 | random_osd_temp[osd] = random_osd['items'][osd] 207 | output_list.append(reformat_mgmt_osd_map(random_osd_temp)) 208 | output_map = dict({'size': random_osd['size'], 209 | 'items': output_list}) 210 | else: 211 | output_map = random_osd 212 | output_data(output_map, output_format, output_filename) 213 | 214 | if __name__ == '__main__': 215 | 216 | init_osd_map() 217 | init_pg_map() 218 | 219 | parser = argparse.ArgumentParser() 220 | args = init_argument(parser) 221 | 222 | take_action(args) 223 | -------------------------------------------------------------------------------- /gremlin.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/gremlin.png -------------------------------------------------------------------------------- /inventory/group_vars/all: -------------------------------------------------------------------------------- 1 | manage_packages: true 2 | mgmt_nic_name: "eth0" 3 | tenant_nic_name: "eth0" 4 | provider_nic_name: "eth0" 5 | external_nic_name: "eth0" 6 | storage_nic_name: "eth0" 7 | storage_mgmt_nic_name: "eth0" 8 | 9 | stress_mysql_time: 300 10 | stress_mysql_host: localhost 11 | sysbench_threads: 1000 12 | sysbench_user: sysbench 13 | sysbench_password: sysbench 14 | sysbench_database: sysbench 15 | 16 | physical_network_bridge: ovsbr3 17 | physical_network_bridge_port: eth0 18 | 19 | external_network_bridge: br-ex 20 | external_network_bridge_port: eth0 21 | 22 | case_prefix_map: 23 | ctl: control 24 | com: compute 25 | net: network 26 | sto: storage 27 | sys: system 28 | -------------------------------------------------------------------------------- /inventory/hosts: -------------------------------------------------------------------------------- 1 | # ------------------------------------------ 2 | # # High-level hostgroups 3 | # # 4 | # # Add hosts to these groups (ideally by creating an additional inventory 5 | # # file in the inventory directory, rather than editing this file) to 6 | # # set up typical groups of services. 
7 | 8 | myip ansible_connection=local ansible_become=true 9 | 10 | # Node role 11 | [api] 12 | myip 13 | 14 | [db] 15 | myip 16 | 17 | [mq] 18 | myip 19 | 20 | [mc] 21 | myip 22 | 23 | [lb] 24 | myip 25 | 26 | [mon] 27 | myip 28 | 29 | [osd] 30 | myip 31 | 32 | [rgw] 33 | myip 34 | 35 | 36 | # Node role group 37 | [control:children] 38 | api 39 | db 40 | mq 41 | mc 42 | lb 43 | 44 | [network] 45 | myip 46 | 47 | [compute] 48 | myip 49 | 50 | [storage:children] 51 | mon 52 | osd 53 | rgw 54 | -------------------------------------------------------------------------------- /inventory/structure: -------------------------------------------------------------------------------- 1 | # ------------------------------------------ 2 | # # High-level hostgroups 3 | # # 4 | # # Add hosts to these groups (ideally by creating an additional inventory 5 | # # file in the inventory directory, rather than editing this file) to 6 | # # set up typical groups of services. 7 | 8 | myip ansible_connection=local ansible_become=true 9 | 10 | # Node role 11 | [api] 12 | myip 13 | 14 | [db] 15 | myip 16 | 17 | [mq] 18 | myip 19 | 20 | [mc] 21 | myip 22 | 23 | [lb] 24 | myip 25 | 26 | [mon] 27 | myip 28 | 29 | [osd] 30 | myip 31 | 32 | [rgw] 33 | myip 34 | 35 | 36 | # Node role group 37 | [control:children] 38 | api 39 | db 40 | mq 41 | mc 42 | lb 43 | 44 | [network] 45 | myip 46 | 47 | [compute] 48 | myip 49 | 50 | [storage:children] 51 | mon 52 | osd 53 | rgw 54 | -------------------------------------------------------------------------------- /playbooks/case.yml: -------------------------------------------------------------------------------- 1 | # This is the entry point of case, you can specify the 2 | # case number to be executed. 
3 | 4 | # The case number will be prefixed with the following short name: 5 | 6 | # ctl: stands for control services related case 7 | # com: stands for compute services related case 8 | # net: stands for network services related case 9 | # sto: stands for storage services related case 10 | # sys: stands for system services related case 11 | 12 | - name: Parse the case number to playbook path 13 | hosts: localhost 14 | connection: local 15 | gather_facts: false 16 | tasks: 17 | - name: Parse cid to get case and id 18 | set_fact: 19 | case: "{{ case_prefix_map[cid.split('-')[0]] }}" 20 | id: "{{ cid.split('-')[1] }}" 21 | when: 22 | - cid.split('-') | length == 2 23 | 24 | - name: Validate CID 25 | fail: 26 | msg: "Wrong cid: {{ cid }}, the cid format should be like: net-001" 27 | when: 28 | - case is not defined or id is not defined 29 | 30 | - name: Generate case path 31 | set_fact: 32 | case_path: "cases/{{ case }}/{{ id }}.yml" 33 | 34 | - name: Check if case playbook exists 35 | stat: 36 | path: "{{ case_path }}" 37 | register: state_result 38 | 39 | - name: Exit if case path does not exist 40 | fail: 41 | msg: "The case {{ case_path }} does not exist, please check it" 42 | when: 43 | - state_result.stat.exists == false 44 | 45 | - name: Print the case path 46 | debug: 47 | msg: The {{ case_path }} will be executed 48 | -------------------------------------------------------------------------------- /playbooks/cases/compute/001.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/playbooks/cases/compute/001.yml -------------------------------------------------------------------------------- /playbooks/cases/control/001.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/playbooks/cases/control/001.yml
-------------------------------------------------------------------------------- /playbooks/cases/network/001.yml: -------------------------------------------------------------------------------- 1 | - name: "Network Case 001: Router unavailable when host in neutron changed" 2 | hosts: networker 3 | gather_facts: false 4 | tasks: 5 | - name: Test 6 | debug: 7 | msg: "The case 001 is executing" 8 | -------------------------------------------------------------------------------- /playbooks/cases/storage/001.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/playbooks/cases/storage/001.yml -------------------------------------------------------------------------------- /playbooks/cases/system/001.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/playbooks/cases/system/001.yml -------------------------------------------------------------------------------- /playbooks/common/ask.yml: -------------------------------------------------------------------------------- 1 | - name: Ask if recover this fault when in manual mode 2 | hosts: localhost 3 | gather_facts: false 4 | vars: 5 | execute: "{{ hostvars['localhost']['execute'] }}" 6 | tasks: 7 | - set_fact: 8 | recovery: true 9 | - block: 10 | - pause: 11 | prompt: "Will recover this fault? 
(y|yes, n|no)" 12 | register: _recovery 13 | - set_fact: 14 | recovery: "{{ _recovery.user_input }}" 15 | when: 16 | - mode == "manual" 17 | - execute == "y" or execute == true 18 | -------------------------------------------------------------------------------- /playbooks/common/create_auth.yml: -------------------------------------------------------------------------------- 1 | - name: Create Auth 2 | hosts: "{{ api_host }}" 3 | gather_facts: false 4 | tasks: 5 | - include_role: 6 | name: provision/os_auth 7 | tasks_from: create_auth 8 | -------------------------------------------------------------------------------- /playbooks/common/next.yml: -------------------------------------------------------------------------------- 1 | - name: "{{ case_name }}" 2 | hosts: localhost 3 | gather_facts: false 4 | tasks: 5 | - set_fact: 6 | execute: true 7 | - block: 8 | - pause: 9 | prompt: "Will execute this case? (y|yes, n|no)" 10 | register: _execute 11 | - set_fact: 12 | execute: "{{ _execute.user_input }}" 13 | when: 14 | - mode == "manual" 15 | -------------------------------------------------------------------------------- /playbooks/common/port.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do specified port drill test cases, and if 2 | # in manual mode, it will prompt to ask if you want to recover it. 
3 | 4 | - include: next.yml 5 | vars: 6 | case_name: "CASE: Delete {{ ovs_port }} port from {{ ovs_bridge }} on {{ random_hosts }}" 7 | 8 | - name: "Drill the case" 9 | hosts: "{{ random_hosts }}" 10 | gather_facts: false 11 | vars: 12 | execute: "{{ hostvars['localhost']['execute'] }}" 13 | tasks: 14 | - include_role: 15 | name: common 16 | tasks_from: port_del 17 | when: 18 | - execute == "y" or execute == true 19 | 20 | - include: ask.yml 21 | 22 | - name: "Recover the fault" 23 | hosts: "{{ random_hosts }}" 24 | gather_facts: false 25 | vars: 26 | execute: "{{ hostvars['localhost']['execute'] }}" 27 | recovery: "{{ hostvars['localhost']['recovery'] }}" 28 | tasks: 29 | - include_role: 30 | name: common 31 | tasks_from: port_add 32 | when: 33 | - mode == "auto" or recovery == "y" or recovery == true 34 | - execute == "y" or execute == true 35 | -------------------------------------------------------------------------------- /playbooks/common/remove_auth.yml: -------------------------------------------------------------------------------- 1 | - name: Remove Auth 2 | hosts: "{{ api_host }}" 3 | gather_facts: false 4 | tasks: 5 | - include_role: 6 | name: provision/os_auth 7 | tasks_from: remove_auth 8 | -------------------------------------------------------------------------------- /playbooks/common/service.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do specified service drill test cases, and if 2 | # in manual mode, it will prompt to ask if you want to recover it. 
3 | 4 | - include: next.yml 5 | vars: 6 | case_name: "CASE: Stop {{ service_name }} service on {{ random_hosts }}" 7 | 8 | - name: "Drill the case" 9 | hosts: "{{ random_hosts }}" 10 | gather_facts: false 11 | vars: 12 | execute: "{{ hostvars['localhost']['execute'] }}" 13 | tasks: 14 | - include_role: 15 | name: common 16 | tasks_from: stop_service 17 | when: 18 | - execute == "y" or execute == true 19 | 20 | - include: ask.yml 21 | 22 | - name: "Recover the fault" 23 | hosts: "{{ random_hosts }}" 24 | gather_facts: false 25 | vars: 26 | execute: "{{ hostvars['localhost']['execute'] }}" 27 | recovery: "{{ hostvars['localhost']['recovery'] }}" 28 | tasks: 29 | - include_role: 30 | name: common 31 | tasks_from: start_service 32 | when: 33 | - mode == "auto" or recovery == "y" or recovery == true 34 | - execute == "y" or execute == true 35 | -------------------------------------------------------------------------------- /playbooks/compute/service.yml: -------------------------------------------------------------------------------- 1 | ## Service Level 2 | 3 | # Stop and start nova releated services on compute node 4 | - include: ../common/service.yml 5 | vars: 6 | random_hosts: random_one_compute_host 7 | service_name: openstack-nova-compute 8 | tags: 9 | - nova 10 | - nova-compute-down 11 | 12 | 13 | # Stop and start libvirt releated services on compute node 14 | - include: ../common/service.yml 15 | vars: 16 | random_hosts: random_one_compute_host 17 | service_name: libvirtd 18 | tags: 19 | - libvirtd 20 | - libvirtd-down 21 | 22 | 23 | # Stop and start Neutron Open vSwitch agent services on compute node 24 | - include: ../common/service.yml 25 | vars: 26 | random_hosts: random_one_compute_host 27 | service_name: neutron-openvswitch-agent 28 | tags: 29 | - ovs-agent 30 | - compute-ovs-agent-down 31 | 32 | 33 | # Stop and start Open vSwitch services on compute node 34 | - include: ../common/service.yml 35 | vars: 36 | random_hosts: random_one_compute_host 37 | 
service_name: openvswitch-nonetwork 38 | tags: 39 | - ovs 40 | - compute-ovs-down 41 | -------------------------------------------------------------------------------- /playbooks/compute/system.yml: -------------------------------------------------------------------------------- 1 | # System Level 2 | - include: ../system/base.yml 3 | vars: 4 | random_hosts: random_one_compute_host 5 | node_group: compute 6 | 7 | - include: ../system/nic.yml 8 | vars: 9 | random_hosts: random_one_compute_host 10 | node_group: compute 11 | nic_type: tenant 12 | nic: "{{ tenant_nic_name }}" 13 | tags: 14 | - compute-tenant-nic 15 | 16 | - include: ../system/nic.yml 17 | vars: 18 | random_hosts: random_one_compute_host 19 | node_group: compute 20 | nic_type: provider 21 | nic: "{{ provider_nic_name }}" 22 | tags: 23 | - compute-provider-nic 24 | 25 | - include: ../system/nic.yml 26 | vars: 27 | random_hosts: random_one_compute_host 28 | node_group: compute 29 | nic_type: storage 30 | nic: "{{ storage_nic_name }}" 31 | tags: 32 | - compute-storage-nic 33 | -------------------------------------------------------------------------------- /playbooks/control/cinder/service.yml: -------------------------------------------------------------------------------- 1 | # Stop and start cinder releated services on control node 2 | 3 | # cinder-api 4 | - include: ../../common/service.yml 5 | vars: 6 | random_hosts: random_one_api_host 7 | service_name: openstack-cinder-api 8 | 9 | - include: ../../common/service.yml 10 | vars: 11 | random_hosts: random_two_api_hosts 12 | service_name: openstack-cinder-api 13 | 14 | - include: ../../common/service.yml 15 | vars: 16 | random_hosts: random_three_api_hosts 17 | service_name: openstack-cinder-api 18 | 19 | # cinder-volume 20 | - include: ../../common/service.yml 21 | vars: 22 | random_hosts: random_one_api_host 23 | service_name: openstack-cinder-volume 24 | 25 | - include: ../../common/service.yml 26 | vars: 27 | random_hosts: random_two_api_hosts 28 | 
service_name: openstack-cinder-volume 29 | 30 | - include: ../../common/service.yml 31 | vars: 32 | random_hosts: random_three_api_hosts 33 | service_name: openstack-cinder-volume 34 | 35 | # cinder-scheduler 36 | - include: ../../common/service.yml 37 | vars: 38 | random_hosts: random_one_api_host 39 | service_name: openstack-cinder-scheduler 40 | 41 | - include: ../../common/service.yml 42 | vars: 43 | random_hosts: random_two_api_hosts 44 | service_name: openstack-cinder-scheduler 45 | 46 | - include: ../../common/service.yml 47 | vars: 48 | random_hosts: random_three_api_hosts 49 | service_name: openstack-cinder-scheduler 50 | -------------------------------------------------------------------------------- /playbooks/control/drill_api.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for api node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | shuffle_api_hosts: "{{ groups['api'] | shuffle }}" 7 | api_group_num: "{{ groups['api'] | length }}" 8 | tasks: 9 | - name: Random select one api host 10 | add_host: 11 | groups: random_one_api_host 12 | name: "{{ groups['api'] | random }}" 13 | when: 14 | - api_group_num | int >= 1 15 | 16 | - name: Random select two api hosts 17 | add_host: 18 | groups: random_two_api_hosts 19 | name: "{{ item }}" 20 | with_items: 21 | - "{{ shuffle_api_hosts[:2] }}" 22 | when: 23 | - api_group_num | int >= 2 24 | 25 | - name: Random select three api hosts 26 | add_host: 27 | groups: random_three_api_hosts 28 | name: "{{ item }}" 29 | with_items: 30 | - "{{ shuffle_api_hosts[:3] }}" 31 | when: 32 | - api_group_num | int >= 3 33 | tags: 34 | - api-pre 35 | 36 | ## Service Level 37 | 38 | # Nova 39 | - include: nova/service.yml 40 | tags: 41 | - service 42 | - api 43 | - api-service 44 | - control 45 | - nova 46 | 47 | # Cinder 48 | - include: cinder/service.yml 49 | tags: 50 | - service 51 | - api 52 | - api-service 53 | - control 54 | - 
cinder 55 | 56 | # Glance 57 | - include: glance/service.yml 58 | tags: 59 | - service 60 | - api 61 | - api-service 62 | - control 63 | - glance 64 | 65 | # Neutron 66 | - include: neutron/service.yml 67 | tags: 68 | - service 69 | - api 70 | - api-service 71 | - control 72 | - neutron 73 | 74 | # Keystone 75 | - include: keystone/service.yml 76 | tags: 77 | - service 78 | - api 79 | - api-service 80 | - control 81 | - keystone 82 | -------------------------------------------------------------------------------- /playbooks/control/drill_db.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for db node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | shuffle_db_hosts: "{{ groups['db'] | shuffle }}" 7 | db_group_num: "{{ groups['db'] | length }}" 8 | tasks: 9 | - name: Random select one db host 10 | add_host: 11 | groups: random_one_db_host 12 | name: "{{ groups['db'] | random }}" 13 | when: 14 | - db_group_num | int >= 1 15 | 16 | - name: Random select two db hosts 17 | add_host: 18 | groups: random_two_db_hosts 19 | name: "{{ item }}" 20 | with_items: 21 | - "{{ shuffle_db_hosts[:2] }}" 22 | when: 23 | - db_group_num | int >= 2 24 | 25 | - name: Random select three db hosts 26 | add_host: 27 | groups: random_three_db_hosts 28 | name: "{{ item }}" 29 | with_items: 30 | - "{{ shuffle_db_hosts[:3] }}" 31 | when: 32 | - db_group_num | int >= 3 33 | tags: 34 | - db-pre 35 | 36 | 37 | # Service down 38 | - include: mysql/service.yml 39 | tags: 40 | - service 41 | - db 42 | - db-service 43 | - control 44 | 45 | 46 | # We mainly test the impact to the MySQL cluster when 47 | # there are network problems 48 | - include: mysql/system.yml 49 | tags: 50 | - system 51 | - db 52 | - db-system 53 | - control 54 | 55 | # Stress MySQL Cluster 56 | - include: mysql/stress.yml 57 | tags: 58 | - service 59 | - db 60 | - db-service 61 | - control 62 | - db-stress 63 | 
-------------------------------------------------------------------------------- /playbooks/control/drill_hy.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for control hyper node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | shuffle_control_hosts: "{{ groups['control'] | shuffle }}" 7 | control_group_num: "{{ groups['control'] | length }}" 8 | tasks: 9 | - name: Random select one control host 10 | add_host: 11 | groups: random_one_control_host 12 | name: "{{ groups['control'] | random }}" 13 | when: 14 | - control_group_num | int >= 1 15 | tags: 16 | - control-pre 17 | 18 | 19 | ## System Level 20 | - include: ../system/base.yml 21 | vars: 22 | random_hosts: random_one_control_host 23 | node_group: control 24 | tags: 25 | - system 26 | - control 27 | - control-system 28 | -------------------------------------------------------------------------------- /playbooks/control/drill_lb.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for lb node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | shuffle_lb_hosts: "{{ groups['lb'] | shuffle }}" 7 | lb_group_num: "{{ groups['lb'] | length }}" 8 | tasks: 9 | - name: Random select one lb host 10 | add_host: 11 | groups: random_one_lb_host 12 | name: "{{ groups['lb'] | random }}" 13 | when: 14 | - lb_group_num | int >= 1 15 | 16 | - name: Random select two lb hosts 17 | add_host: 18 | groups: random_two_lb_hosts 19 | name: "{{ item }}" 20 | with_items: 21 | - "{{ shuffle_lb_hosts[:2] }}" 22 | when: 23 | - lb_group_num | int >= 2 24 | 25 | - name: Random select three lb hosts 26 | add_host: 27 | groups: random_three_lb_hosts 28 | name: "{{ item }}" 29 | with_items: 30 | - "{{ shuffle_lb_hosts[:3] }}" 31 | when: 32 | - lb_group_num | int >= 3 33 | tags: 34 | - lb-pre 35 | 36 | - include: haproxy/service.yml 37 | tags: 38 | - service 39 
| - lb 40 | - lb-service 41 | - control 42 | -------------------------------------------------------------------------------- /playbooks/control/drill_mc.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for mc node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | shuffle_mc_hosts: "{{ groups['mc'] | shuffle }}" 7 | mc_group_num: "{{ groups['mc'] | length }}" 8 | tasks: 9 | - name: Random select one mc host 10 | add_host: 11 | groups: random_one_mc_host 12 | name: "{{ groups['mc'] | random }}" 13 | when: 14 | - mc_group_num | int >= 1 15 | 16 | - name: Random select two mc hosts 17 | add_host: 18 | groups: random_two_mc_hosts 19 | name: "{{ item }}" 20 | with_items: 21 | - "{{ shuffle_mc_hosts[:2] }}" 22 | when: 23 | - mc_group_num | int >= 2 24 | 25 | - name: Random select three mc hosts 26 | add_host: 27 | groups: random_three_mc_hosts 28 | name: "{{ item }}" 29 | with_items: 30 | - "{{ shuffle_mc_hosts[:3] }}" 31 | when: 32 | - mc_group_num | int >= 3 33 | tags: 34 | - mc-pre 35 | 36 | - include: memcache/service.yml 37 | tags: 38 | - service 39 | - mc 40 | - mc-service 41 | - control 42 | -------------------------------------------------------------------------------- /playbooks/control/drill_mq.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for mq node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | shuffle_mq_hosts: "{{ groups['mq'] | shuffle }}" 7 | mq_group_num: "{{ groups['mq'] | length }}" 8 | tasks: 9 | - name: Random select one mq host 10 | add_host: 11 | groups: random_one_mq_host 12 | name: "{{ groups['mq'] | random }}" 13 | when: 14 | - mq_group_num | int >= 1 15 | 16 | - name: Random select two mq hosts 17 | add_host: 18 | groups: random_two_mq_hosts 19 | name: "{{ item }}" 20 | with_items: 21 | - "{{ shuffle_mq_hosts[:2] }}" 22 | when: 23 | - mq_group_num 
| int >= 2 24 | 25 | - name: Random select three mq hosts 26 | add_host: 27 | groups: random_three_mq_hosts 28 | name: "{{ item }}" 29 | with_items: 30 | - "{{ shuffle_mq_hosts[:3] }}" 31 | when: 32 | - mq_group_num | int >= 3 33 | tags: 34 | - mq-pre 35 | 36 | - include: rabbitmq/service.yml 37 | tags: 38 | - service 39 | - mq 40 | - mq-service 41 | - control 42 | 43 | # We mainly test the impact to the RabbitMQ cluster when 44 | # there are network problems 45 | - include: rabbitmq/system.yml 46 | tags: 47 | - system 48 | - mq 49 | - mq-system 50 | - control 51 | 52 | # Stress RabbitMQ Cluster 53 | - include: rabbitmq/stress.yml 54 | tags: 55 | - service 56 | - mq 57 | - mq-service 58 | - control 59 | - mq-stress 60 | -------------------------------------------------------------------------------- /playbooks/control/glance/service.yml: -------------------------------------------------------------------------------- 1 | # Stop and start glance releated services on control node 2 | 3 | # glance-api 4 | - include: ../../common/service.yml 5 | vars: 6 | random_hosts: random_one_api_host 7 | service_name: openstack-glance-api 8 | 9 | - include: ../../common/service.yml 10 | vars: 11 | random_hosts: random_two_api_hosts 12 | service_name: openstack-glance-api 13 | 14 | - include: ../../common/service.yml 15 | vars: 16 | random_hosts: random_three_api_hosts 17 | service_name: openstack-glance-api 18 | 19 | # glance-registry 20 | - include: ../../common/service.yml 21 | vars: 22 | random_hosts: random_one_api_host 23 | service_name: openstack-glance-registry 24 | 25 | - include: ../../common/service.yml 26 | vars: 27 | random_hosts: random_two_api_hosts 28 | service_name: openstack-glance-registry 29 | 30 | - include: ../../common/service.yml 31 | vars: 32 | random_hosts: random_three_api_hosts 33 | service_name: openstack-glance-registry 34 | -------------------------------------------------------------------------------- /playbooks/control/haproxy/service.yml: 
-------------------------------------------------------------------------------- 1 | # Stop and start haproxy related services on control node 2 | 3 | - include: ../../common/service.yml 4 | vars: 5 | random_hosts: random_one_lb_host 6 | service_name: haproxy 7 | 8 | - include: ../../common/service.yml 9 | vars: 10 | random_hosts: random_two_lb_hosts 11 | service_name: haproxy 12 | 13 | - include: ../../common/service.yml 14 | vars: 15 | random_hosts: random_three_lb_hosts 16 | service_name: haproxy 17 | -------------------------------------------------------------------------------- /playbooks/control/keystone/service.yml: -------------------------------------------------------------------------------- 1 | # Stop and start keystone related services on control node 2 | 3 | - include: ../../common/service.yml 4 | vars: 5 | random_hosts: random_one_api_host 6 | service_name: httpd 7 | 8 | - include: ../../common/service.yml 9 | vars: 10 | random_hosts: random_two_api_hosts 11 | service_name: httpd 12 | 13 | - include: ../../common/service.yml 14 | vars: 15 | random_hosts: random_three_api_hosts 16 | service_name: httpd 17 | -------------------------------------------------------------------------------- /playbooks/control/memcache/service.yml: -------------------------------------------------------------------------------- 1 | # Stop and start memcache related services on control node 2 | 3 | - include: ../../common/service.yml 4 | vars: 5 | random_hosts: random_one_mc_host 6 | service_name: memcached 7 | 8 | - include: ../../common/service.yml 9 | vars: 10 | random_hosts: random_two_mc_hosts 11 | service_name: memcached 12 | 13 | - include: ../../common/service.yml 14 | vars: 15 | random_hosts: random_three_mc_hosts 16 | service_name: memcached 17 | -------------------------------------------------------------------------------- /playbooks/control/mysql/service.yml: -------------------------------------------------------------------------------- 1 | # Stop and
start mariadb related services on control node 2 | 3 | - include: ../../common/service.yml 4 | vars: 5 | random_hosts: random_one_db_host 6 | service_name: mysql 7 | 8 | - include: ../../common/service.yml 9 | vars: 10 | random_hosts: random_two_db_hosts 11 | service_name: mysql 12 | 13 | - include: ../../common/service.yml 14 | vars: 15 | random_hosts: random_three_db_hosts 16 | service_name: mysql 17 | -------------------------------------------------------------------------------- /playbooks/control/mysql/stress.yml: -------------------------------------------------------------------------------- 1 | - include: ../../common/next.yml 2 | vars: 3 | case_name: "Stress MySQL Cluster using sysbench" 4 | 5 | - name: "Drill the case" 6 | hosts: random_one_db_host 7 | gather_facts: false 8 | vars: 9 | execute: "{{ hostvars['localhost']['execute'] }}" 10 | tasks: 11 | - include_role: 12 | name: control 13 | tasks_from: stress_db 14 | when: 15 | - execute == "y" or execute == true 16 | -------------------------------------------------------------------------------- /playbooks/control/mysql/system.yml: -------------------------------------------------------------------------------- 1 | # For MySQL Cluster, we mainly test network partition 2 | - include: ../../system/nic.yml 3 | vars: 4 | random_hosts: random_one_db_host 5 | node_group: db 6 | nic_type: mgmt 7 | nic: "{{ mgmt_nic_name }}" 8 | tags: 9 | - db-mgmt-nic 10 | -------------------------------------------------------------------------------- /playbooks/control/neutron/service.yml: -------------------------------------------------------------------------------- 1 | # Stop and start neutron related services on control node 2 | 3 | - include: ../../common/service.yml 4 | vars: 5 | random_hosts: random_one_api_host 6 | service_name: neutron-server 7 | 8 | - include: ../../common/service.yml 9 | vars: 10 | random_hosts: random_two_api_hosts 11 | service_name: neutron-server 12 | 13 | - include:
../../common/service.yml 14 | vars: 15 | random_hosts: random_three_api_hosts 16 | service_name: neutron-server 17 | -------------------------------------------------------------------------------- /playbooks/control/nova/service.yml: -------------------------------------------------------------------------------- 1 | # Stop and start nova related services on control node 2 | 3 | # nova-api 4 | - include: ../../common/service.yml 5 | vars: 6 | random_hosts: random_one_api_host 7 | service_name: openstack-nova-api 8 | 9 | - include: ../../common/service.yml 10 | vars: 11 | random_hosts: random_two_api_hosts 12 | service_name: openstack-nova-api 13 | 14 | - include: ../../common/service.yml 15 | vars: 16 | random_hosts: random_three_api_hosts 17 | service_name: openstack-nova-api 18 | 19 | # nova-scheduler 20 | - include: ../../common/service.yml 21 | vars: 22 | random_hosts: random_one_api_host 23 | service_name: openstack-nova-scheduler 24 | 25 | - include: ../../common/service.yml 26 | vars: 27 | random_hosts: random_two_api_hosts 28 | service_name: openstack-nova-scheduler 29 | 30 | - include: ../../common/service.yml 31 | vars: 32 | random_hosts: random_three_api_hosts 33 | service_name: openstack-nova-scheduler 34 | 35 | # nova-conductor 36 | - include: ../../common/service.yml 37 | vars: 38 | random_hosts: random_one_api_host 39 | service_name: openstack-nova-conductor 40 | 41 | - include: ../../common/service.yml 42 | vars: 43 | random_hosts: random_two_api_hosts 44 | service_name: openstack-nova-conductor 45 | 46 | - include: ../../common/service.yml 47 | vars: 48 | random_hosts: random_three_api_hosts 49 | service_name: openstack-nova-conductor 50 | -------------------------------------------------------------------------------- /playbooks/control/rabbitmq/service.yml: -------------------------------------------------------------------------------- 1 | # Stop and start rabbitmq related services on control node 2 | 3 | - include: ../../common/service.yml 4 
| vars: 5 | random_hosts: random_one_mq_host 6 | service_name: rabbitmq-server 7 | 8 | - include: ../../common/service.yml 9 | vars: 10 | random_hosts: random_two_mq_hosts 11 | service_name: rabbitmq-server 12 | 13 | - include: ../../common/service.yml 14 | vars: 15 | random_hosts: random_three_mq_hosts 16 | service_name: rabbitmq-server 17 | -------------------------------------------------------------------------------- /playbooks/control/rabbitmq/stress.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do specified service drill test cases, and if 2 | # in manual mode, it will prompt to ask if you want to recover it. 3 | 4 | - include: ../../common/next.yml 5 | vars: 6 | case_name: "CASE: Start to stress RabbitMQ" 7 | 8 | - name: "Drill the case" 9 | hosts: localhost 10 | gather_facts: true 11 | vars: 12 | execute: "{{ hostvars['localhost']['execute'] }}" 13 | tasks: 14 | - include_role: 15 | name: control 16 | tasks_from: stress_mq 17 | when: 18 | - execute == "y" or execute == true 19 | 20 | - include: ../../common/ask.yml 21 | 22 | - name: "Recover the fault" 23 | hosts: random_one_mq_host 24 | gather_facts: false 25 | vars: 26 | execute: "{{ hostvars['localhost']['execute'] }}" 27 | recovery: "{{ hostvars['localhost']['recovery'] }}" 28 | tasks: 29 | - include_role: 30 | name: control 31 | tasks_from: purge_queue 32 | when: 33 | - mode == "auto" or recovery == "y" or recovery == true 34 | - execute == "y" or execute == true 35 | -------------------------------------------------------------------------------- /playbooks/control/rabbitmq/system.yml: -------------------------------------------------------------------------------- 1 | # For RabbitMQ Cluster, we mainly test network partition 2 | - include: ../../system/nic.yml 3 | vars: 4 | random_hosts: random_one_mq_host 5 | node_group: mq 6 | nic_type: mgmt 7 | nic: "{{ mgmt_nic_name }}" 8 | tags: 9 | - mq-mgmt-nic 10 | 
-------------------------------------------------------------------------------- /playbooks/drill.yml: -------------------------------------------------------------------------------- 1 | - include: drill_storage.yml 2 | - include: drill_control.yml 3 | - include: drill_compute.yml 4 | - include: drill_network.yml 5 | -------------------------------------------------------------------------------- /playbooks/drill_compute.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for compute node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | compute_group_num: "{{ groups['compute'] | length }}" 7 | shuffle_compute_hosts: "{{ groups['compute'] | shuffle }}" 8 | tasks: 9 | - name: Random select one compute host 10 | add_host: 11 | groups: random_one_compute_host 12 | name: "{{ groups['compute'] | random }}" 13 | when: 14 | - compute_group_num | int >= 1 15 | 16 | - name: Random select two compute hosts 17 | add_host: 18 | groups: random_two_compute_hosts 19 | name: "{{ item }}" 20 | with_items: 21 | - "{{ shuffle_compute_hosts[:2] }}" 22 | when: 23 | - compute_group_num | int >= 2 24 | 25 | - name: Random select three compute hosts 26 | add_host: 27 | groups: random_three_compute_hosts 28 | name: "{{ item }}" 29 | with_items: 30 | - "{{ shuffle_compute_hosts[:3] }}" 31 | when: 32 | - compute_group_num | int >= 3 33 | tags: 34 | - compute-pre 35 | 36 | - include: compute/system.yml 37 | tags: 38 | - system 39 | - compute 40 | - compute-system 41 | 42 | - include: compute/service.yml 43 | tags: 44 | - service 45 | - compute 46 | - compute-service 47 | -------------------------------------------------------------------------------- /playbooks/drill_control.yml: -------------------------------------------------------------------------------- 1 | - include: control/drill_hy.yml 2 | - include: control/drill_lb.yml 3 | - include: control/drill_api.yml 4 | - include: 
control/drill_mc.yml 5 | - include: control/drill_mq.yml 6 | - include: control/drill_db.yml 7 | -------------------------------------------------------------------------------- /playbooks/drill_network.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for network node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | shuffle_network_hosts: "{{ groups['network'] | shuffle }}" 7 | network_group_num: "{{ groups['network'] | length }}" 8 | tasks: 9 | - name: Random select one network host 10 | add_host: 11 | groups: random_one_network_host 12 | name: "{{ groups['network'] | random }}" 13 | when: 14 | - network_group_num | int >= 1 15 | 16 | - name: Random select two network hosts 17 | add_host: 18 | groups: random_two_network_hosts 19 | name: "{{ item }}" 20 | with_items: 21 | - "{{ shuffle_network_hosts[:2] }}" 22 | when: 23 | - network_group_num | int >= 2 24 | 25 | - name: Random select three network hosts 26 | add_host: 27 | groups: random_three_network_hosts 28 | name: "{{ item }}" 29 | with_items: 30 | - "{{ shuffle_network_hosts[:3] }}" 31 | when: 32 | - network_group_num | int >= 3 33 | tags: 34 | - network-pre 35 | 36 | - include: network/system.yml 37 | tags: 38 | - system 39 | - network 40 | - network-system 41 | 42 | - include: network/service.yml 43 | tags: 44 | - service 45 | - network 46 | - network-service 47 | -------------------------------------------------------------------------------- /playbooks/drill_storage.yml: -------------------------------------------------------------------------------- 1 | - include: storage/drill_mon.yml 2 | - include: storage/drill_rgw.yml 3 | - include: storage/drill_osd.yml 4 | -------------------------------------------------------------------------------- /playbooks/network/service.yml: -------------------------------------------------------------------------------- 1 | ## Service Level 2 | # Stop and start neutron related
services on network node 3 | 4 | # neutron-dhcp-agent 5 | - include: ../common/service.yml 6 | vars: 7 | random_hosts: random_one_network_host 8 | service_name: neutron-dhcp-agent 9 | 10 | - include: ../common/service.yml 11 | vars: 12 | random_hosts: random_two_network_hosts 13 | service_name: neutron-dhcp-agent 14 | 15 | - include: ../common/service.yml 16 | vars: 17 | random_hosts: random_three_network_hosts 18 | service_name: neutron-dhcp-agent 19 | 20 | # neutron-lbaasv2-agent 21 | - include: ../common/service.yml 22 | vars: 23 | random_hosts: random_one_network_host 24 | service_name: neutron-lbaasv2-agent 25 | 26 | - include: ../common/service.yml 27 | vars: 28 | random_hosts: random_two_network_hosts 29 | service_name: neutron-lbaasv2-agent 30 | 31 | - include: ../common/service.yml 32 | vars: 33 | random_hosts: random_three_network_hosts 34 | service_name: neutron-lbaasv2-agent 35 | 36 | # neutron-metadata-agent 37 | - include: ../common/service.yml 38 | vars: 39 | random_hosts: random_one_network_host 40 | service_name: neutron-metadata-agent 41 | 42 | - include: ../common/service.yml 43 | vars: 44 | random_hosts: random_two_network_hosts 45 | service_name: neutron-metadata-agent 46 | 47 | - include: ../common/service.yml 48 | vars: 49 | random_hosts: random_three_network_hosts 50 | service_name: neutron-metadata-agent 51 | 52 | # neutron-openvswitch-agent 53 | - include: ../common/service.yml 54 | vars: 55 | random_hosts: random_one_network_host 56 | service_name: neutron-openvswitch-agent 57 | 58 | - include: ../common/service.yml 59 | vars: 60 | random_hosts: random_two_network_hosts 61 | service_name: neutron-openvswitch-agent 62 | 63 | - include: ../common/service.yml 64 | vars: 65 | random_hosts: random_three_network_hosts 66 | service_name: neutron-openvswitch-agent 67 | 68 | # neutron-vpn-agent 69 | - include: ../common/service.yml 70 | vars: 71 | random_hosts: random_one_network_host 72 | service_name: neutron-vpn-agent 73 | 74 | - include: 
../common/service.yml 75 | vars: 76 | random_hosts: random_two_network_hosts 77 | service_name: neutron-vpn-agent 78 | 79 | - include: ../common/service.yml 80 | vars: 81 | random_hosts: random_three_network_hosts 82 | service_name: neutron-vpn-agent 83 | 84 | # openvswitch-nonetwork 85 | - include: ../common/service.yml 86 | vars: 87 | random_hosts: random_one_network_host 88 | service_name: openvswitch-nonetwork # ovs-xxx 89 | -------------------------------------------------------------------------------- /playbooks/network/system.yml: -------------------------------------------------------------------------------- 1 | ## System Level 2 | - include: ../system/base.yml 3 | vars: 4 | random_hosts: random_one_network_host 5 | node_group: network 6 | 7 | - include: ../system/nic.yml 8 | vars: 9 | random_hosts: random_one_network_host 10 | node_group: network 11 | nic_type: tenant 12 | nic: "{{ tenant_nic_name }}" 13 | tags: 14 | - network-tenant-nic 15 | 16 | - include: ../system/nic.yml 17 | vars: 18 | random_hosts: random_one_network_host 19 | node_group: network 20 | nic_type: provider 21 | nic: "{{ provider_nic_name }}" 22 | tags: 23 | - network-provider-nic 24 | 25 | - include: ../system/nic.yml 26 | vars: 27 | random_hosts: random_one_network_host 28 | node_group: network 29 | nic_type: external 30 | nic: "{{ external_nic_name }}" 31 | tags: 32 | - network-external-nic 33 | 34 | - include: ../common/port.yml 35 | vars: 36 | random_hosts: random_one_network_host 37 | ovs_port: "{{ physical_network_bridge_port }}" 38 | ovs_bridge: "{{ physical_network_bridge }}" 39 | tags: 40 | - network-physical-network-port 41 | 42 | - include: ../common/port.yml 43 | vars: 44 | random_hosts: random_one_network_host 45 | ovs_port: "{{ external_network_bridge_port }}" 46 | ovs_bridge: "{{ external_network_bridge }}" 47 | tags: 48 | - network-external-network-port 49 | -------------------------------------------------------------------------------- 
/playbooks/storage/drill_mon.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for mon node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | shuffle_mon_hosts: "{{ groups['mon'] | shuffle }}" 7 | mon_group_num: "{{ groups['mon'] | length }}" 8 | tasks: 9 | - name: Random select one mon host 10 | add_host: 11 | groups: random_one_mon_host 12 | name: "{{ groups['mon'] | random }}" 13 | when: 14 | - mon_group_num | int >= 1 15 | 16 | - name: Random select two mon hosts 17 | add_host: 18 | groups: random_two_mon_hosts 19 | name: "{{ item }}" 20 | with_items: 21 | - "{{ shuffle_mon_hosts[:2] }}" 22 | when: 23 | - mon_group_num | int >= 2 24 | 25 | - name: Random select three mon hosts 26 | add_host: 27 | groups: random_three_mon_hosts 28 | name: "{{ item }}" 29 | with_items: 30 | - "{{ shuffle_mon_hosts[:3] }}" 31 | when: 32 | - mon_group_num | int >= 3 33 | tags: 34 | - mon-pre 35 | 36 | 37 | ## System Level 38 | - include: ../system/base.yml 39 | vars: 40 | random_hosts: random_one_mon_host 41 | node_group: mon 42 | tags: 43 | - system 44 | - mon 45 | - mon-system 46 | 47 | ## Service Level 48 | - include: mon/kill_mon.yml 49 | vars: 50 | random_hosts: random_one_mon_host 51 | tags: 52 | - service 53 | - mon 54 | - mon-service 55 | - mon-down 56 | 57 | - include: mon/damage_mon.yml 58 | vars: 59 | random_hosts: random_one_mon_host 60 | tags: 61 | - service 62 | - mon 63 | - mon-service 64 | - mon-down 65 | 66 | - include: mon/kill_mon.yml 67 | vars: 68 | random_hosts: random_two_mon_hosts 69 | tags: 70 | - service 71 | - mon 72 | - mon-service 73 | - mon-down 74 | 75 | - include: mon/kill_mon.yml 76 | vars: 77 | random_hosts: random_three_mon_hosts 78 | tags: 79 | - service 80 | - mon 81 | - mon-service 82 | - mon-down 83 | -------------------------------------------------------------------------------- /playbooks/storage/drill_osd.yml: 
-------------------------------------------------------------------------------- 1 | - name: Preparing for osd node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | osd_group_num: "{{ groups['osd'] | length }}" 7 | get_osd_script_path: "../../files/get-random-osd.py" 8 | ceph_client_host: "{{ groups['storage'][0] }}" 9 | osd_down_precentage: 30 10 | tasks: 11 | - name: Random select one osd host 12 | add_host: 13 | groups: random_one_osd_host 14 | name: "{{ groups['osd'] | random }}" 15 | when: 16 | - osd_group_num | int >= 1 17 | 18 | - name: Copy get-random-osd.py to storage node 19 | copy: 20 | src: "{{ get_osd_script_path }}" 21 | dest: /tmp/get-random-osd.py 22 | mode: 0644 23 | delegate_to: "{{ ceph_client_host }}" 24 | 25 | - shell: "python get-random-osd.py --get-pg-osd --mgmt-osd 26 | --format json --file /tmp/get-pg-osd" 27 | args: 28 | chdir: /tmp 29 | delegate_to: "{{ ceph_client_host }}" 30 | 31 | - fetch: 32 | src: "/tmp/get-pg-osd.json" 33 | dest: "/tmp/get-pg-osd.json" 34 | flat: yes 35 | fail_on_missing: yes 36 | delegate_to: "{{ ceph_client_host }}" 37 | 38 | - include_vars: 39 | name: _get_pg_osd 40 | file: "/tmp/get-pg-osd.json" 41 | 42 | - file: 43 | path: "/tmp/get-pg-osd.json" 44 | state: absent 45 | 46 | - name: Select one osd in random PG 47 | add_host: 48 | groups: one_osd_pg_hosts 49 | name: "{{ item.key }}" 50 | one_osd_pg_list: "{{ item.value }}" 51 | with_dict: "{{ _get_pg_osd['items'][0] }}" 52 | when: _get_pg_osd['size'] >= 1 53 | 54 | - name: Select two osds in random PG 55 | add_host: 56 | groups: two_osds_pg_hosts 57 | name: "{{ item.key }}" 58 | two_osds_pg_list: "{{ item.value }}" 59 | with_dict: "{{ _get_pg_osd['items'][1] }}" 60 | when: _get_pg_osd['size'] >= 2 61 | 62 | - name: Select three osds in random PG 63 | add_host: 64 | groups: three_osds_pg_hosts 65 | name: "{{ item.key }}" 66 | three_osds_pg_list: "{{ item.value }}" 67 | with_dict: "{{ _get_pg_osd['items'][2] }}" 68 | when: 
_get_pg_osd['size'] >= 3 69 | 70 | - shell: "python get-random-osd.py --get-random-osd 71 | --percentage {{ osd_down_precentage }} --mgmt-osd 72 | --format json --file /tmp/get-random-osd" 73 | args: 74 | chdir: /tmp 75 | delegate_to: "{{ ceph_client_host }}" 76 | 77 | - fetch: 78 | src: "/tmp/get-random-osd.json" 79 | dest: "/tmp/get-random-osd.json" 80 | flat: yes 81 | fail_on_missing: yes 82 | delegate_to: "{{ ceph_client_host }}" 83 | 84 | - include_vars: 85 | name: _get_random_osd 86 | file: "/tmp/get-random-osd.json" 87 | 88 | - file: 89 | path: "/tmp/get-random-osd.json" 90 | state: absent 91 | 92 | - name: Select some random osds 93 | add_host: 94 | groups: random_osd_hosts 95 | name: "{{ item.key }}" 96 | random_osd_list: "{{ item.value }}" 97 | with_dict: "{{ _get_random_osd['items'] }}" 98 | 99 | - name: Absent get-random-osd.py from storage node 100 | file: 101 | path: "/tmp/get-random-osd.py" 102 | state: absent 103 | delegate_to: "{{ ceph_client_host }}" 104 | tags: 105 | - osd-pre 106 | 107 | 108 | ## System Level 109 | 110 | - include: ../system/base.yml 111 | vars: 112 | random_hosts: random_one_osd_host 113 | node_group: osd 114 | tags: 115 | - system 116 | - osd 117 | - osd-system 118 | 119 | - include: ../system/nic.yml 120 | vars: 121 | random_hosts: random_one_osd_host 122 | node_group: osd 123 | nic_type: storage 124 | nic: "{{ storage_nic_name }}" 125 | tags: 126 | - system 127 | - osd 128 | - osd-system 129 | - osd-storage-nic 130 | 131 | ## Service Level 132 | - include: osd/kill_osd.yml 133 | vars: 134 | random_hosts: one_osd_pg_hosts 135 | osd_down_list: "{{ one_osd_pg_list }}" 136 | tags: 137 | - service 138 | - osd 139 | - osd-service 140 | - osd-down 141 | 142 | - include: osd/kill_osd.yml 143 | vars: 144 | random_hosts: two_osds_pg_hosts 145 | osd_down_list: "{{ two_osds_pg_list }}" 146 | tags: 147 | - service 148 | - osd 149 | - osd-service 150 | - osd-down 151 | 152 | - include: osd/kill_osd.yml 153 | vars: 154 | random_hosts: 
three_osds_pg_hosts 155 | osd_down_list: "{{ three_osds_pg_list }}" 156 | tags: 157 | - service 158 | - osd 159 | - osd-service 160 | - osd-down 161 | 162 | - include: osd/kill_osd.yml 163 | vars: 164 | random_hosts: random_osd_hosts 165 | osd_down_list: "{{ random_osd_list }}" 166 | tags: 167 | - service 168 | - osd 169 | - osd-service 170 | - osd-down 171 | 172 | - include: osd/del_osd_partition.yml 173 | vars: 174 | random_hosts: one_osd_pg_hosts 175 | osd_down_list: "{{ one_osd_pg_list }}" 176 | tags: 177 | - service 178 | - osd 179 | - osd-service 180 | - osd-partition-del 181 | -------------------------------------------------------------------------------- /playbooks/storage/drill_rgw.yml: -------------------------------------------------------------------------------- 1 | - name: Preparing for rgw node tests 2 | hosts: localhost 3 | connection: local 4 | gather_facts: false 5 | vars: 6 | rgw_group_num: "{{ groups['rgw'] | length }}" 7 | shuffle_rgw_hosts: "{{ groups['rgw'] | shuffle }}" 8 | tasks: 9 | - name: Random select one rgw host 10 | add_host: 11 | groups: random_one_rgw_host 12 | name: "{{ groups['rgw'] | random }}" 13 | when: 14 | - rgw_group_num | int >= 1 15 | 16 | - name: Random select two rgw hosts 17 | add_host: 18 | groups: random_two_rgw_hosts 19 | name: "{{ item }}" 20 | with_items: 21 | - "{{ shuffle_rgw_hosts[:2] }}" 22 | when: 23 | - rgw_group_num | int >= 2 24 | 25 | - name: Random select three rgw hosts 26 | add_host: 27 | groups: random_three_rgw_hosts 28 | name: "{{ item }}" 29 | with_items: 30 | - "{{ shuffle_rgw_hosts[:3] }}" 31 | when: 32 | - rgw_group_num | int >= 3 33 | tags: 34 | - rgw-pre 35 | 36 | 37 | ## System Level 38 | - include: ../system/base.yml 39 | vars: 40 | random_hosts: random_one_rgw_host 41 | node_group: rgw 42 | tags: 43 | - system 44 | - rgw 45 | - rgw-system 46 | 47 | 48 | ## Service Level 49 | - include: rgw/kill_rgw.yml 50 | vars: 51 | random_hosts: random_one_rgw_host 52 | tags: 53 | - service 54 | - rgw 
55 | - rgw-service 56 | - rgw-down 57 | 58 | - include: rgw/kill_rgw.yml 59 | vars: 60 | random_hosts: random_two_rgw_hosts 61 | tags: 62 | - service 63 | - rgw 64 | - rgw-service 65 | - rgw-down 66 | 67 | - include: rgw/kill_rgw.yml 68 | vars: 69 | random_hosts: random_three_rgw_hosts 70 | tags: 71 | - service 72 | - rgw 73 | - rgw-service 74 | - rgw-down 75 | -------------------------------------------------------------------------------- /playbooks/storage/mon/damage_mon.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do mon kill drill test case, and if in manual 2 | # mode, it will prompt to ask if you want to recover it. 3 | 4 | - include: ../../common/next.yml 5 | vars: 6 | case_name: "CASE: Ceph monitor down by error opening data directory on {{ random_hosts }}" 7 | 8 | - name: "Drill the case" 9 | hosts: "{{ random_hosts }}" 10 | gather_facts: false 11 | vars: 12 | execute: "{{ hostvars['localhost']['execute'] }}" 13 | tasks: 14 | - include_role: 15 | name: storage 16 | tasks_from: damage_mon 17 | when: 18 | - execute == "y" or execute == true 19 | 20 | - include: ../../common/ask.yml 21 | 22 | - name: "Recover the fault" 23 | hosts: "{{ random_hosts }}" 24 | gather_facts: true 25 | vars: 26 | execute: "{{ hostvars['localhost']['execute'] }}" 27 | recovery: "{{ hostvars['localhost']['recovery'] }}" 28 | tasks: 29 | - include_role: 30 | name: storage 31 | tasks_from: recover_damage_mon 32 | when: 33 | - mode == "auto" or recovery == "y" or recovery == true 34 | - execute == "y" or execute == true 35 | -------------------------------------------------------------------------------- /playbooks/storage/mon/kill_mon.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do mon kill drill test case, and if in manual 2 | # mode, it will prompt to ask if you want to recover it. 
3 | 4 | - include: ../../common/next.yml 5 | vars: 6 | case_name: "CASE: Ceph monitor down on {{ random_hosts }}" 7 | 8 | - name: "Drill the case" 9 | hosts: "{{ random_hosts }}" 10 | gather_facts: false 11 | vars: 12 | execute: "{{ hostvars['localhost']['execute'] }}" 13 | tasks: 14 | - include_role: 15 | name: storage 16 | tasks_from: kill_mon 17 | when: 18 | - execute == "y" or execute == true 19 | 20 | - include: ../../common/ask.yml 21 | 22 | - name: "Recover the fault" 23 | hosts: "{{ random_hosts }}" 24 | gather_facts: true 25 | vars: 26 | execute: "{{ hostvars['localhost']['execute'] }}" 27 | recovery: "{{ hostvars['localhost']['recovery'] }}" 28 | tasks: 29 | - include_role: 30 | name: storage 31 | tasks_from: start_mon 32 | when: 33 | - mode == "auto" or recovery == "y" or recovery == true 34 | - execute == "y" or execute == true 35 | -------------------------------------------------------------------------------- /playbooks/storage/osd/del_osd_partition.yml: -------------------------------------------------------------------------------- 1 | - include: ../../common/next.yml 2 | vars: 3 | case_name: "CASE: Storage - Ceph OSD partition-table delete on {{ random_hosts }}" 4 | 5 | - name: "Drill the case" 6 | hosts: "{{ random_hosts }}" 7 | gather_facts: true 8 | vars: 9 | execute: "{{ hostvars['localhost']['execute'] }}" 10 | tasks: 11 | - block: 12 | - include_role: 13 | name: storage 14 | tasks_from: kill_osd 15 | - include_role: 16 | name: storage 17 | tasks_from: del_osd_partition 18 | when: 19 | - execute == "y" or execute == true 20 | 21 | - include: ../../common/ask.yml 22 | 23 | - name: "Recover the fault" 24 | hosts: "{{ random_hosts }}" 25 | gather_facts: true 26 | vars: 27 | execute: "{{ hostvars['localhost']['execute'] }}" 28 | recovery: "{{ hostvars['localhost']['recovery'] }}" 29 | tasks: 30 | - block: 31 | - include_role: 32 | name: storage 33 | tasks_from: recover_osd_partition 34 | - include_role: 35 | name: storage 36 | tasks_from: 
start_osd 37 | when: 38 | - mode == "auto" or recovery == "y" or recovery == true 39 | - execute == "y" or execute == true 40 | -------------------------------------------------------------------------------- /playbooks/storage/osd/kill_osd.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do osd kill drill test case, and if in manual 2 | # mode, it will prompt to ask if you want to recover it. 3 | 4 | - include: ../../common/next.yml 5 | vars: 6 | case_name: "CASE: Storage - Ceph OSD down on {{ random_hosts }}" 7 | 8 | - name: "Drill the case" 9 | hosts: "{{ random_hosts }}" 10 | gather_facts: true 11 | vars: 12 | execute: "{{ hostvars['localhost']['execute'] }}" 13 | tasks: 14 | - include_role: 15 | name: storage 16 | tasks_from: kill_osd 17 | when: 18 | - execute == "y" or execute == true 19 | 20 | - include: ../../common/ask.yml 21 | 22 | - name: "Recover the fault" 23 | hosts: "{{ random_hosts }}" 24 | gather_facts: true 25 | vars: 26 | execute: "{{ hostvars['localhost']['execute'] }}" 27 | recovery: "{{ hostvars['localhost']['recovery'] }}" 28 | tasks: 29 | - include_role: 30 | name: storage 31 | tasks_from: start_osd 32 | when: 33 | - mode == "auto" or recovery == "y" or recovery == true 34 | - execute == "y" or execute == true 35 | -------------------------------------------------------------------------------- /playbooks/storage/rgw/kill_rgw.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do radosgw kill drill test case, and if 2 | # in manual mode, it will prompt to ask if you want to recover 3 | # it. 
4 | 5 | - include: ../../common/next.yml 6 | vars: 7 | case_name: "CASE: Ceph radosgw down on {{ random_hosts }}" 8 | 9 | - name: "Drill the case" 10 | hosts: "{{ random_hosts }}" 11 | gather_facts: false 12 | vars: 13 | execute: "{{ hostvars['localhost']['execute'] }}" 14 | tasks: 15 | - include_role: 16 | name: storage 17 | tasks_from: kill_rgw 18 | when: 19 | - execute == "y" or execute == true 20 | 21 | - include: ../../common/ask.yml 22 | 23 | - name: "Recover the fault" 24 | hosts: "{{ random_hosts }}" 25 | gather_facts: true 26 | vars: 27 | execute: "{{ hostvars['localhost']['execute'] }}" 28 | recovery: "{{ hostvars['localhost']['recovery'] }}" 29 | tasks: 30 | - include_role: 31 | name: storage 32 | tasks_from: start_rgw 33 | when: 34 | - mode == "auto" or recovery == "y" or recovery == true 35 | - execute == "y" or execute == true 36 | -------------------------------------------------------------------------------- /playbooks/system/base.yml: -------------------------------------------------------------------------------- 1 | # CPU 2 | - include: cpu_load.yml 3 | vars: 4 | cpu_stress_load: "80%" 5 | cpu_stress_timeout: "5m" 6 | tags: 7 | - "{{ node_group }}-cpu" 8 | 9 | - include: cpu_load.yml 10 | vars: 11 | cpu_stress_load: "90%" 12 | cpu_stress_timeout: "5m" 13 | tags: 14 | - "{{ node_group }}-cpu" 15 | 16 | - include: cpu_load.yml 17 | vars: 18 | cpu_stress_load: "100%" 19 | cpu_stress_timeout: "5m" 20 | tags: 21 | - "{{ node_group }}-cpu" 22 | 23 | # Memory 24 | - include: mem_load.yml 25 | vars: 26 | mem_stress_load: "80%" 27 | mem_stress_timeout: "5m" 28 | tags: 29 | - "{{ node_group }}-mem" 30 | 31 | - include: mem_load.yml 32 | vars: 33 | mem_stress_load: "90%" 34 | mem_stress_timeout: "5m" 35 | tags: 36 | - "{{ node_group }}-mem" 37 | 38 | - include: mem_load.yml 39 | vars: 40 | mem_stress_load: "100%" 41 | mem_stress_timeout: "5m" 42 | tags: 43 | - "{{ node_group }}-mem" 44 | 45 | # Disk(root disk) 46 | - include: disk_load.yml 47 | vars: 48 | 
disk_stress_load: "80%" 49 | disk_stress_timeout: "5m" 50 | tags: 51 | - "{{ node_group }}-root-disk" 52 | 53 | - include: disk_load.yml 54 | vars: 55 | disk_stress_load: "90%" 56 | disk_stress_timeout: "5m" 57 | tags: 58 | - "{{ node_group }}-root-disk" 59 | 60 | # Nic delay(Management Network) 61 | - include: nic_delay.yml 62 | vars: 63 | nic_delay_time: "200ms" 64 | nic_delay_timeout: "5m" 65 | nic: "{{ mgmt_nic_name }}" 66 | tags: 67 | - "{{ node_group }}-mgmt-nic-delay" 68 | 69 | - include: nic_delay.yml 70 | vars: 71 | nic_delay_time: "300ms" 72 | nic_delay_timeout: "5m" 73 | nic: "{{ mgmt_nic_name }}" 74 | tags: 75 | - "{{ node_group }}-mgmt-nic-delay" 76 | 77 | # Nic package loss(Management Network) 78 | - include: nic_loss.yml 79 | vars: 80 | nic_loss_percent: "80%" 81 | nic_loss_timeout: "5m" 82 | nic: "{{ mgmt_nic_name }}" 83 | tags: 84 | - "{{ node_group }}-mgmt-nic-loss" 85 | 86 | - include: nic_loss.yml 87 | vars: 88 | nic_loss_percent: "90%" 89 | nic_loss_timeout: "5m" 90 | nic: "{{ mgmt_nic_name }}" 91 | tags: 92 | - "{{ node_group }}-mgmt-nic-loss" 93 | 94 | # Nic down(Management Network) 95 | - include: nic_down.yml 96 | vars: 97 | nic_down_timeout: "5m" 98 | nic: "{{ mgmt_nic_name }}" 99 | tags: 100 | - "{{ node_group }}-mgmt-nic-down" 101 | -------------------------------------------------------------------------------- /playbooks/system/cpu_load.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do cpu stress load on specified node 2 | 3 | - include: ../common/next.yml 4 | vars: 5 | case_name: "CASE: Stress {{ node_group }} host CPU load to {{ cpu_stress_load }} for {{ cpu_stress_timeout }} on {{ random_hosts }}" 6 | 7 | - name: "Drill the case" 8 | hosts: "{{ random_hosts }}" 9 | gather_facts: true 10 | vars: 11 | execute: "{{ hostvars['localhost']['execute'] }}" 12 | tasks: 13 | - include_role: 14 | name: system 15 | tasks_from: cpu_load 16 | when: 17 | - execute == "y" or execute == 
true 18 | -------------------------------------------------------------------------------- /playbooks/system/disk_load.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do root file system stress load on specified node 2 | 3 | - include: ../common/next.yml 4 | vars: 5 | case_name: "CASE: Stress {{ node_group }} host root file system load to {{ disk_stress_load }} for {{ disk_stress_timeout }} on {{ random_hosts }}" 6 | 7 | - name: "Drill the case" 8 | hosts: "{{ random_hosts }}" 9 | gather_facts: true 10 | vars: 11 | execute: "{{ hostvars['localhost']['execute'] }}" 12 | tasks: 13 | - include_role: 14 | name: system 15 | tasks_from: disk_load 16 | when: 17 | - execute == "y" or execute == true 18 | -------------------------------------------------------------------------------- /playbooks/system/mem_load.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do mem stress load on specified node 2 | 3 | - include: ../common/next.yml 4 | vars: 5 | case_name: "CASE: Stress {{ node_group }} host memory load to {{ mem_stress_load }} for {{ mem_stress_timeout }} on {{ random_hosts }}" 6 | 7 | - name: "Drill the case" 8 | hosts: "{{ random_hosts }}" 9 | gather_facts: true 10 | vars: 11 | execute: "{{ hostvars['localhost']['execute'] }}" 12 | tasks: 13 | - include_role: 14 | name: system 15 | tasks_from: mem_load 16 | when: 17 | - execute == "y" or execute == true 18 | -------------------------------------------------------------------------------- /playbooks/system/nic.yml: -------------------------------------------------------------------------------- 1 | # This playbook is generally to test a nic in data-plance 2 | 3 | # Nic delay(Storage Network) 4 | - include: nic_delay.yml 5 | vars: 6 | nic_delay_time: "300ms" 7 | nic_delay_timeout: "5m" 8 | tags: 9 | - "{{ node_group }}-{{ nic_type }}-nic-delay" 10 | 11 | # Nic package loss(Storage Network) 12 | - 
include: nic_loss.yml 13 | vars: 14 | nic_loss_percent: "80%" 15 | nic_loss_timeout: "5m" 16 | tags: 17 | - "{{ node_group }}-{{ nic_type }}-nic-loss" 18 | 19 | # Nic down(Storage Network) 20 | - include: nic_down.yml 21 | vars: 22 | nic_down_timeout: "5m" 23 | tags: 24 | - "{{ node_group }}-{{ nic_type }}-nic-down" 25 | -------------------------------------------------------------------------------- /playbooks/system/nic_delay.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do nic delay of specified nic on specified node 2 | 3 | - include: ../common/next.yml 4 | vars: 5 | case_name: "CASE: Set {{ node_group }} host {{ nic }} delay to {{ nic_delay_time }} for {{ nic_delay_timeout }} on {{ random_hosts }}" 6 | 7 | - name: "Drill the case" 8 | hosts: "{{ random_hosts }}" 9 | gather_facts: false 10 | vars: 11 | execute: "{{ hostvars['localhost']['execute'] }}" 12 | tasks: 13 | - include_role: 14 | name: system 15 | tasks_from: nic_delay 16 | when: 17 | - execute == "y" or execute == true 18 | -------------------------------------------------------------------------------- /playbooks/system/nic_down.yml: -------------------------------------------------------------------------------- 1 | # This playbook will down specified nic on specified node 2 | 3 | - include: ../common/next.yml 4 | vars: 5 | case_name: "CASE: Set {{ node_group }} host {{ nic }} down for {{ nic_down_timeout }} on {{ random_hosts }}" 6 | 7 | - name: "Drill the case" 8 | hosts: "{{ random_hosts }}" 9 | gather_facts: false 10 | vars: 11 | execute: "{{ hostvars['localhost']['execute'] }}" 12 | tasks: 13 | - include_role: 14 | name: system 15 | tasks_from: nic_down 16 | when: 17 | - execute == "y" or execute == true 18 | -------------------------------------------------------------------------------- /playbooks/system/nic_loss.yml: -------------------------------------------------------------------------------- 1 | # This playbook will do nic 
loss of specified nic on specified node 2 | 3 | - include: ../common/next.yml 4 | vars: 5 | case_name: "CASE: Set {{ node_group }} host {{ nic }} loss to {{ nic_loss_percent }} for {{ nic_loss_timeout }} on {{ random_hosts }}" 6 | 7 | - name: "Drill the case" 8 | hosts: "{{ random_hosts }}" 9 | gather_facts: false 10 | vars: 11 | execute: "{{ hostvars['localhost']['execute'] }}" 12 | tasks: 13 | - include_role: 14 | name: system 15 | tasks_from: nic_loss 16 | when: 17 | - execute == "y" or execute == true 18 | -------------------------------------------------------------------------------- /roles/common/defaults/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/roles/common/defaults/main.yml -------------------------------------------------------------------------------- /roles/common/tasks/port_add.yml: -------------------------------------------------------------------------------- 1 | - name: Add a port to a ovs bridge 2 | shell: > 3 | ovs-vsctl add-port {{ ovs_bridge }} {{ ovs_port }} 4 | become: true 5 | -------------------------------------------------------------------------------- /roles/common/tasks/port_del.yml: -------------------------------------------------------------------------------- 1 | - name: Delete a port from a ovs bridge 2 | shell: > 3 | ovs-vsctl del-port {{ ovs_bridge }} {{ ovs_port }} 4 | become: true 5 | -------------------------------------------------------------------------------- /roles/common/tasks/start_service.yml: -------------------------------------------------------------------------------- 1 | # Start a service 2 | 3 | - block: 4 | - name: "Try to start service {{ service_name }} using systemctl" 5 | service: 6 | name: "{{ service_name }}" 7 | state: started 8 | become: true 9 | 10 | rescue: 11 | - name: "Try to start service {{ service_name }} using service" 12 | shell: service {{ 
service_name }} start 13 | become: true 14 | ignore_errors: yes 15 | 16 | always: 17 | - name: Waiting 10 seconds to warm up 18 | pause: 19 | seconds: 10 20 | -------------------------------------------------------------------------------- /roles/common/tasks/stop_service.yml: -------------------------------------------------------------------------------- 1 | # Stop a service 2 | 3 | - block: 4 | - name: "Try to stop service {{ service_name }} using systemctl" 5 | service: 6 | name: "{{ service_name }}" 7 | state: stopped 8 | become: true 9 | 10 | rescue: 11 | - name: "Try to stop service {{ service_name }} using service" 12 | shell: service {{ service_name }} stop 13 | become: true 14 | ignore_errors: yes 15 | 16 | always: 17 | - name: Waiting 10 seconds to warm down 18 | pause: 19 | seconds: 10 20 | -------------------------------------------------------------------------------- /roles/compute/README.md: -------------------------------------------------------------------------------- 1 | # Compute reliability verification and fault drill role 2 | 3 | We design test cases from the following two levels: 4 | 5 | ## System Level 6 | 7 | The compute node is for running instance, which has high demands for cpu, memory, and network. 
8 | 9 | There are four key networks used by compute node: 10 | 11 | * management 12 | * storage 13 | * vlan 14 | * tunnel 15 | 16 | ### CPU 17 | 18 | * Stress CPU load to 80% for 5 minutes 19 | * Stress CPU load to 100% for 5 minutes 20 | 21 | ### Memory 22 | 23 | * Stress memory load to 80% for 5 minutes 24 | * Stress memory load to 100% for 5 minutes 25 | 26 | ### Disk 27 | 28 | * Stress root disk util to 80% for 5 minutes 29 | * Stress root disk util to 100% for 5 minutes 30 | 31 | ### Network 32 | 33 | * Management network package loss to 80% 34 | * Management network package loss to 100% 35 | * Management network package delay to 10ms 36 | * Management network package delay to 100ms 37 | * Ifdown management nic 38 | 39 | * VLAN/Tunnel network package loss to 80% 40 | * VLAN/Tunnel network package loss to 100% 41 | * VLAN/Tunnel network package delay to 10ms 42 | * VLAN/Tunnel network package delay to 100ms 43 | * VLAN/Tunnel network package delay to 200ms 44 | * Ifdown vlan/tunnel nic 45 | 46 | * Storage network package loss to 80% 47 | * Storage network package loss to 100% 48 | * Storage network package delay to 10ms 49 | * Storage network package delay to 100ms 50 | * Storage network package delay to 200ms 51 | * Ifdown storage nic 52 | 53 | ## Service Level 54 | 55 | There are following processes running on compute node: 56 | 57 | on control plane: 58 | 59 | * nova-compute 60 | * neutron-openvswitch-agent 61 | * libvirtd 62 | 63 | on data plane: 64 | 65 | * kvm-qemu 66 | * ovsdb-server 67 | * ovs-vswitchd 68 | 69 | So we design the following test cases: 70 | 71 | * kill nova-compute 72 | * systemctl stop nova-compute 73 | * kill neutron-openvswitch-agent 74 | * systemctl stop neutron-openvswitch-agent 75 | * kill libvirtd 76 | * systemctl stop libvirtd 77 | * kill a kvm/qemu process 78 | * kill ovsdb-server 79 | * systemctl stop ovsdb-server 80 | * kill ovs-vswitchd 81 | * systemctl stop ovs-vswitchd 82 | 
-------------------------------------------------------------------------------- /roles/compute/defaults/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/roles/compute/defaults/main.yml -------------------------------------------------------------------------------- /roles/compute/meta/main.yml: -------------------------------------------------------------------------------- 1 | # Include the `common` role as a dependency. This makes sure the 2 | # # variables defined in that role are available here. 3 | dependencies: 4 | - common 5 | -------------------------------------------------------------------------------- /roles/compute/service/main.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/roles/compute/service/main.yml -------------------------------------------------------------------------------- /roles/control/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/roles/control/README.md -------------------------------------------------------------------------------- /roles/control/defaults/main.yml: -------------------------------------------------------------------------------- 1 | rabbit_host: localhost 2 | rabbit_username: guest 3 | rabbit_password: guest 4 | rabbit_exchange: gremlin 5 | exchange_durable: False 6 | exchange_auto_delete: False 7 | rabbit_queue: notifications.info 8 | queue_durable: True 9 | queue_auto_delete: False 10 | routing_key: notifications.info 11 | threads: 100 12 | msg_per_thread: 10000 13 | -------------------------------------------------------------------------------- /roles/control/files/stress_mq.py: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import argparse 3 | import pika 4 | import threading 5 | 6 | parser = argparse.ArgumentParser("./stress_mq.py", 7 | description='Publish messages to RabbitMQ') 8 | parser.add_argument('-H', '--rabbit-host', default='localhost', 9 | help="RabbitMQ Host Address") 10 | parser.add_argument('-u', '--rabbit-username', default='guest', 11 | help="RabbitMQ username") 12 | parser.add_argument('-p', '--rabbit-password', default='guest', 13 | help="RabbitMQ password") 14 | parser.add_argument('--rabbit-exchange', default='gremlin', 15 | help="The exchange to stress in RabbitMQ") 16 | parser.add_argument('--exchange-durable', default='False', 17 | help="Set exchange to durable or not") 18 | parser.add_argument('--exchange-auto-delete', default='False', 19 | help="Set exchange to auto_delete or not") 20 | parser.add_argument('--rabbit-queue', default='notifications.info', 21 | help="The queue to stress in RabbitMQ") 22 | parser.add_argument('--queue-durable', default='False', 23 | help="Set queue to durable or not") 24 | parser.add_argument('--queue-auto-delete', default='False', 25 | help="Set queue to auto_delete or not") 26 | parser.add_argument('--routing-key', default='notifications.info', 27 | help="The routing_key the queue will bind with") 28 | parser.add_argument('-t', '--threads', type=int, default=100, 29 | help="The threading number will spawned to do publish messages") 30 | parser.add_argument('-n', '--msg-per-thread', type=int, default=10000, 31 | help="Message number every thread will publish") 32 | args = parser.parse_args() 33 | 34 | credentials = pika.PlainCredentials(args.rabbit_username, args.rabbit_password) 35 | parameters = pika.ConnectionParameters(host=args.rabbit_host, 36 | credentials=credentials) 37 | 38 | def str2bool(v): 39 | return v.lower() in ('true', 'yes', '1') 40 | 41 | def publish(): 42 | connection = 
pika.BlockingConnection(parameters) 43 | channel = connection.channel() 44 | 45 | channel.exchange_declare(exchange=args.rabbit_exchange, 46 | durable=str2bool(args.exchange_durable), 47 | auto_delete=str2bool(args.exchange_auto_delete), 48 | type="topic") 49 | channel.queue_declare(queue=args.rabbit_queue, 50 | durable=str2bool(args.queue_durable), 51 | auto_delete=str2bool(args.queue_auto_delete)) 52 | channel.queue_bind(args.rabbit_queue, args.rabbit_exchange, 53 | args.routing_key) 54 | 55 | message = 'Gremlin Coming!' 56 | count = 0 57 | while count < args.msg_per_thread: 58 | channel.basic_publish(exchange=args.rabbit_exchange, 59 | routing_key=args.routing_key, 60 | body=message) 61 | count = count + 1 62 | connection.close() 63 | 64 | threads = [threading.Thread(target=publish) for i in range(args.threads)] 65 | 66 | for t in threads: 67 | t.start() 68 | 69 | for t in threads: 70 | t.join() 71 | -------------------------------------------------------------------------------- /roles/control/meta/main.yml: -------------------------------------------------------------------------------- 1 | # Include the `common` role as a dependency. This makes sure the 2 | # # variables defined in that role are available here. 
3 | dependencies: 4 | - common 5 | -------------------------------------------------------------------------------- /roles/control/tasks/purge_queue.yml: -------------------------------------------------------------------------------- 1 | - name: Purge RabbitMQ queue 2 | shell: > 3 | /usr/local/bin/rabbitmqadmin purge queue name={{ rabbit_queue }} 4 | --username={{ rabbit_username }} --password={{ rabbit_password }} 5 | become: true 6 | -------------------------------------------------------------------------------- /roles/control/tasks/stress_db.yml: -------------------------------------------------------------------------------- 1 | - name: Install sysbench 2 | package: 3 | name: sysbench 4 | state: present 5 | delegate_to: localhost 6 | become: true 7 | when: manage_packages|default(false) 8 | 9 | - name: Delete sysbench database, user and grant priviledges first 10 | shell: > 11 | mysql -e "revoke all on sysbench.* from {{ sysbench_user }}@'%'"; 12 | mysql -e "drop user {{ sysbench_user }}@'%'"; 13 | mysql -e "drop database sysbench"; 14 | become: true 15 | ignore_errors: true 16 | 17 | - name: Create sysbench database, user and grant priviledges first 18 | shell: > 19 | mysql -e "create database sysbench"; 20 | mysql -e "create user '{{ sysbench_user }}'@'%' identified by '{{ sysbench_password }}'"; 21 | mysql -e "grant all on {{ sysbench_user }}.* to sysbench@'%' identified by '{{ sysbench_password }}';"; 22 | become: true 23 | 24 | - name: Stress MySQL Cluster - Prepare 25 | shell: > 26 | sysbench --time={{ stress_mysql_time }} --threads={{ sysbench_threads }} --mysql-user={{ sysbench_user }} 27 | --mysql-password={{ sysbench_password }} --mysql-db={{ sysbench_database }} --mysql-host={{ stress_mysql_host}} 28 | --db-driver=mysql --table_size=500000 29 | /usr/share/sysbench/oltp_read_only.lua prepare 30 | delegate_to: localhost 31 | ignore_errors: true 32 | 33 | - name: Stress MySQL Cluster - Run 34 | shell: > 35 | sysbench --time={{ stress_mysql_time }} 
--threads={{ sysbench_threads }} --mysql-user={{ sysbench_user }} 36 | --mysql-password={{ sysbench_password }} --mysql-db={{ sysbench_database }} --mysql-host={{ stress_mysql_host}} 37 | --db-driver=mysql --table_size=500000 38 | /usr/share/sysbench/oltp_read_only.lua run 39 | delegate_to: localhost 40 | ignore_errors: true 41 | 42 | - name: Stress MySQL Cluster - Cleanup 43 | shell: > 44 | sysbench --time={{ stress_mysql_time }} --threads={{ sysbench_threads }} --mysql-user={{ sysbench_user }} 45 | --mysql-password={{ sysbench_password }} --mysql-db={{ sysbench_database }} --mysql-host={{ stress_mysql_host}} 46 | --db-driver=mysql --table_size=500000 47 | /usr/share/sysbench/oltp_read_only.lua cleanup 48 | delegate_to: localhost 49 | ignore_errors: true 50 | 51 | - name: Delete sysbench database, user and grant priviledges 52 | shell: > 53 | mysql -e "revoke all on sysbench.* from {{ sysbench_user }}@'%'"; 54 | mysql -e "drop user {{ sysbench_user }}@'%'"; 55 | mysql -e "drop database sysbench"; 56 | become: true 57 | ignore_errors: true 58 | -------------------------------------------------------------------------------- /roles/control/tasks/stress_mq.yml: -------------------------------------------------------------------------------- 1 | - name: Install pika 2 | package: 3 | name: python-pika 4 | state: present 5 | become: true 6 | when: manage_packages|default(false) 7 | 8 | - name: Stress RabbitMQ 9 | script: "stress_mq.py -H {{ rabbit_host }} -u {{ rabbit_username }} -p {{ rabbit_password }} 10 | --rabbit-exchange {{ rabbit_exchange }} --exchange-durable {{ exchange_durable }} 11 | --exchange-auto-delete {{ exchange_auto_delete}} --rabbit-queue {{ rabbit_queue }} 12 | --queue-durable {{ queue_durable }} --queue-auto-delete {{ queue_auto_delete }} 13 | --routing-key {{ routing_key }} -t {{ threads }} -n {{ msg_per_thread }}" 14 | become: true 15 | -------------------------------------------------------------------------------- /roles/network/README.md: 
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/roles/network/README.md
--------------------------------------------------------------------------------
/roles/network/defaults/main.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/roles/network/defaults/main.yml
--------------------------------------------------------------------------------
/roles/network/meta/main.yml:
--------------------------------------------------------------------------------
# Include the `common` role as a dependency. This makes sure the
# variables defined in that role are available here.
dependencies:
  - common
--------------------------------------------------------------------------------
/roles/network/service/tasks/main.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/roles/network/service/tasks/main.yml
--------------------------------------------------------------------------------
/roles/provision/README.md:
--------------------------------------------------------------------------------
Add a non-root user to the target host to execute fault drill commands; the
non-root user is removed when the test case is done.
--------------------------------------------------------------------------------
/roles/provision/defaults/main.yml:
--------------------------------------------------------------------------------
# The path to an SSH key (that we will generate) that can be used to
# log in to the target host.
gremlin_key: "{{ local_working_dir }}/id_rsa_gremlin"
--------------------------------------------------------------------------------
/roles/provision/local/tasks/main.yml:
--------------------------------------------------------------------------------
# Prepare the local working directory on the Ansible control host.

- name: Get current user group for localhost
  command: "id -gn"
  register: local_user_group
  changed_when: false

- name: Register fact for current user group
  set_fact:
    current_group_local: "{{ local_user_group.stdout }}"
  tags:
    - provision

# First try with privilege escalation; if that fails on this control
# host, the rescue repeats the same task without `become`.
- block:
    - name: Ensure local working dir exists
      file:
        path: "{{ local_working_dir }}"
        state: directory
        owner: "{{ ansible_env.USER }}"
        group: "{{ current_group_local }}"
      become: true
  rescue:
    - name: Ensure local working dir exists
      file:
        path: "{{ local_working_dir }}"
        state: directory
        owner: "{{ ansible_env.USER }}"
        group: "{{ current_group_local }}"
--------------------------------------------------------------------------------
/roles/provision/meta/main.yml:
--------------------------------------------------------------------------------
# Include the `common` role as a dependency. This makes sure the
# variables defined in that role are available here.
dependencies:
  - common
--------------------------------------------------------------------------------
/roles/provision/os_auth/defaults/main.yml:
--------------------------------------------------------------------------------
# Keystone endpoint and admin credentials used to provision the
# `gremlin` project/user.
os_auth_url: 'http://127.0.0.1:5000/v3'
os_project_domain_name: Default
os_user_domain_name: Default

os_admin_project: admin
os_admin_username: admin
os_admin_password: admin

os_gremlin_password: drill@gremlin
os_gremlin_role: member
--------------------------------------------------------------------------------
/roles/provision/os_auth/tasks/create_auth.yml:
--------------------------------------------------------------------------------
# Create the `gremlin` project and user and grant the configured role.

# Probe for an existing `gremlin` project; the rc is checked below.
- shell: "openstack --os-auth-url {{ os_auth_url }}
          --os-identity-api-version 3
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name {{ os_admin_project }}
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username {{ os_admin_username }}
          --os-password {{ os_admin_password }}
          project show gremlin"
  register: os_project_show
  ignore_errors: true

# Create the project only when the probe above failed.
- shell: "openstack --os-auth-url {{ os_auth_url }}
          --os-identity-api-version 3
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name {{ os_admin_project }}
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username {{ os_admin_username }}
          --os-password {{ os_admin_password }}
          project create --domain {{ os_project_domain_name }} gremlin"
  when: os_project_show.rc != 0

# `--or-show` makes the user creation idempotent.
- shell: "openstack --os-auth-url {{ os_auth_url }}
          --os-identity-api-version 3
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name {{ os_admin_project }}
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username {{ os_admin_username }}
          --os-password {{ os_admin_password }}
          user create --domain {{ os_project_domain_name }}
          --password {{ os_gremlin_password }}
          gremlin --or-show"

- shell: "openstack --os-auth-url {{ os_auth_url }}
          --os-identity-api-version 3
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name {{ os_admin_project }}
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username {{ os_admin_username }}
          --os-password {{ os_admin_password }}
          role add --project gremlin --user gremlin {{ os_gremlin_role }}"
--------------------------------------------------------------------------------
/roles/provision/os_auth/tasks/remove_auth.yml:
--------------------------------------------------------------------------------
# Remove the `gremlin` user and project created by create_auth.yml.

- shell: "openstack --os-auth-url {{ os_auth_url }}
          --os-identity-api-version 3
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name {{ os_admin_project }}
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username {{ os_admin_username }}
          --os-password {{ os_admin_password }}
          user show gremlin"
  register: os_user_show
  ignore_errors: true

- shell: "openstack --os-auth-url {{ os_auth_url }}
          --os-identity-api-version 3
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name {{ os_admin_project }}
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username {{ os_admin_username }}
          --os-password {{ os_admin_password }}
          user delete gremlin"
  when: os_user_show.rc == 0

- shell: "openstack --os-auth-url {{ os_auth_url }}
          --os-identity-api-version 3
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name {{ os_admin_project }}
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username {{ os_admin_username }}
          --os-password {{ os_admin_password }}
          project show gremlin"
  register: os_project_show
  ignore_errors: true

- shell: "openstack --os-auth-url {{ os_auth_url }}
          --os-identity-api-version 3
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name {{ os_admin_project }}
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username {{ os_admin_username }}
          --os-password {{ os_admin_password }}
          project delete gremlin"
  when: os_project_show.rc == 0
--------------------------------------------------------------------------------
/roles/provision/os_stack/defaults/main.yml:
--------------------------------------------------------------------------------
stack_template_path: stack-untitle.yml
stack_name: untitle
os_auth_url: 'http://127.0.0.1:5000/v3'
os_region: RegionOne
os_project_domain_name: Default
os_user_domain_name: Default
os_gremlin_password: drill@gremlin
--------------------------------------------------------------------------------
/roles/provision/os_stack/tasks/create_stack.yml:
--------------------------------------------------------------------------------
# Render the stack template locally, then create the Heat stack as the
# gremlin user.
- template:
    src: "{{ stack_template_path }}"
    dest: "/tmp/grem_stack_template.yml"
    force: true
    mode: 0644

- shell: "heat --os-auth-url {{ os_auth_url }}
          --os-region-name {{ os_region }}
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name gremlin
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username gremlin
          --os-password {{ os_gremlin_password }}
          stack-create --template-file /tmp/grem_stack_template.yml
          gremlin_{{ stack_name }}"
--------------------------------------------------------------------------------
/roles/provision/os_stack/tasks/remove_stack.yml:
--------------------------------------------------------------------------------
- shell: "heat --os-auth-url {{ os_auth_url }}
          --os-region-name {{ os_region }}
          --os-project-domain-name {{ os_project_domain_name }}
          --os-project-name gremlin
          --os-user-domain-name {{ os_user_domain_name }}
          --os-username gremlin
          --os-password {{ os_gremlin_password }}
          stack-delete gremlin_{{ stack_name }}"

# Remove the rendered template copy.
- file:
    path: "/tmp/grem_stack_template.yml"
    state: absent
--------------------------------------------------------------------------------
/roles/provision/teardown/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - provision
--------------------------------------------------------------------------------
/roles/provision/teardown/tasks/main.yml:
--------------------------------------------------------------------------------
# Check that the non-root user exists.
- name: Get UID of non-root user
  command: >
    id -u {{ non_root_user }}
  register: non_root_uid
  ignore_errors: true
  changed_when: false

# If the non-root user exists, perform a variety of cleanup tasks.
- when: non_root_uid|success
  block:
    # Look for and kill any processes owned by the non-root user.
    # This will let us remove the user later on.
    - name: Check for processes owned by non-root user
      command: >
        pgrep -u {{ non_root_user }}
      register: proc_exist
      ignore_errors: true
      become: true

    - name: Kill (SIGTERM) all processes owned by non-root user
      command: >
        pkill -u {{ non_root_user }}
      ignore_errors: true
      become: true
      when: proc_exist|success

    - name: Kill (SIGKILL) all processes owned by non-root user
      command: >
        pkill -9 -u {{ non_root_user }}
      when: proc_exist|success
      ignore_errors: true
      become: true

    # Now that we have taken care of any processes owned by this user
    # account we can delete it.
    - name: Remove non-root user account
      user:
        name: "{{ non_root_user }}"
        state: absent
        remove: true
      become: true
--------------------------------------------------------------------------------
/roles/provision/user/meta/main.yml:
--------------------------------------------------------------------------------
dependencies:
  - provision
--------------------------------------------------------------------------------
/roles/provision/user/tasks/main.yml:
--------------------------------------------------------------------------------
# Create `gremlin_key`, which we will use to log in to the target
# host. Note that this task runs on the ansible control host
# (because of the `delegate_to: localhost`), and we will later copy
# the public key to the appropriate location.

- name: Check if the host key exists
  delegate_to: localhost
  stat:
    path: "{{ gremlin_key }}"
  register: gremlin_key_stat

- name: Create target host access key
  delegate_to: localhost
  command: >
    ssh-keygen -f {{ gremlin_key }} -N ''
    -C 'ansible_generated_key'
    -t rsa -b 4096
  args:
    creates: "{{ gremlin_key }}"
  when: not gremlin_key_stat.stat.exists

# Create a non-root user on the target host. This is the user that
# will execute fault drill test cases on the target host.
- name: Create non-root group
  group:
    name: "{{ non_root_group }}"
    state: present
  # FIX: group creation needs root, matching the user task below,
  # which already escalates.
  become: true

- name: Create non-root user
  user:
    name: "{{ non_root_user }}"
    group: "{{ non_root_group }}"
    state: present
    shell: /bin/bash
  become: true

# Install the public component of `gremlin_key` in the
# `.ssh/authorized_keys` file for the non-root user.
- name: Configure non-root user authorized_keys
  authorized_key:
    user: "{{ non_root_user }}"
    key: "{{ item }}"
  with_file:
    - "{{ gremlin_key }}.pub"
  become: true

# I'm not always root, but when I am it's because of `sudo`.
- name: Grant sudo privileges to non-root user
  copy:
    content: |
      {{ non_root_user }} ALL=(ALL) NOPASSWD:ALL
    dest: /etc/sudoers.d/{{ non_root_user }}
    owner: root
    group: root
    mode: 0440
  become: true
--------------------------------------------------------------------------------
/roles/storage/README.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/roles/storage/README.md
--------------------------------------------------------------------------------
/roles/storage/defaults/main.yml:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/unitedstack/gremlin/017fe09d80040019df7ed387bf1001114944f4c2/roles/storage/defaults/main.yml
--------------------------------------------------------------------------------
/roles/storage/meta/main.yml:
--------------------------------------------------------------------------------
# Include the `common` role as a dependency. This makes sure the
# variables defined in that role are available here.
dependencies:
  - common
--------------------------------------------------------------------------------
/roles/storage/tasks/damage_mon.yml:
--------------------------------------------------------------------------------
# Stop a ceph monitor and corrupt its store.db CURRENT file; the
# original is saved to /tmp so recover_damage_mon.yml can restore it.
- block:
    - include: stop_mon.yml

    - name: Get mon name
      shell: ls /var/lib/ceph/mon/
      register: mon_name

    - name: Backup ceph db file
      shell: cp /var/lib/ceph/mon/{{ mon_name.stdout }}/store.db/CURRENT /tmp/

    - name: Change ceph db file
      shell: echo 0 > /var/lib/ceph/mon/{{ mon_name.stdout }}/store.db/CURRENT
--------------------------------------------------------------------------------
/roles/storage/tasks/del_osd_partition.yml:
--------------------------------------------------------------------------------
- block:
    - name: Get the OSD Disk
      shell: "df -h |grep ceph-{{ item }} |awk '{print $1}'|sed 's/[0-9]//g'"
      register: ceph_osd_disk
      become: true
      with_items:
        - "{{ osd_down_list }}"

    - name: Get the OSD Disk Partition
      shell: "df -h |grep ceph-{{ item }} |awk '{print $1}'"
      register: ceph_osd_disk_part
      become: true
      with_items:
        - "{{ osd_down_list }}"

    # FIX: with_together pairs each OSD id with its own disk result.
    # The previous with_nested built the full cross product, so with
    # more than one OSD in osd_down_list each backup file was
    # overwritten by unrelated disks.
    - name: Backup Ceph OSD Partition
      shell: "sgdisk {{ item[1].stdout }} -b /tmp/osd_{{ item[0] }}_partition.bak"
      become: true
      register: backup_result
      ignore_errors: false
      failed_when: " 'successfully' not in backup_result.stdout"
      with_together:
        - "{{ osd_down_list }}"
        - "{{ ceph_osd_disk.results }}"

    - name: Del OSD Partition Table
      command: "sgdisk -o {{ item.stdout }}"
      become: true
      register: del_result
      failed_when: " 'successfully' not in del_result.stdout"
      with_items:
        - "{{ ceph_osd_disk.results }}"

    - name: Umount Ceph OSD Disk
      shell: "umount /var/lib/ceph/osd/ceph-{{ item }}"
      become: true
      with_items:
        - "{{ osd_down_list }}"

    - name: Check OSD Umount Result
      shell: "df -h |grep ceph-{{ item }}"
      register: umount_result
      become: true
      failed_when: " 'ceph-' in umount_result.stdout"
      with_items:
        - "{{ osd_down_list }}"

  always:
    - name: Waiting 10 seconds to warm down
      pause:
        seconds: 10
--------------------------------------------------------------------------------
/roles/storage/tasks/kill_mon.yml:
--------------------------------------------------------------------------------
- block:
    - name: Kill ceph monitor daemon
      shell: pkill ceph-mon
      become: true
      ignore_errors: true

    # The check below matches the daemon binary path, so the grep
    # process itself does not count as a false positive.
    - name: Check if ceph monitor was killed
      shell: "ps -ef | grep ceph-mon"
      register: ps_result
      become: true

    - fail:
        msg: "Failed to kill ceph monitor daemon"
      when: "'/usr/bin/ceph-mon' in ps_result.stdout"

  always:
    - name: Waiting 10 seconds to warm down
      pause:
        seconds: 10
--------------------------------------------------------------------------------
/roles/storage/tasks/kill_osd.yml:
--------------------------------------------------------------------------------
- block:
    # `removes` makes this a no-op when the pidfile is absent.
    - name: Kill Ceph OSD daemon
      command: "pkill --pidfile /var/run/ceph/osd.{{ item }}.pid"
      args:
        removes: "/var/run/ceph/osd.{{ item }}.pid"
      become: true
      with_items:
        - "{{ osd_down_list }}"

    # Fallback for hosts without a pidfile: `creates` skips this task
    # whenever the pidfile exists (i.e. the task above already ran).
    - name: Use Systemd to kill Ceph OSD daemon
      command: "systemctl kill ceph-osd@{{ item }}"
      args:
        creates: "/var/run/ceph/osd.{{ item }}.pid"
      become: true
      with_items:
        - "{{ osd_down_list }}"

  always:
    - name: Waiting 10 seconds to warm down
      pause:
        seconds: 10
--------------------------------------------------------------------------------
/roles/storage/tasks/kill_rgw.yml:
--------------------------------------------------------------------------------
- block:
    - name: Kill Ceph radosgw daemon
      command: "pkill radosgw"
      become: true
      ignore_errors: true

    - name: Check if ceph radosgw was killed
      shell: "ps -ef | grep radosgw"
      register: ps_result
      ignore_errors: true

    - fail:
        msg: "Failed to kill Ceph radosgw daemon"
      when: "'/usr/bin/radosgw' in ps_result.stdout"

  always:
    - name: Waiting 10 seconds to warm down
      pause:
        seconds: 10
--------------------------------------------------------------------------------
/roles/storage/tasks/recover_damage_mon.yml:
--------------------------------------------------------------------------------
# Restore the store.db CURRENT file saved by damage_mon.yml and start
# the monitor again. NOTE(review): relies on the `mon_name` fact
# registered by damage_mon.yml earlier in the same play.
- block:
    - name: Recover ceph db file
      shell: mv /tmp/CURRENT /var/lib/ceph/mon/{{ mon_name.stdout }}/store.db/ -f

    - include: start_mon.yml
--------------------------------------------------------------------------------
/roles/storage/tasks/recover_osd_partition.yml:
--------------------------------------------------------------------------------
- block:
    # FIX: with_together pairs each OSD with its own backup/disk (see
    # del_osd_partition.yml); with_nested restored every backup onto
    # every disk.
    - name: Recover Ceph OSD Partition
      command: "sgdisk --load-backup /tmp/osd_{{ item[0] }}_partition.bak {{ item[1].stdout }}"
      register: recover_result
      become: true
      failed_when: " 'successfully' not in recover_result.stdout"
      with_together:
        - "{{ osd_down_list }}"
        - "{{ ceph_osd_disk.results }}"
      when: "ceph_osd_disk is defined"

    - name: Mount Ceph OSD Disk
      shell: "mount {{ item[1].stdout }} /var/lib/ceph/osd/ceph-{{ item[0] }} "
      become: true
      with_together:
        - "{{ osd_down_list }}"
        - "{{ ceph_osd_disk_part.results }}"
      when: "ceph_osd_disk_part is defined"

    - name: Check OSD Mount Result
      shell: "df -h |grep ceph-{{ item }}"
      register: check_result
      become: true
      failed_when: " 'ceph-' not in check_result.stdout"
      with_items:
        - "{{ osd_down_list }}"

  always:
    - name: Waiting 10 seconds to warm up
      pause:
        seconds: 10
--------------------------------------------------------------------------------
/roles/storage/tasks/start_mon.yml:
--------------------------------------------------------------------------------
- block:
    - name: "Reset failed ceph monitor status "
      shell: "systemctl reset-failed ceph-mon@{{ ansible_hostname }}"

    - name: "Start Ceph Mon"
      service:
        name: "ceph-mon@{{ ansible_hostname }}"
        state: started
      become: true

  # Fall back to sysvinit on hosts without the systemd unit.
  rescue:
    - shell: "/etc/init.d/ceph start mon"
      become: true

  always:
    - name: Waiting 10 seconds to warm up
      pause:
        seconds: 10
--------------------------------------------------------------------------------
/roles/storage/tasks/start_osd.yml:
--------------------------------------------------------------------------------
- block:
    # `creates: /etc/init.d/ceph` skips the systemd path on sysvinit hosts.
    - name: "Reset failed ceph osd status "
      shell: "systemctl reset-failed ceph-osd@{{ item }}"
      args:
        creates: "/etc/init.d/ceph"
      become: true
      with_items:
        - "{{ osd_down_list }}"

    - name: "Start Ceph OSD"
      shell: "systemctl start ceph-osd@{{ item }}"
      args:
        creates: "/etc/init.d/ceph"
      become: true
      with_items:
        - "{{ osd_down_list }}"

    # sysvinit path: only runs when /etc/init.d/ceph exists.
    - shell: "/etc/init.d/ceph start osd.{{ item }}"
      args:
        removes: "/etc/init.d/ceph"
      become: true
      with_items:
        - "{{ osd_down_list }}"

  always:
    - name: Waiting 10 seconds to warm up
      pause:
        seconds: 10
--------------------------------------------------------------------------------
/roles/storage/tasks/start_rgw.yml:
--------------------------------------------------------------------------------
- block:
    - name: "Reset failed ceph radosgw status "
      shell: "systemctl reset-failed ceph-radosgw@rgw.{{ ansible_hostname }}"
      become: true

    - name: "Start Ceph RGW"
      service:
        name: "ceph-radosgw@rgw.{{ ansible_hostname }}"
        state: started
      become: true

  rescue:
    - shell: "/etc/init.d/ceph-radosgw start"
      become: true

  always:
    - name: Waiting 10 seconds to warm up
      pause:
        seconds: 10
--------------------------------------------------------------------------------
/roles/storage/tasks/stop_mon.yml:
--------------------------------------------------------------------------------
- block:
    - name: "Stop ceph monitor"
      shell: "systemctl stop ceph-mon@{{ ansible_hostname }}"

    # FIX: the service state was misspelled "stoped", which is not a
    # valid value, so this task always errored into the sysvinit rescue.
    - name: "Stop Ceph Mon"
      service:
        name: "ceph-mon@{{ ansible_hostname }}"
        state: stopped
      become: true

  rescue:
    - shell: "/etc/init.d/ceph stop mon"
      become: true

  always:
    - name: Waiting 10 seconds to warm up
      pause:
        seconds: 10
--------------------------------------------------------------------------------
/roles/system/defaults/main.yml:
--------------------------------------------------------------------------------
# stress-ng knobs: loads are percentages, timeouts use stress-ng
# duration suffixes; a worker count of 0 lets stress-ng choose
# (see stress-ng(1)).
cpu_stress_timeout: "5m"
cpu_stress_workers: 0
cpu_stress_load: "50%"

mem_stress_timeout: "5m"
mem_stress_load: "50%"

disk_stress_timeout: "5m"
disk_stress_workers: 0
disk_stress_load: "50%"
--------------------------------------------------------------------------------
/roles/system/meta/main.yml:
--------------------------------------------------------------------------------
# Include the `common` role as a dependency. This makes sure the
# variables defined in that role are available here.
dependencies:
  - common
--------------------------------------------------------------------------------
/roles/system/tasks/clear_tc.yml:
--------------------------------------------------------------------------------
# Remove any qdisc installed on the NIC by the nic_* tasks.
- name: clear tc rule
  shell: tc qdisc del dev {{ nic }} root
  become: true
--------------------------------------------------------------------------------
/roles/system/tasks/cpu_load.yml:
--------------------------------------------------------------------------------
- name: Install stress-ng
  package:
    name: stress-ng
    state: present
  become: true
  when: manage_packages|default(false)

- name: Install htop
  package:
    name: htop
    state: present
  become: true
  when: manage_packages|default(false)

- name: Stress CPU load
  shell: stress-ng --cpu {{ cpu_stress_workers }} --cpu-load {{ cpu_stress_load }} --timeout {{ cpu_stress_timeout }}
  become: true
--------------------------------------------------------------------------------
/roles/system/tasks/disk_load.yml:
--------------------------------------------------------------------------------
- name: Install stress-ng
  package:
    name: stress-ng
    state: present
  become: true
  when: manage_packages|default(false)

- name: Install sysstat
  package:
    name: sysstat
    state: present
  become: true
  when: manage_packages|default(false)

- name: Stress root file system load(free space on the file system)
  shell: stress-ng --iomix {{ disk_stress_workers }} --iomix-bytes {{ disk_stress_load }} --timeout {{ disk_stress_timeout }}
  become: true
--------------------------------------------------------------------------------
/roles/system/tasks/mem_load.yml:
--------------------------------------------------------------------------------
- name: Install stress-ng
  package:
    name: stress-ng
    state: present
  become: true
  when: manage_packages|default(false)

- name: Install htop
  package:
    name: htop
    state: present
  become: true
  when: manage_packages|default(false)

# Convert mem_stress_load (e.g. "50%") into a kB figure based on
# MemAvailable; `[:-1]` strips the trailing '%'.
- name: Calculate stress vm_bytes
  shell: awk '/MemAvailable/{printf "%d\n", $2 * {{ mem_stress_load[:-1] | int }} / 100;}' < /proc/meminfo
  register: vm_bytes

- name: stress memory
  shell: stress-ng --vm 1 --vm-bytes {{ vm_bytes.stdout }}k --vm-keep --timeout {{ mem_stress_timeout }}
  become: true
--------------------------------------------------------------------------------
/roles/system/tasks/nic_delay.yml:
--------------------------------------------------------------------------------
- name: Apply nic delay tc rule
  shell: >
    tc qdisc add dev {{ nic }} root netem delay {{ nic_delay_time }};
    sleep {{ nic_delay_timeout }};
    tc qdisc del dev {{ nic }} root netem delay {{ nic_delay_time }};
  become: true
--------------------------------------------------------------------------------
/roles/system/tasks/nic_down.yml:
--------------------------------------------------------------------------------
- name: ifdown the nic
  shell: >
    ifdown {{ nic }};
    sleep {{ nic_down_timeout }};
    ifup {{ nic }};
  become: true
--------------------------------------------------------------------------------
/roles/system/tasks/nic_down_async.yml:
--------------------------------------------------------------------------------
# FIX: `async: 0` disables asynchronous execution in Ansible, so this
# task actually blocked (and could hang the play when {{ nic }} carries
# the SSH session). A positive `async` timeout with `poll: 0` is the
# fire-and-forget form.
- name: ifdown the nic asynchronously
  shell: ifdown {{ nic }}
  async: 100
  poll: 0
  become: true
--------------------------------------------------------------------------------
/roles/system/tasks/nic_loss.yml:
--------------------------------------------------------------------------------
- name: Apply nic loss tc rule
  shell: >
    tc qdisc add dev {{ nic }} root netem loss {{ nic_loss_percent }};
    sleep {{ nic_loss_timeout }};
    tc qdisc del dev {{ nic }} root netem loss {{ nic_loss_percent }};
  become: true
--------------------------------------------------------------------------------