├── .gitignore
├── LICENSE
├── ProxyInterface.md
├── README.md
├── Vagrantfile
├── exampleapp
│   ├── .gitignore
│   ├── README.md
│   ├── bookinfoapp-failure.png
│   ├── bookinfoapp.png
│   ├── build-apps.sh
│   ├── compose-app.yml
│   ├── details
│   │   ├── Dockerfile
│   │   ├── details-supervisor.conf
│   │   ├── details.py
│   │   ├── login.defs
│   │   ├── start_all.sh
│   │   └── templates
│   │       ├── example.html
│   │       ├── index.html
│   │       ├── productpage.html
│   │       └── tintin.html
│   ├── gateway
│   │   ├── Dockerfile
│   │   ├── gatewayconfig.json
│   │   ├── gremlingateway-supervisor.conf
│   │   ├── login.defs
│   │   └── start_all.sh
│   ├── killapps.sh
│   ├── productpage
│   │   ├── Dockerfile
│   │   ├── gremlinproduct-supervisor.conf
│   │   ├── login.defs
│   │   ├── productpage-supervisor.conf
│   │   ├── productpage-v1.py
│   │   ├── productpage.py
│   │   ├── proxyconfig.json
│   │   ├── start_all.sh
│   │   └── templates
│   │       ├── example.html
│   │       ├── index.html
│   │       ├── productpage.html
│   │       └── tintin.html
│   ├── rebuild-productpage.sh
│   ├── recipes
│   │   ├── checklist.json
│   │   ├── gremlins.json
│   │   ├── run_recipe_json.py
│   │   └── topology.json
│   ├── reviews
│   │   ├── Dockerfile
│   │   ├── login.defs
│   │   ├── reviews-supervisor.conf
│   │   ├── reviews.py
│   │   ├── start_all.sh
│   │   └── templates
│   │       ├── example.html
│   │       ├── index.html
│   │       ├── productpage.html
│   │       └── tintin.html
│   ├── runapps.sh
│   └── undochanges.sh
├── gremlin-testing-architecture.png
├── python
│   ├── pygremlin
│   │   ├── __init__.py
│   │   ├── applicationgraph.py
│   │   ├── assertionchecker.py
│   │   └── failuregenerator.py
│   └── setup.py
└── recipes
    └── json_recipes
        ├── example_checklist.json
        ├── example_gremlins.json
        ├── example_topology.json
        ├── generic_gremlin_template.json
        └── run_recipe_json.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 |
27 | # PyInstaller
28 | # Usually these files are written by a python script from a template
29 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 |
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 |
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 |
48 | # Translations
49 | *.mo
50 | *.pot
51 |
52 | # Django stuff:
53 | *.log
54 |
55 | # Sphinx documentation
56 | docs/_build/
57 |
58 | # PyBuilder
59 | target/
60 |
61 | #Ipython Notebook
62 | .ipynb_checkpoints
63 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "{}"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright {yyyy} {name of copyright owner}
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/ProxyInterface.md:
--------------------------------------------------------------------------------
1 | Gremlin is agnostic of the service proxy implementation. It only requires
2 | the proxy to be capable of injecting the three basic types of faults into
3 | API calls between microservices.
4 |
5 | There are several ways in which you could implement a service proxy. Some
6 | well-known approaches include sidecar-based solutions such as Nginx managed
7 | by [Confd](https://github.com/kelseyhightower/confd),
8 | [Ambassador containers in Kubernetes](http://blog.kubernetes.io/2015/06/the-distributed-system-toolkit-patterns.html),
9 | [AirBnB SmartStack](https://github.com/airbnb/synapse), and
10 | [Netflix Prana](https://github.com/Netflix/Prana/); library-based options
11 | such as [Netflix Ribbon](https://github.com/Netflix/Ribbon); API gateway
12 | patterns such as Mashape's [Kong](https://github.com/Mashape/Kong); and
13 | cloud-hosted solutions such as
14 | [IBM Service Proxy](https://developer.ibm.com/bluemix/2016/04/13/service-proxy-to-balance-monitor-and-test-your-microservices/).
15 |
16 | The only requirement for Gremlin is that the service proxy should support
17 | fault injection, and should be programmable over a REST API. A reference
18 | implementation of a service proxy with fault injection support can be found
19 | at [gremlinproxy](https://github.com/ResilienceTesting/gremlinproxy).
20 |
21 | * For fault injection, a service proxy implementation needs to support 3
22 | key primitives: abort, delay, and mangle.
23 |
24 | * Any fault injection rule comprises a set of regexes that match a
25 | request/response, and a combination of one or more of the failure
26 | primitives.
27 |
28 | * A service proxy should be able to receive the fault injection rules via a
29 | REST API and inject faults on requests matching the rule.
30 |
31 | * A failure scenario consists of a set of fault injection rules
32 | distributed across one or more service proxies that sit in front of
33 | microservices.
34 |
35 | The REST APIs that need to be implemented by any type of service proxy are
36 | given below.
37 |
38 | ```POST /gremlin/v1/rules/add```: add a rule. The rule is posted as JSON, in the following format:
39 |
40 | ```javascript
41 | {
42 |     source:             // name of the calling service
43 |     dest:               // name of the destination service
44 |     messagetype:        // type of message to match (request or response)
45 |     headerpattern:      // regex to match against the tracking header
46 |     bodypattern:        // regex to match against the message body
47 |     delayprobability:
48 |     delaydistribution:  // probability distribution function
49 |
50 |     mangleprobability:
51 |     mangledistribution: // probability distribution function
52 |
53 |     abortprobability:
54 |     abortdistribution:  // probability distribution function
55 |
56 |     delaytime:          // latency to inject into requests
57 |     errorcode:          // HTTP error code or -1 to reset TCP connection
58 |     searchstring:       // string to replace when Mangle is enabled
59 |     replacestring:      // string to replace with for Mangle fault
60 | }
61 | ```
62 |
63 | ```POST /gremlin/v1/rules/remove```: remove the rule specified in the message body (see rule format above)
64 |
65 | ```DELETE /gremlin/v1/rules```: clear all rules
66 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | ## Gremlin - Systematic Resiliency Testing of Microservices
2 |
3 | Gremlin is a framework for **systematically** testing the failure recovery
4 | logic in microservices in a manner that is independent of the programming
5 | language and the business logic in the microservices. Gremlin takes
6 | advantage of the fact that microservices are loosely coupled and interact
7 | with each other solely over the network, using well-defined APIs over
8 | standard protocols such as HTTP.
9 |
10 | Rather than actually crashing a service
11 | to create a failure, Gremlin intercepts the network interactions (e.g.,
12 | REST API calls) between microservices and manipulates them to fake a failure
13 | to the caller.
14 |
15 | By observing, from the network, how other microservices
16 | react to this failure, it becomes possible to express assertions on the
17 | behavior of the end-to-end application during the failure.
18 |
19 |
20 | ### How it works
21 |
22 | ![Gremlin Architecture][gremlin-arch]
23 | [gremlin-arch]: https://github.com/ResilienceTesting/gremlinsdk-python/raw/master/gremlin-testing-architecture.png "Architecture of Gremlin Resilience Testing Framework"
24 |
25 | Gremlin relies on the service proxy (a dependency injection pattern) to
26 | inject failures into the API calls between microservices. Gremlin expects
27 | the service proxy to expose a set of well-defined low-level fault injection
28 | primitives, namely _abort, delay, and mangle_. The logs from the service proxy are
29 | expected to be forwarded to a log store such as Elasticsearch.
30 |
31 | For convenience, this project includes a reference implementation of the service proxy called [gremlinproxy](https://github.com/ResilienceTesting/gremlinproxy). It is a standalone process that runs as a sidecar alongside the
32 | microservice in the same container or VM.
33 |
34 |
35 | #### Failure Injection and Assertion
36 |
37 | **Recipes:** Using the SDK, you can build recipes: Python code that describes a dependency graph between
38 | microservices, a failure scenario that impacts one or more services, and
39 | assertions on the behavior of other microservices in the system during the
40 | failure. Recipes are mostly independent of the application's business
41 | logic. They can be reused across different applications, as long as the
42 | dependency graph between microservices is the same.
43 |
44 | The Gremlin SDK provides a set of
45 | abstractions built on top of the three fault injection primitives to enable the
46 | user to design and execute a variety of failure scenarios. In
47 | addition, it provides a set of simple abstractions on top of a log store
48 | (Elasticsearch), from which behavioral assertions can be designed (e.g.,
49 | was latency of service A <= 100ms?).
50 |
51 |
52 | ### Example recipes
53 |
54 | Consider the example application shown in the picture above. Let's say we
55 | want to overload service C and validate whether the application as a whole
56 | recovers in an expected manner.
57 |
58 | First, let's check if microservice A responds to the user within 50ms.
59 |
60 | ```python
61 | #!/usr/bin/python
62 | from pygremlin import *
63 | import sys, requests, json
64 |
65 | # Load the dependency graph
66 | dependency_graph_json = sys.argv[1]
67 | with open(dependency_graph_json) as fp:
68 |     app = json.load(fp)
69 | topology = ApplicationGraph(app)
70 |
71 | ## Set up the failure
72 | fg = FailureGenerator(topology)
73 | ### push failure rules to service proxies
74 | fg.overload_service(source='B', dest='C', headerpattern="overload-req-*")
75 | ### start a new test
76 | testID = fg.start_new_test()
77 |
78 | ## Inject some load
79 | for i in range(1000):
80 |     requests.get("http://foo.com/A",
81 |                  headers={"X-Gremlin-ID": "overload-req-%d" % i})
82 |
83 | ## Run assertions
84 | eventlog = AssertionChecker(elasticsearch_host, testID)
85 | result = eventlog.check_bounded_response_time(source='gateway', dest='A', max_latency='50ms')
86 | assert result.success
87 | ```
88 |
89 | Now, let's say A passes the test. In other words, A times out API calls to B
90 | in 50ms. This is great. We could *almost* say that this synthetic
91 | application can handle overload of microservice C.
92 |
93 | Out of curiosity, let's check how B reacts to C's overload. Ideally, B
94 | should have timed out on C much faster than A times out on B. So here
95 | goes a recipe to check if B times out within, say, 40ms. Since we have already
96 | conducted the test, we just need to add more assertions to the same recipe
97 | (let's assume that we know the test ID):
98 |
99 | ```python
100 | ##omitting boilerplate code
101 | ...
102 |
103 | ##Run assertions
104 | eventlog = AssertionChecker(elasticsearch_host, testID)
105 | resultB = eventlog.check_bounded_response_time(source='B', dest='C', max_latency='40ms')
106 | assert resultB.success
107 | ```
108 |
109 | What if B had a timeout of 100ms when calling C? This assertion would
110 | fail. This is not an unrealistic scenario. In fact, this is quite common in
111 | microservice-based applications, because each service is developed by a
112 | different developer or team. In this case, A and B have conflicting failure recovery policies.
113 |
114 | ### [Getting started](https://github.com/ResilienceTesting/gremlinsdk-python/blob/master/exampleapp)
115 |
116 | The exampleapp folder contains a simple microservice application and a
117 | step-by-step tutorial that walks you through the process of using the
118 | recipes to conduct systematic resilience testing.
119 |
120 | #### Note: Gremlin SDK is independent of the service proxy
121 |
122 | The Gremlin SDK is designed to be agnostic of the service proxy implementation, as long as the proxy supports the fundamental fault injection primitives (`abort`, `delay`, and `mangle`) and its logs are forwarded to a log store such as Elasticsearch. See the
123 | [proxy interface](https://github.com/ResilienceTesting/gremlinsdk-python/blob/master/ProxyInterface.md)
124 | document for details on the fault injection API that needs to be supported by a proxy implementation. With a little bit of porting, the Gremlin SDK can work equally well with fault injection proxies like
125 | [Toxiproxy](https://github.com/shopify/toxiproxy) or a simple nginx proxy with OpenResty support, where you can use Lua code to inject faults into upstream API calls.
126 |
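For example, a thin adapter could translate a Gremlin delay rule into a
Toxiproxy latency toxic. Here is a rough sketch: the toxic format follows
Toxiproxy's documented HTTP API, while the proxy name ("reviews") and the
server address are assumptions. Note that Toxiproxy operates at the TCP
level, so header-based scoping via `X-Gremlin-ID` would not carry over
directly.

```python
#!/usr/bin/python
# Rough sketch: apply a Gremlin-style delay rule via Toxiproxy.
# Proxy name and address are assumptions; the endpoint and toxic fields
# follow Toxiproxy's documented HTTP API.
import json
import requests

TOXIPROXY = "http://localhost:8474"

def apply_delay_rule(rule):
    toxic = {
        "name": "gremlin_delay",
        "type": "latency",
        "stream": "downstream",
        "toxicity": rule["delayprobability"],
        "attributes": {"latency": rule["delaytime_ms"]},
    }
    r = requests.post("%s/proxies/%s/toxics" % (TOXIPROXY, rule["dest"]),
                      data=json.dumps(toxic))
    r.raise_for_status()

apply_delay_rule({"dest": "reviews", "delayprobability": 1.0,
                  "delaytime_ms": 8000})
```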
--------------------------------------------------------------------------------
/Vagrantfile:
--------------------------------------------------------------------------------
1 | # -*- mode: ruby -*-
2 | # vi: set ft=ruby :
3 |
4 | Vagrant.configure(2) do |config|
5 | config.vm.box = "ubuntu/trusty64"
6 |
7 | # Demo port for productpage
8 | config.vm.network "forwarded_port", guest: 9080, host: 9180
9 | config.vm.network "forwarded_port", guest: 9081, host: 9181
10 | config.vm.network "forwarded_port", guest: 9082, host: 9182
11 | config.vm.network "forwarded_port", guest: 9083, host: 9183
12 |
13 | # service proxy for gateway
14 | config.vm.network "forwarded_port", guest: 9877, host: 9877
15 | # service proxy for productpage
16 | config.vm.network "forwarded_port", guest: 9876, host: 9876
17 | # Elasticsearch
18 | config.vm.network "forwarded_port", guest: 29200, host: 29200
19 |
20 | # Create a private network, which allows host-only access to the machine
21 | # using a specific IP.
22 | config.vm.network "private_network", ip: "192.168.33.10/24"
23 | # config.vm.network "public_network"
24 |
25 | # config.vm.synced_folder ".", "/home/vagrant/gsdk"
26 |
27 | # Install docker
28 | config.vm.provision :docker
29 |
30 | # Install docker-compose
31 | config.vm.provision "shell", inline: <<-EOC
32 | test -e /usr/local/bin/docker-compose || \\
33 | curl -sSL https://github.com/docker/compose/releases/download/1.5.1/docker-compose-`uname -s`-`uname -m` \\
34 | | sudo tee /usr/local/bin/docker-compose > /dev/null
35 | sudo chmod +x /usr/local/bin/docker-compose
36 | test -e /etc/bash_completion.d/docker-compose || \\
37 | curl -sSL https://raw.githubusercontent.com/docker/compose/$(docker-compose --version | awk 'NR==1{print $NF}')/contrib/completion/bash/docker-compose \\
38 | | sudo tee /etc/bash_completion.d/docker-compose > /dev/null
39 | sudo apt-get install -y python-setuptools
40 | EOC
41 |
42 | config.vm.provider "virtualbox" do |vb|
43 | vb.memory = "3072"
44 | vb.cpus = 2
45 | end
46 | end
47 |
--------------------------------------------------------------------------------
/exampleapp/.gitignore:
--------------------------------------------------------------------------------
1 | gremlinproxy
2 |
--------------------------------------------------------------------------------
/exampleapp/README.md:
--------------------------------------------------------------------------------
1 | # Getting started with Gremlin
2 |
3 | In this tutorial, we will use a simple application composed of 3
4 | microservices, to understand how to use Gremlin to systematically inject a
5 | failure and test whether the microservices behaved in the expected manner
6 | during the failure. Specifically, we will be validating if the
7 | microservices implemented
8 | [stability patterns](http://cdn.oreillystatic.com/en/assets/1/event/79/Stability%20Patterns%20Presentation.pdf)
9 | to handle the failure. When compared with simply injecting faults (killing
10 | VMs, containers or failing requests randomly), one of the main advantages
11 | of this systematic approach is that it gives the tester a good idea of
12 | where things might be going wrong. The tester could then quickly fix the
13 | service, rebuild, redeploy, and test again.
14 |
15 | This example, while contrived, serves to illustrate the benefits of
16 | systematically testing your microservices application for failure recovery
17 | instead of randomly injecting failures without any useful validation.
18 |
19 | ## Setup on a local machine
20 |
21 | #### Pre-requisites
22 | * Docker and docker-compose
23 | * REST client (curl, Chrome + Postman, etc.)
24 |
25 | This tutorial will assume that you are using Chrome + Postman to make
26 | REST API calls to our sample application.
27 |
28 | * Setup Gremlin Python SDK
29 |
30 | ```bash
31 | vagrant@vagrant-ubuntu-trusty-64:~$ git clone https://github.com/ResilienceTesting/gremlinsdk-python
32 | vagrant@vagrant-ubuntu-trusty-64:~$ cd gremlinsdk-python/python
33 | vagrant@vagrant-ubuntu-trusty-64:~/gremlinsdk-python/python$ sudo python setup.py install
34 | ```
35 |
36 | * Setup the simple microservice application
37 |
38 | 
39 |
40 | For trying out some of the recipes, we will be using a simple bookinfo
41 | application made of three microservices and an API gateway service
42 | (_gateway_) facing the user. The API gateway calls the _productpage_
43 | microservice, which in turn relies on _details_ microservice for ISBN
44 | info and the _reviews_ microservice for editorial reviews. The SDK
45 | contains all the code necessary to build out the Docker containers
46 | pertaining to each microservice. The application is written using
47 | Python's Flask framework.
48 |
49 | Let's first build the Docker images for each microservice.
50 |
51 | ```bash
52 | cd gremlinsdk-python/exampleapp; ./build-apps.sh
53 | ```
54 |
55 | The Docker images for the API _gateway_ and the _productpage_ service have
56 | the _gremlinproxy_ embedded inside them as a sidecar process. The
57 | microservices are connected to each other using Docker links. The
58 | entire application can be launched using ```docker-compose```. In the real
59 | world, the microservices would register themselves with a service
60 | registry (e.g., Consul, Etcd, ZooKeeper) and use a service
61 | proxy (i.e., the dependency injection pattern) to dynamically discover the
62 | locations of other services and invoke their APIs. The _gremlinproxy_
63 | provided in this example is a simple reference implementation of a
64 | service proxy that relies on a static configuration file to indicate
65 | the location of other microservices.
66 |
67 | In addition to the 4 microservices for the Bookinfo app, there is a
68 | Logstash forwarder and an Elasticsearch container. Event logs from the
69 | Gremlin proxies are forwarded by the Logstash forwarder to the
70 | Elasticsearch server. The Gremlin SDK queries this Elasticsearch server
71 | during the behavior validation phase.
72 |
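Before moving on, you can check that proxy events are actually reaching
Elasticsearch. A quick sanity check (the host/port and the `gremlin` index
come from the Vagrantfile port forwarding and compose-app.yml in this repo):

```python
#!/usr/bin/python
# Sanity check: list a few proxy events from the "gremlin" index.
import requests

resp = requests.get("http://localhost:29200/gremlin/_search",
                    params={"size": 5})
print resp.json()
```
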
73 | ---
74 |
75 | ## Resilience testing: Checking for timeouts
76 |
77 | #### Step 1: Bring up the application and services
78 |
79 | ```bash
80 | cd gremlinsdk-python/exampleapp; ./runapps.sh
81 | ```
82 |
83 | Open Postman and access the URL http://localhost:9080/productpage to make sure the page is up.
84 |
85 |
86 | #### Step 2: Gremlin recipe - setting up failures
87 |
88 | Let's run a very simple Gremlin recipe that fakes the overload of the
89 | _reviews_ service (without needing to crash the service) and checks if the
90 | _productpage_ service handles this scenario using the timeout pattern. The
91 | figure below illustrates the failure scenario and shows both the expected
92 | and the unexpected behavior of the application. As noted earlier, this is a
93 | very contrived example meant for the purpose of illustration. In the real
94 | world, you would use a circuit breaker pattern to recover from such
95 | failures.
96 |
97 | 
98 |
99 | While it is possible to express Gremlin recipes purely in Python code, for
100 | the purpose of this tutorial, we will be using a simple generic test
101 | harness (```gremlinsdk-python/exampleapp/recipes/run_recipe_json.py```) that takes as input
102 | three JSON files: the application's dependency graph, the failure scenario
103 | and the assertions. You will find the following three JSON files in the
104 | ```gremlinsdk-python/exampleapp/recipes``` folder:
105 |
106 | + ```topology.json``` describes the application topology for the bookinfo application that we set up earlier.
107 | + ```gremlins.json``` describes the failure scenario, wherein the
108 | _reviews_ service is overloaded. A symptom of this scenario is extremely
109 | delayed responses from the _reviews_ service. In our case, responses will
110 | be delayed by 8 seconds.
111 |
112 | **Scoping failures to synthetic users:** As we are doing this test in
113 | production, we don't want to affect real users with our failure
114 | tests. So let's restrict the failures to a set of synthetic requests. We
115 | distinguish synthetic requests using a special HTTP header
116 | ```X-Gremlin-ID```. Only requests carrying this header will be subjected
117 | to fault injection. Since multiple tests could be running
118 | simultaneously, we distinguish our test using a specific header value,
119 | ```testUser-timeout-*```. So any request from _productpage_ to
120 | _reviews_ that contains the HTTP header ```X-Gremlin-ID:
121 | testUser-timeout-``` will be subjected to the overload failure
122 | described in this JSON file.
123 |
124 | + ```checklist.json``` describes the list of behaviors we want to validate
125 | during such a scenario. In our case, we will check if the _productpage_
126 | service times out its API call to _reviews_ service and responds to the
127 | _gateway_ service within 100ms. This behavior is termed as
128 | _bounded\_response\_time_ in the ```checklist.json``` file.
129 |
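To see how these three files fit together, here is a rough sketch of what
the harness does with them, using the pygremlin classes shown in the
top-level README. The `setup_failures` and `check_assertions` helper names
are assumptions for illustration; consult `run_recipe_json.py` for the
actual calls.

```python
#!/usr/bin/python
# Rough sketch of the recipe harness. Helpers marked "assumed" are
# illustrative; see run_recipe_json.py for the real implementation.
import json
from pygremlin import ApplicationGraph, FailureGenerator, AssertionChecker

with open('topology.json') as fp:
    topology = ApplicationGraph(json.load(fp))
with open('gremlins.json') as fp:
    gremlins = json.load(fp)
with open('checklist.json') as fp:
    checklist = json.load(fp)

fg = FailureGenerator(topology)
fg.setup_failures(gremlins)               # assumed: push rules to the proxies
test_id = fg.start_new_test()

raw_input("Inject tagged requests, then press Enter to validate...")

checker = AssertionChecker('localhost:29200', test_id)
results = checker.check_assertions(checklist)  # assumed: run the checklist
for check in results:
    print check.name, 'PASS' if check.success else 'FAIL'
```
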
130 | Let's run the recipe.
131 |
132 | ```bash
133 | cd gremlinsdk-python/exampleapp/recipes; ./run_recipe_json.py topology.json gremlins.json checklist.json
134 | ```
135 |
136 | You should see the following output:
137 |
138 | ```
139 | Use postman to inject test requests,
140 | with HTTP header X-Gremlin-ID:
141 | press Enter key to continue to validation phase
142 | ```
143 |
144 | *Note*: Realistically, load injection would be performed as part of the test
145 | script. However, for the purposes of this tutorial, let's manually inject
146 | the load into the application so that we can visually observe the impact of
147 | fault injection and failure handling.
148 |
149 | #### Step 3: Load injection
150 |
151 | Go back to Postman. Add ```X-Gremlin-ID``` to the header field and set
152 | ```testUser-timeout-1``` as the value for the header.
153 |
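If you would rather drive this from a terminal than from Postman, an
equivalent tagged request looks like this (any HTTP client works; only the
`X-Gremlin-ID` header matters):

```python
#!/usr/bin/python
# Send a "tagged" test request; only requests carrying X-Gremlin-ID
# are subjected to fault injection.
import requests

resp = requests.get("http://localhost:9080/productpage",
                    headers={"X-Gremlin-ID": "testUser-timeout-1"})
print resp.status_code, "in", resp.elapsed.total_seconds(), "seconds"
```
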
154 | Load the page (http://localhost:9080/productpage) and you should see that
155 | the page takes *more than 8 seconds to load*.
156 |
157 | This page load is an _example of poor handling of the failure
158 | scenario_. The _reviews_ service was overloaded and took a long time to
159 | respond. The _productpage_ service, which depends on the _reviews_
160 | service, did not time out its API call.
161 |
162 | Now, disable the header field in Postman and reload the page. You should
163 | see that the _web page loads in less than 100ms without
164 | ```X-Gremlin-ID```_. In other words, normal traffic remains unaffected,
165 | while only "tagged" test traffic carrying the X-Gremlin-ID header is
166 | subjected to failure injection.
167 |
168 | #### Step 4: Continuing recipe execution - behavior validation
169 |
170 | Go back to the console and complete the recipe execution, i.e., run the
171 | behavior validation step.
172 |
173 | ```
174 | Hit the enter key on the console
175 | ```
176 |
177 | The validation code parses the log entries from gremlin service proxies to
178 | check if the _productpage_ service loaded in less than 100ms for requests
179 | containing ```X-Gremlin-ID```. You should see the following output on the
180 | console:
181 |
182 | ```
183 | Check bounded_response_time productpage FAIL
184 | ```
185 |
186 | #### Step 5: Fix the microservice and redeploy
187 |
188 | Let's fix the buggy _productpage_ service, rebuild, and redeploy. We will add
189 | a 100ms timeout to API calls made to the _reviews_ service.
190 |
191 | ```bash
192 | cd gremlinsdk-python/exampleapp
193 | ```
194 |
195 | Open productpage/productpage.py in your favorite editor. Go to the getReviews() function.
196 |
197 | ```python
198 | def getReviews(headers):
199 |     ## timeout is set to 10 milliseconds
200 |     try:
201 |         res = requests.get(reviews['url'], headers=headers) #, timeout=0.010)
202 |     except:
203 |         res = None
204 |
205 |     if res and res.status_code == 200:
206 |         return res.text
207 |     else:
208 |         return """Sorry, product reviews are currently unavailable for this book."""
209 | ```
210 |
211 | Uncomment the part related to
212 | ```python
213 | #timeout=0.010
214 | ```
215 | and integrate it into the get API call like below:
216 |
217 | ```python
218 | res = requests.get(reviews['url'], headers=headers, timeout=0.010)
219 | ```
220 |
221 | Save and close the file.
222 |
223 | Rebuild the app.
224 |
225 | ```bash
226 | cd gremlinsdk-python/exampleapp; ./rebuild-productpage.sh
227 | ```
228 |
229 | Redeploy the app.
230 |
231 | ```bash
232 | cd gremlinsdk-python/exampleapp; ./killapps.sh; ./runapps.sh
233 | ```
234 |
235 | #### Step 6: Test again
236 |
237 | Let's rerun the previous Gremlin recipe to check if the _productpage_ service
238 | passes the test criterion. Repeat steps 2, 3 and 4. This time, even if
239 | ```X-Gremlin-ID``` is present, the product page loads in less than 100ms,
240 | and you should see
241 |
242 | ```
243 | Sorry, product reviews are currently unavailable for this book.
244 | ```
245 |
246 | You should also see the following console output during the behavior
247 | validation phase:
248 |
249 | ```
250 | Check bounded_response_time productpage PASS
251 | ```
252 |
253 | FYI: If you want to re-run the demo, you should revert the application to its
254 | old setup and rebuild the docker containers. The ```undochanges.sh```
255 | helper script automates all of these tasks.
256 |
257 | ```bash
258 | cd gremlinsdk-python/exampleapp; ./undochanges.sh
259 | ```
260 |
261 | ---
262 |
263 | ## Takeaways
264 |
265 | What we did above was to test an app for failure recovery, debug it,
266 | fix the issue, redeploy, and test again to ensure that the bug was
267 | fixed properly. You could imagine automating the entire testing
268 | process above and integrating it into your build pipeline, so that you can
269 | run failure recovery tests just like your unit and integration tests.
270 |
--------------------------------------------------------------------------------
/exampleapp/bookinfoapp-failure.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ResilienceTesting/gremlinsdk-python/c5cc439ea1c0d6a98ff88f5604bf739f3c48d1e6/exampleapp/bookinfoapp-failure.png
--------------------------------------------------------------------------------
/exampleapp/bookinfoapp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ResilienceTesting/gremlinsdk-python/c5cc439ea1c0d6a98ff88f5604bf739f3c48d1e6/exampleapp/bookinfoapp.png
--------------------------------------------------------------------------------
/exampleapp/build-apps.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | #gunicorn -D -w 1 -b 0.0.0.0:10081 --reload details:app
4 | #gunicorn -D -w 1 -b 0.0.0.0:10082 --reload reviews:app
5 | #gunicorn -w 1 -b 0.0.0.0:19080 --reload --access-logfile prod.log --error-logfile prod.log productpage:app >>prod.log 2>&1 &
6 |
7 | set -o errexit
8 | ##build the gremlinproxy
9 | docker run -v "$PWD":"/go/bin" -it golang:1.6 go get -u github.com/ResilienceTesting/gremlinproxy
10 | cp gremlinproxy gateway/
11 | cp gremlinproxy productpage/
12 | rm gremlinproxy
13 | pushd productpage
14 | docker build -t productpage .
15 | popd
16 |
17 | pushd details
18 | docker build -t details .
19 | popd
20 |
21 | pushd reviews
22 | docker build -t reviews .
23 | popd
24 |
25 | pushd gateway
26 | docker build -t gateway .
27 | popd
28 |
29 |
--------------------------------------------------------------------------------
/exampleapp/compose-app.yml:
--------------------------------------------------------------------------------
1 | es:
2 | image: elasticsearch:1.7
3 | command: elasticsearch -Des.index.analysis.analyzer.default.type=keyword
4 | ports:
5 | - "29200:9200"
6 | - "29300:9300"
7 | logstore.mybookstore.io:
8 | image: logstash
9 | command: logstash -e " input {udp {codec=>json port=>8092}} output {elasticsearch {hosts=>es index=>gremlin}} "
10 | ports:
11 | - "8092:8092/udp"
12 | links:
13 | - es
14 | details.mybookstore.io:
15 | image: details
16 | reviews.mybookstore.io:
17 | image: reviews
18 | productpage.mybookstore.io:
19 | image: productpage
20 | ports:
21 | - "9876:9876"
22 | links:
23 | - logstore.mybookstore.io
24 | - details.mybookstore.io
25 | - reviews.mybookstore.io
26 |
27 | gateway.mybookstore.io:
28 | image: gateway
29 | ports:
30 | - "9877:9876"
31 | - "9080:9080"
32 | links:
33 | - logstore.mybookstore.io
34 | - productpage.mybookstore.io
35 |
--------------------------------------------------------------------------------
/exampleapp/details/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:14.04
2 |
3 | RUN apt-get update && apt-get -y upgrade
4 | RUN apt-get install python-pip python-dev -y
5 | RUN pip install flask flask_json json2html simplejson gevent
6 |
7 | RUN apt-get install -y supervisor
8 | RUN mkdir -p /var/log/supervisor
9 |
10 | ADD login.defs /etc/login.defs
11 |
12 | RUN mkdir -p /opt/microservices
13 | ADD start_all.sh /opt/microservices/start_all.sh
14 | RUN chmod a+x /opt/microservices/start_all.sh
15 |
16 | ADD templates /opt/microservices/templates
17 | ADD details-supervisor.conf /etc/supervisor/conf.d/details.conf
18 | ADD details.py /opt/microservices/details.py
19 |
20 | EXPOSE 9080
21 | WORKDIR /opt/microservices
22 |
23 | #ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
24 |
25 | CMD ["/opt/microservices/start_all.sh"]
26 |
--------------------------------------------------------------------------------
/exampleapp/details/details-supervisor.conf:
--------------------------------------------------------------------------------
1 | [program:details]
2 | command=python /opt/microservices/details.py 9080
3 | directory=/opt/microservices
4 | stdout_logfile=/dev/stdout
5 | stdout_logfile_maxbytes=0
6 | stderr_logfile=/dev/stdout
7 | stderr_logfile_maxbytes=0
8 |
--------------------------------------------------------------------------------
/exampleapp/details/details.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | from flask import Flask, request
3 | import simplejson as json
4 | import requests
5 | import sys
6 | from json2html import *
7 |
8 | app = Flask(__name__)
9 |
10 | details_resp = """
11 | Product Details
12 |
13 | Paperback: 200 pages
14 | Publisher: O'Reilly Media; 1 edition (March 25, 2015)
15 | Language: English
16 | ISBN-10: 1491914254
17 | ISBN-13: 978-1491914250
18 |
19 | """
20 |
21 | @app.route('/details')
22 | def bookDetails():
23 |     global details_resp
24 |     return details_resp
25 |
26 | @app.route('/')
27 | def index():
28 |     """ Display frontpage with normal user and test user buttons"""
29 |
30 |     top = """
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 | Book details service
51 |
52 | Hello! This is the book details service. My content is
53 | %s
54 |
55 |
56 | """ % (details_resp)
57 |     return top
58 |
59 | if __name__ == '__main__':
60 |     # To run the server: python details.py <port>
61 |     if len(sys.argv) < 2:
62 |         print "usage: %s port" % (sys.argv[0])
63 |         sys.exit(-1)
64 |
65 |     p = int(sys.argv[1])
66 |     app.run(host='0.0.0.0', port=p, debug=False)
67 |
--------------------------------------------------------------------------------
/exampleapp/details/login.defs:
--------------------------------------------------------------------------------
1 | #
2 | # /etc/login.defs - Configuration control definitions for the login package.
3 | #
4 | # Three items must be defined: MAIL_DIR, ENV_SUPATH, and ENV_PATH.
5 | # If unspecified, some arbitrary (and possibly incorrect) value will
6 | # be assumed. All other items are optional - if not specified then
7 | # the described action or option will be inhibited.
8 | #
9 | # Comment lines (lines beginning with "#") and blank lines are ignored.
10 | #
11 | # Modified for Linux. --marekm
12 |
13 | # REQUIRED for useradd/userdel/usermod
14 | # Directory where mailboxes reside, _or_ name of file, relative to the
15 | # home directory. If you _do_ define MAIL_DIR and MAIL_FILE,
16 | # MAIL_DIR takes precedence.
17 | #
18 | # Essentially:
19 | # - MAIL_DIR defines the location of users mail spool files
20 | # (for mbox use) by appending the username to MAIL_DIR as defined
21 | # below.
22 | # - MAIL_FILE defines the location of the users mail spool files as the
23 | # fully-qualified filename obtained by prepending the user home
24 | # directory before $MAIL_FILE
25 | #
26 | # NOTE: This is no more used for setting up users MAIL environment variable
27 | # which is, starting from shadow 4.0.12-1 in Debian, entirely the
28 | # job of the pam_mail PAM modules
29 | # See default PAM configuration files provided for
30 | # login, su, etc.
31 | #
32 | # This is a temporary situation: setting these variables will soon
33 | # move to /etc/default/useradd and the variables will then be
34 | # no more supported
35 | MAIL_DIR /var/mail
36 | #MAIL_FILE .mail
37 |
38 | #
39 | # Enable logging and display of /var/log/faillog login failure info.
40 | # This option conflicts with the pam_tally PAM module.
41 | #
42 | FAILLOG_ENAB yes
43 |
44 | #
45 | # Enable display of unknown usernames when login failures are recorded.
46 | #
47 | # WARNING: Unknown usernames may become world readable.
48 | # See #290803 and #298773 for details about how this could become a security
49 | # concern
50 | LOG_UNKFAIL_ENAB no
51 |
52 | #
53 | # Enable logging of successful logins
54 | #
55 | LOG_OK_LOGINS no
56 |
57 | #
58 | # Enable "syslog" logging of su activity - in addition to sulog file logging.
59 | # SYSLOG_SG_ENAB does the same for newgrp and sg.
60 | #
61 | SYSLOG_SU_ENAB yes
62 | SYSLOG_SG_ENAB yes
63 |
64 | #
65 | # If defined, all su activity is logged to this file.
66 | #
67 | #SULOG_FILE /var/log/sulog
68 |
69 | #
70 | # If defined, file which maps tty line to TERM environment parameter.
71 | # Each line of the file is in a format something like "vt100 tty01".
72 | #
73 | #TTYTYPE_FILE /etc/ttytype
74 |
75 | #
76 | # If defined, login failures will be logged here in a utmp format
77 | # last, when invoked as lastb, will read /var/log/btmp, so...
78 | #
79 | FTMP_FILE /var/log/btmp
80 |
81 | #
82 | # If defined, the command name to display when running "su -". For
83 | # example, if this is defined as "su" then a "ps" will display the
84 | # command is "-su". If not defined, then "ps" would display the
85 | # name of the shell actually being run, e.g. something like "-sh".
86 | #
87 | SU_NAME su
88 |
89 | #
90 | # If defined, file which inhibits all the usual chatter during the login
91 | # sequence. If a full pathname, then hushed mode will be enabled if the
92 | # user's name or shell are found in the file. If not a full pathname, then
93 | # hushed mode will be enabled if the file exists in the user's home directory.
94 | #
95 | HUSHLOGIN_FILE .hushlogin
96 | #HUSHLOGIN_FILE /etc/hushlogins
97 |
98 | #
99 | # *REQUIRED* The default PATH settings, for superuser and normal users.
100 | #
101 | # (they are minimal, add the rest in the shell startup files)
102 | ENV_SUPATH PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
103 | ENV_PATH PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games
104 |
105 | #
106 | # Terminal permissions
107 | #
108 | # TTYGROUP Login tty will be assigned this group ownership.
109 | # TTYPERM Login tty will be set to this permission.
110 | #
111 | # If you have a "write" program which is "setgid" to a special group
112 | # which owns the terminals, define TTYGROUP to the group number and
113 | # TTYPERM to 0620. Otherwise leave TTYGROUP commented out and assign
114 | # TTYPERM to either 622 or 600.
115 | #
116 | # In Debian /usr/bin/bsd-write or similar programs are setgid tty
117 | # However, the default and recommended value for TTYPERM is still 0600
118 | # to not allow anyone to write to anyone else console or terminal
119 |
120 | # Users can still allow other people to write them by issuing
121 | # the "mesg y" command.
122 |
123 | TTYGROUP tty
124 | TTYPERM 0600
125 |
126 | #
127 | # Login configuration initializations:
128 | #
129 | # ERASECHAR Terminal ERASE character ('\010' = backspace).
130 | # KILLCHAR Terminal KILL character ('\025' = CTRL/U).
131 | # UMASK Default "umask" value.
132 | #
133 | # The ERASECHAR and KILLCHAR are used only on System V machines.
134 | #
135 | # UMASK is the default umask value for pam_umask and is used by
136 | # useradd and newusers to set the mode of the new home directories.
137 | # 022 is the "historical" value in Debian for UMASK
138 | # 027, or even 077, could be considered better for privacy
139 | # There is no One True Answer here : each sysadmin must make up his/her
140 | # mind.
141 | #
142 | # If USERGROUPS_ENAB is set to "yes", that will modify this UMASK default value
143 | # for private user groups, i. e. the uid is the same as gid, and username is
144 | # the same as the primary group name: for these, the user permissions will be
145 | # used as group permissions, e. g. 022 will become 002.
146 | #
147 | # Prefix these values with "0" to get octal, "0x" to get hexadecimal.
148 | #
149 | ERASECHAR 0177
150 | KILLCHAR 025
151 | UMASK 022
152 |
153 | #
154 | # Password aging controls:
155 | #
156 | # PASS_MAX_DAYS Maximum number of days a password may be used.
157 | # PASS_MIN_DAYS Minimum number of days allowed between password changes.
158 | # PASS_WARN_AGE Number of days warning given before a password expires.
159 | #
160 | PASS_MAX_DAYS 90
161 | PASS_MIN_DAYS 0
162 | PASS_WARN_AGE 7
163 |
164 | #
165 | # Min/max values for automatic uid selection in useradd
166 | #
167 | UID_MIN 1000
168 | UID_MAX 60000
169 | # System accounts
170 | #SYS_UID_MIN 100
171 | #SYS_UID_MAX 999
172 |
173 | #
174 | # Min/max values for automatic gid selection in groupadd
175 | #
176 | GID_MIN 1000
177 | GID_MAX 60000
178 | # System accounts
179 | #SYS_GID_MIN 100
180 | #SYS_GID_MAX 999
181 |
182 | #
183 | # Max number of login retries if password is bad. This will most likely be
184 | # overriden by PAM, since the default pam_unix module has it's own built
185 | # in of 3 retries. However, this is a safe fallback in case you are using
186 | # an authentication module that does not enforce PAM_MAXTRIES.
187 | #
188 | LOGIN_RETRIES 5
189 |
190 | #
191 | # Max time in seconds for login
192 | #
193 | LOGIN_TIMEOUT 60
194 |
195 | #
196 | # Which fields may be changed by regular users using chfn - use
197 | # any combination of letters "frwh" (full name, room number, work
198 | # phone, home phone). If not defined, no changes are allowed.
199 | # For backward compatibility, "yes" = "rwh" and "no" = "frwh".
200 | #
201 | CHFN_RESTRICT rwh
202 |
203 | #
204 | # Should login be allowed if we can't cd to the home directory?
205 | # Default in no.
206 | #
207 | DEFAULT_HOME yes
208 |
209 | #
210 | # If defined, this command is run when removing a user.
211 | # It should remove any at/cron/print jobs etc. owned by
212 | # the user to be removed (passed as the first argument).
213 | #
214 | #USERDEL_CMD /usr/sbin/userdel_local
215 |
216 | #
217 | # Enable setting of the umask group bits to be the same as owner bits
218 | # (examples: 022 -> 002, 077 -> 007) for non-root users, if the uid is
219 | # the same as gid, and username is the same as the primary group name.
220 | #
221 | # If set to yes, userdel will remove the user´s group if it contains no
222 | # more members, and useradd will create by default a group with the name
223 | # of the user.
224 | #
225 | USERGROUPS_ENAB yes
226 |
227 | #
228 | # Instead of the real user shell, the program specified by this parameter
229 | # will be launched, although its visible name (argv[0]) will be the shell's.
230 | # The program may do whatever it wants (logging, additional authentification,
231 | # banner, ...) before running the actual shell.
232 | #
233 | # FAKE_SHELL /bin/fakeshell
234 |
235 | #
236 | # If defined, either full pathname of a file containing device names or
237 | # a ":" delimited list of device names. Root logins will be allowed only
238 | # upon these devices.
239 | #
240 | # This variable is used by login and su.
241 | #
242 | #CONSOLE /etc/consoles
243 | #CONSOLE console:tty01:tty02:tty03:tty04
244 |
245 | #
246 | # List of groups to add to the user's supplementary group set
247 | # when logging in on the console (as determined by the CONSOLE
248 | # setting). Default is none.
249 | #
250 | # Use with caution - it is possible for users to gain permanent
251 | # access to these groups, even when not logged in on the console.
252 | # How to do it is left as an exercise for the reader...
253 | #
254 | # This variable is used by login and su.
255 | #
256 | #CONSOLE_GROUPS floppy:audio:cdrom
257 |
258 | #
259 | # If set to "yes", new passwords will be encrypted using the MD5-based
260 | # algorithm compatible with the one used by recent releases of FreeBSD.
261 | # It supports passwords of unlimited length and longer salt strings.
262 | # Set to "no" if you need to copy encrypted passwords to other systems
263 | # which don't understand the new algorithm. Default is "no".
264 | #
265 | # This variable is deprecated. You should use ENCRYPT_METHOD.
266 | #
267 | #MD5_CRYPT_ENAB no
268 |
269 | #
270 | # If set to MD5 , MD5-based algorithm will be used for encrypting password
271 | # If set to SHA256, SHA256-based algorithm will be used for encrypting password
272 | # If set to SHA512, SHA512-based algorithm will be used for encrypting password
273 | # If set to DES, DES-based algorithm will be used for encrypting password (default)
274 | # Overrides the MD5_CRYPT_ENAB option
275 | #
276 | # Note: It is recommended to use a value consistent with
277 | # the PAM modules configuration.
278 | #
279 | ENCRYPT_METHOD SHA512
280 |
281 | #
282 | # Only used if ENCRYPT_METHOD is set to SHA256 or SHA512.
283 | #
284 | # Define the number of SHA rounds.
285 | # With a lot of rounds, it is more difficult to brute forcing the password.
286 | # But note also that it more CPU resources will be needed to authenticate
287 | # users.
288 | #
289 | # If not specified, the libc will choose the default number of rounds (5000).
290 | # The values must be inside the 1000-999999999 range.
291 | # If only one of the MIN or MAX values is set, then this value will be used.
292 | # If MIN > MAX, the highest value will be used.
293 | #
294 | # SHA_CRYPT_MIN_ROUNDS 5000
295 | # SHA_CRYPT_MAX_ROUNDS 5000
296 |
297 | ################# OBSOLETED BY PAM ##############
298 | # #
299 | # These options are now handled by PAM. Please #
300 | # edit the appropriate file in /etc/pam.d/ to #
301 | # enable the equivelants of them.
302 | #
303 | ###############
304 |
305 | #MOTD_FILE
306 | #DIALUPS_CHECK_ENAB
307 | #LASTLOG_ENAB
308 | #MAIL_CHECK_ENAB
309 | #OBSCURE_CHECKS_ENAB
310 | #PORTTIME_CHECKS_ENAB
311 | #SU_WHEEL_ONLY
312 | #CRACKLIB_DICTPATH
313 | #PASS_CHANGE_TRIES
314 | #PASS_ALWAYS_WARN
315 | #ENVIRON_FILE
316 | #NOLOGINS_FILE
317 | #ISSUE_FILE
318 | PASS_MIN_LEN=8
319 | #PASS_MAX_LEN
320 | #ULIMIT
321 | #ENV_HZ
322 | #CHFN_AUTH
323 | #CHSH_AUTH
324 | #FAIL_DELAY
325 |
326 | ################# OBSOLETED #######################
327 | # #
328 | # These options are no more handled by shadow. #
329 | # #
330 | # Shadow utilities will display a warning if they #
331 | # still appear. #
332 | # #
333 | ###################################################
334 |
335 | # CLOSE_SESSIONS
336 | # LOGIN_STRING
337 | # NO_PASSWORD_CONSOLE
338 | # QMAIL_DIR
339 |
340 |
341 |
342 |
--------------------------------------------------------------------------------
/exampleapp/details/start_all.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | echo "******Start Service******"
3 | /usr/bin/supervisord -n
4 |
5 |
--------------------------------------------------------------------------------
/exampleapp/details/templates/example.html:
--------------------------------------------------------------------------------
1 | {% extends "bootstrap/base.html" %}
2 | {% block title %}This is an example page{% endblock %}
3 |
4 | {% block navbar %}
5 |
6 | This is a series of comic albums created by the Belgian cartoonist Georges Remi,
7 | who wrote under the pen name Herge. The series was one of the most popular
8 | European comics of the 20th century.
9 |
10 | {{bookdetails}}
11 | {{bookreviews}}
--------------------------------------------------------------------------------
/exampleapp/gateway/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:14.04
2 |
3 | RUN apt-get update && apt-get -y upgrade
4 | RUN apt-get install -y supervisor
5 | RUN mkdir -p /var/log/supervisor
6 |
7 | ADD login.defs /etc/login.defs
8 |
9 | RUN mkdir -p /opt/microservices
10 | ADD start_all.sh /opt/microservices/start_all.sh
11 | RUN chmod a+x /opt/microservices/start_all.sh
12 |
13 | ADD gremlinproxy /opt/microservices/gremlinproxy
14 | ADD gatewayconfig.json /opt/microservices/gatewayconfig.json
15 | ADD gremlingateway-supervisor.conf /etc/supervisor/conf.d/gremlingateway.conf
16 |
17 | EXPOSE 9080 9876
18 | WORKDIR /opt/microservices
19 |
20 |
21 | CMD ["/opt/microservices/start_all.sh"]
22 |
--------------------------------------------------------------------------------
/exampleapp/gateway/gatewayconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "services": [
3 | {
4 | "name": "productpage",
5 | "proxy": {
6 | "bindhost" : "0.0.0.0",
7 | "port": 9080,
8 | "protocol": "http"
9 | },
10 | "loadbalancer": {
11 | "hosts": [
12 | "productpage.mybookstore.io:9080"
13 | ],
14 | "mode": "roundrobin"
15 | }
16 | }
17 | ],
18 | "router": {
19 | "name": "gateway",
20 | "port": 9876,
21 | "trackingheader": "X-Gremlin-ID"
22 | },
23 | "loglevel" : "debug",
24 | "logjson": true,
25 | "logstash": "logstore.mybookstore.io:8092"
26 | }
27 |
--------------------------------------------------------------------------------
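
For reference, the config above wires up a single gremlinproxy: the `proxy` block is the listener exposed to callers, the `loadbalancer` block lists the backend hosts and balancing mode, and the `router` block opens the control port and names the header used to track test requests. A minimal sketch (assuming nothing beyond the field names visible above, and that the file sits in the working directory) that loads such a config and prints the wiring:

import json

# Sketch: inspect a gremlinproxy config using only the field names
# visible in gatewayconfig.json above.
with open("gatewayconfig.json") as fp:
    cfg = json.load(fp)

for svc in cfg["services"]:
    proxy, lb = svc["proxy"], svc["loadbalancer"]
    print("%s: listen %s:%d (%s) -> %s [%s]" % (
        svc["name"], proxy["bindhost"], proxy["port"], proxy["protocol"],
        ", ".join(lb["hosts"]), lb["mode"]))

router = cfg["router"]
print("control API on port %d, tracking header %s"
      % (router["port"], router["trackingheader"]))
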
/exampleapp/gateway/gremlingateway-supervisor.conf:
--------------------------------------------------------------------------------
1 | [program:gremlingateway]
2 | command=/opt/microservices/gremlinproxy -c gatewayconfig.json
3 | directory=/opt/microservices
4 | stdout_logfile=/dev/stdout
5 | stdout_logfile_maxbytes=0
6 | stderr_logfile=/dev/stdout
7 | stderr_logfile_maxbytes=0
8 |
--------------------------------------------------------------------------------
/exampleapp/gateway/login.defs:
--------------------------------------------------------------------------------
1 | #
2 | # /etc/login.defs - Configuration control definitions for the login package.
3 | #
4 | # Three items must be defined: MAIL_DIR, ENV_SUPATH, and ENV_PATH.
5 | # If unspecified, some arbitrary (and possibly incorrect) value will
6 | # be assumed. All other items are optional - if not specified then
7 | # the described action or option will be inhibited.
8 | #
9 | # Comment lines (lines beginning with "#") and blank lines are ignored.
10 | #
11 | # Modified for Linux. --marekm
12 |
13 | # REQUIRED for useradd/userdel/usermod
14 | # Directory where mailboxes reside, _or_ name of file, relative to the
15 | # home directory. If you _do_ define MAIL_DIR and MAIL_FILE,
16 | # MAIL_DIR takes precedence.
17 | #
18 | # Essentially:
19 | # - MAIL_DIR defines the location of users mail spool files
20 | # (for mbox use) by appending the username to MAIL_DIR as defined
21 | # below.
22 | # - MAIL_FILE defines the location of the users mail spool files as the
23 | # fully-qualified filename obtained by prepending the user home
24 | # directory before $MAIL_FILE
25 | #
26 | # NOTE: This is no longer used for setting up the user's MAIL environment variable
27 | # which is, starting from shadow 4.0.12-1 in Debian, entirely the
28 | # job of the pam_mail PAM modules
29 | # See default PAM configuration files provided for
30 | # login, su, etc.
31 | #
32 | # This is a temporary situation: setting these variables will soon
33 | # move to /etc/default/useradd and the variables will then no longer
34 | # be supported
35 | MAIL_DIR /var/mail
36 | #MAIL_FILE .mail
37 |
38 | #
39 | # Enable logging and display of /var/log/faillog login failure info.
40 | # This option conflicts with the pam_tally PAM module.
41 | #
42 | FAILLOG_ENAB yes
43 |
44 | #
45 | # Enable display of unknown usernames when login failures are recorded.
46 | #
47 | # WARNING: Unknown usernames may become world readable.
48 | # See #290803 and #298773 for details about how this could become a security
49 | # concern
50 | LOG_UNKFAIL_ENAB no
51 |
52 | #
53 | # Enable logging of successful logins
54 | #
55 | LOG_OK_LOGINS no
56 |
57 | #
58 | # Enable "syslog" logging of su activity - in addition to sulog file logging.
59 | # SYSLOG_SG_ENAB does the same for newgrp and sg.
60 | #
61 | SYSLOG_SU_ENAB yes
62 | SYSLOG_SG_ENAB yes
63 |
64 | #
65 | # If defined, all su activity is logged to this file.
66 | #
67 | #SULOG_FILE /var/log/sulog
68 |
69 | #
70 | # If defined, file which maps tty line to TERM environment parameter.
71 | # Each line of the file is in a format something like "vt100 tty01".
72 | #
73 | #TTYTYPE_FILE /etc/ttytype
74 |
75 | #
76 | # If defined, login failures will be logged here in a utmp format
77 | # last, when invoked as lastb, will read /var/log/btmp, so...
78 | #
79 | FTMP_FILE /var/log/btmp
80 |
81 | #
82 | # If defined, the command name to display when running "su -". For
83 | # example, if this is defined as "su" then a "ps" will display the
84 | # command is "-su". If not defined, then "ps" would display the
85 | # name of the shell actually being run, e.g. something like "-sh".
86 | #
87 | SU_NAME su
88 |
89 | #
90 | # If defined, file which inhibits all the usual chatter during the login
91 | # sequence. If a full pathname, then hushed mode will be enabled if the
92 | # user's name or shell are found in the file. If not a full pathname, then
93 | # hushed mode will be enabled if the file exists in the user's home directory.
94 | #
95 | HUSHLOGIN_FILE .hushlogin
96 | #HUSHLOGIN_FILE /etc/hushlogins
97 |
98 | #
99 | # *REQUIRED* The default PATH settings, for superuser and normal users.
100 | #
101 | # (they are minimal, add the rest in the shell startup files)
102 | ENV_SUPATH PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
103 | ENV_PATH PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games
104 |
105 | #
106 | # Terminal permissions
107 | #
108 | # TTYGROUP Login tty will be assigned this group ownership.
109 | # TTYPERM Login tty will be set to this permission.
110 | #
111 | # If you have a "write" program which is "setgid" to a special group
112 | # which owns the terminals, define TTYGROUP to the group number and
113 | # TTYPERM to 0620. Otherwise leave TTYGROUP commented out and assign
114 | # TTYPERM to either 622 or 600.
115 | #
116 | # In Debian /usr/bin/bsd-write or similar programs are setgid tty
117 | # However, the default and recommended value for TTYPERM is still 0600
118 | # to not allow anyone to write to anyone else's console or terminal
119 |
120 | # Users can still allow other people to write them by issuing
121 | # the "mesg y" command.
122 |
123 | TTYGROUP tty
124 | TTYPERM 0600
125 |
126 | #
127 | # Login configuration initializations:
128 | #
129 | # ERASECHAR Terminal ERASE character ('\010' = backspace).
130 | # KILLCHAR Terminal KILL character ('\025' = CTRL/U).
131 | # UMASK Default "umask" value.
132 | #
133 | # The ERASECHAR and KILLCHAR are used only on System V machines.
134 | #
135 | # UMASK is the default umask value for pam_umask and is used by
136 | # useradd and newusers to set the mode of the new home directories.
137 | # 022 is the "historical" value in Debian for UMASK
138 | # 027, or even 077, could be considered better for privacy
139 | # There is no One True Answer here: each sysadmin must make up his/her
140 | # mind.
141 | #
142 | # If USERGROUPS_ENAB is set to "yes", that will modify this UMASK default value
143 | # for private user groups, i. e. the uid is the same as gid, and username is
144 | # the same as the primary group name: for these, the user permissions will be
145 | # used as group permissions, e. g. 022 will become 002.
146 | #
147 | # Prefix these values with "0" to get octal, "0x" to get hexadecimal.
148 | #
149 | ERASECHAR 0177
150 | KILLCHAR 025
151 | UMASK 022
152 |
153 | #
154 | # Password aging controls:
155 | #
156 | # PASS_MAX_DAYS Maximum number of days a password may be used.
157 | # PASS_MIN_DAYS Minimum number of days allowed between password changes.
158 | # PASS_WARN_AGE Number of days warning given before a password expires.
159 | #
160 | PASS_MAX_DAYS 90
161 | PASS_MIN_DAYS 0
162 | PASS_WARN_AGE 7
163 |
164 | #
165 | # Min/max values for automatic uid selection in useradd
166 | #
167 | UID_MIN 1000
168 | UID_MAX 60000
169 | # System accounts
170 | #SYS_UID_MIN 100
171 | #SYS_UID_MAX 999
172 |
173 | #
174 | # Min/max values for automatic gid selection in groupadd
175 | #
176 | GID_MIN 1000
177 | GID_MAX 60000
178 | # System accounts
179 | #SYS_GID_MIN 100
180 | #SYS_GID_MAX 999
181 |
182 | #
183 | # Max number of login retries if password is bad. This will most likely be
184 | # overridden by PAM, since the default pam_unix module has its own built-in
185 | # limit of 3 retries. However, this is a safe fallback in case you are using
186 | # an authentication module that does not enforce PAM_MAXTRIES.
187 | #
188 | LOGIN_RETRIES 5
189 |
190 | #
191 | # Max time in seconds for login
192 | #
193 | LOGIN_TIMEOUT 60
194 |
195 | #
196 | # Which fields may be changed by regular users using chfn - use
197 | # any combination of letters "frwh" (full name, room number, work
198 | # phone, home phone). If not defined, no changes are allowed.
199 | # For backward compatibility, "yes" = "rwh" and "no" = "frwh".
200 | #
201 | CHFN_RESTRICT rwh
202 |
203 | #
204 | # Should login be allowed if we can't cd to the home directory?
205 | # Default is no.
206 | #
207 | DEFAULT_HOME yes
208 |
209 | #
210 | # If defined, this command is run when removing a user.
211 | # It should remove any at/cron/print jobs etc. owned by
212 | # the user to be removed (passed as the first argument).
213 | #
214 | #USERDEL_CMD /usr/sbin/userdel_local
215 |
216 | #
217 | # Enable setting of the umask group bits to be the same as owner bits
218 | # (examples: 022 -> 002, 077 -> 007) for non-root users, if the uid is
219 | # the same as gid, and username is the same as the primary group name.
220 | #
221 | # If set to yes, userdel will remove the user's group if it contains no
222 | # more members, and useradd will create by default a group with the name
223 | # of the user.
224 | #
225 | USERGROUPS_ENAB yes
226 |
227 | #
228 | # Instead of the real user shell, the program specified by this parameter
229 | # will be launched, although its visible name (argv[0]) will be the shell's.
230 | # The program may do whatever it wants (logging, additional authentication,
231 | # banner, ...) before running the actual shell.
232 | #
233 | # FAKE_SHELL /bin/fakeshell
234 |
235 | #
236 | # If defined, either full pathname of a file containing device names or
237 | # a ":" delimited list of device names. Root logins will be allowed only
238 | # upon these devices.
239 | #
240 | # This variable is used by login and su.
241 | #
242 | #CONSOLE /etc/consoles
243 | #CONSOLE console:tty01:tty02:tty03:tty04
244 |
245 | #
246 | # List of groups to add to the user's supplementary group set
247 | # when logging in on the console (as determined by the CONSOLE
248 | # setting). Default is none.
249 | #
250 | # Use with caution - it is possible for users to gain permanent
251 | # access to these groups, even when not logged in on the console.
252 | # How to do it is left as an exercise for the reader...
253 | #
254 | # This variable is used by login and su.
255 | #
256 | #CONSOLE_GROUPS floppy:audio:cdrom
257 |
258 | #
259 | # If set to "yes", new passwords will be encrypted using the MD5-based
260 | # algorithm compatible with the one used by recent releases of FreeBSD.
261 | # It supports passwords of unlimited length and longer salt strings.
262 | # Set to "no" if you need to copy encrypted passwords to other systems
263 | # which don't understand the new algorithm. Default is "no".
264 | #
265 | # This variable is deprecated. You should use ENCRYPT_METHOD.
266 | #
267 | #MD5_CRYPT_ENAB no
268 |
269 | #
270 | # If set to MD5 , MD5-based algorithm will be used for encrypting password
271 | # If set to SHA256, SHA256-based algorithm will be used for encrypting password
272 | # If set to SHA512, SHA512-based algorithm will be used for encrypting password
273 | # If set to DES, DES-based algorithm will be used for encrypting password (default)
274 | # Overrides the MD5_CRYPT_ENAB option
275 | #
276 | # Note: It is recommended to use a value consistent with
277 | # the PAM modules configuration.
278 | #
279 | ENCRYPT_METHOD SHA512
280 |
281 | #
282 | # Only used if ENCRYPT_METHOD is set to SHA256 or SHA512.
283 | #
284 | # Define the number of SHA rounds.
285 | # With a lot of rounds, it is more difficult to brute-force the password.
286 | # But note also that more CPU resources will be needed to authenticate
287 | # users.
288 | #
289 | # If not specified, the libc will choose the default number of rounds (5000).
290 | # The values must be inside the 1000-999999999 range.
291 | # If only one of the MIN or MAX values is set, then this value will be used.
292 | # If MIN > MAX, the highest value will be used.
293 | #
294 | # SHA_CRYPT_MIN_ROUNDS 5000
295 | # SHA_CRYPT_MAX_ROUNDS 5000
296 |
297 | ################# OBSOLETED BY PAM ##############
298 | # #
299 | # These options are now handled by PAM. Please #
300 | # edit the appropriate file in /etc/pam.d/ to #
301 | # enable the equivalents of them. #
302 | # #
303 | #################################################
304 |
305 | #MOTD_FILE
306 | #DIALUPS_CHECK_ENAB
307 | #LASTLOG_ENAB
308 | #MAIL_CHECK_ENAB
309 | #OBSCURE_CHECKS_ENAB
310 | #PORTTIME_CHECKS_ENAB
311 | #SU_WHEEL_ONLY
312 | #CRACKLIB_DICTPATH
313 | #PASS_CHANGE_TRIES
314 | #PASS_ALWAYS_WARN
315 | #ENVIRON_FILE
316 | #NOLOGINS_FILE
317 | #ISSUE_FILE
318 | PASS_MIN_LEN 8
319 | #PASS_MAX_LEN
320 | #ULIMIT
321 | #ENV_HZ
322 | #CHFN_AUTH
323 | #CHSH_AUTH
324 | #FAIL_DELAY
325 |
326 | ################# OBSOLETED #######################
327 | # #
328 | # These options are no longer handled by shadow. #
329 | # #
330 | # Shadow utilities will display a warning if they #
331 | # still appear. #
332 | # #
333 | ###################################################
334 |
335 | # CLOSE_SESSIONS
336 | # LOGIN_STRING
337 | # NO_PASSWORD_CONSOLE
338 | # QMAIL_DIR
339 |
340 |
341 |
342 |
--------------------------------------------------------------------------------
/exampleapp/gateway/start_all.sh:
--------------------------------------------------------------------------------
1 | #! /bin/bash
2 | echo "******Start Service******"
3 | /usr/bin/supervisord -n
4 |
5 |
--------------------------------------------------------------------------------
/exampleapp/killapps.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #gunicorn -D -w 1 -b 0.0.0.0:10081 --reload details:app
3 | #gunicorn -D -w 1 -b 0.0.0.0:10082 --reload reviews:app
4 | #gunicorn -w 1 -b 0.0.0.0:19080 --reload --access-logfile prod.log --error-logfile prod.log productpage:app >>prod.log 2>&1 &
5 | docker-compose -f compose-app.yml kill
6 |
--------------------------------------------------------------------------------
/exampleapp/productpage/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:14.04
2 |
3 | RUN apt-get update && apt-get -y upgrade
4 | RUN apt-get install python-pip python-dev wget -y
5 | RUN pip install flask flask_json json2html simplejson gevent
6 | RUN pip install flask-bootstrap
7 | RUN pip install gunicorn
8 |
9 | RUN apt-get install -y supervisor
10 | RUN mkdir -p /var/log/supervisor
11 |
12 | ADD login.defs /etc/login.defs
13 |
14 | RUN mkdir -p /opt/microservices
15 | ADD start_all.sh /opt/microservices/start_all.sh
16 | RUN chmod a+x /opt/microservices/start_all.sh
17 |
18 | ADD gremlinproxy /opt/microservices/gremlinproxy
19 | ADD proxyconfig.json /opt/microservices/proxyconfig.json
20 | ADD gremlinproduct-supervisor.conf /etc/supervisor/conf.d/gremlinproxy.conf
21 |
22 | ADD templates /opt/microservices/templates
23 | ADD productpage-supervisor.conf /etc/supervisor/conf.d/productpage.conf
24 | ADD productpage.py /opt/microservices/productpage.py
25 |
26 | #WORKDIR /opt
27 | #RUN wget ftp://public.dhe.ibm.com/cloud/bluemix/containers/logstash-mtlumberjack.tgz && \
28 | # tar -xzf logstash-mtlumberjack.tgz
29 | #ADD logstash.conf /opt/logstash/conf.d/
30 | #ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
31 |
32 |
33 | EXPOSE 9080 9876
34 | WORKDIR /opt/microservices
35 |
36 |
37 | CMD ["/opt/microservices/start_all.sh"]
38 |
--------------------------------------------------------------------------------
/exampleapp/productpage/gremlinproduct-supervisor.conf:
--------------------------------------------------------------------------------
1 | [program:gremlinproxy]
2 | command=/opt/microservices/gremlinproxy -c proxyconfig.json
3 | directory=/opt/microservices
4 | stdout_logfile=/dev/stdout
5 | stdout_logfile_maxbytes=0
6 | stderr_logfile=/dev/stdout
7 | stderr_logfile_maxbytes=0
8 |
--------------------------------------------------------------------------------
/exampleapp/productpage/login.defs:
--------------------------------------------------------------------------------
1 | #
2 | # /etc/login.defs - Configuration control definitions for the login package.
3 | #
4 | # Three items must be defined: MAIL_DIR, ENV_SUPATH, and ENV_PATH.
5 | # If unspecified, some arbitrary (and possibly incorrect) value will
6 | # be assumed. All other items are optional - if not specified then
7 | # the described action or option will be inhibited.
8 | #
9 | # Comment lines (lines beginning with "#") and blank lines are ignored.
10 | #
11 | # Modified for Linux. --marekm
12 |
13 | # REQUIRED for useradd/userdel/usermod
14 | # Directory where mailboxes reside, _or_ name of file, relative to the
15 | # home directory. If you _do_ define MAIL_DIR and MAIL_FILE,
16 | # MAIL_DIR takes precedence.
17 | #
18 | # Essentially:
19 | # - MAIL_DIR defines the location of users mail spool files
20 | # (for mbox use) by appending the username to MAIL_DIR as defined
21 | # below.
22 | # - MAIL_FILE defines the location of the users mail spool files as the
23 | # fully-qualified filename obtained by prepending the user home
24 | # directory before $MAIL_FILE
25 | #
26 | # NOTE: This is no longer used for setting up the user's MAIL environment variable
27 | # which is, starting from shadow 4.0.12-1 in Debian, entirely the
28 | # job of the pam_mail PAM modules
29 | # See default PAM configuration files provided for
30 | # login, su, etc.
31 | #
32 | # This is a temporary situation: setting these variables will soon
33 | # move to /etc/default/useradd and the variables will then no longer
34 | # be supported
35 | MAIL_DIR /var/mail
36 | #MAIL_FILE .mail
37 |
38 | #
39 | # Enable logging and display of /var/log/faillog login failure info.
40 | # This option conflicts with the pam_tally PAM module.
41 | #
42 | FAILLOG_ENAB yes
43 |
44 | #
45 | # Enable display of unknown usernames when login failures are recorded.
46 | #
47 | # WARNING: Unknown usernames may become world readable.
48 | # See #290803 and #298773 for details about how this could become a security
49 | # concern
50 | LOG_UNKFAIL_ENAB no
51 |
52 | #
53 | # Enable logging of successful logins
54 | #
55 | LOG_OK_LOGINS no
56 |
57 | #
58 | # Enable "syslog" logging of su activity - in addition to sulog file logging.
59 | # SYSLOG_SG_ENAB does the same for newgrp and sg.
60 | #
61 | SYSLOG_SU_ENAB yes
62 | SYSLOG_SG_ENAB yes
63 |
64 | #
65 | # If defined, all su activity is logged to this file.
66 | #
67 | #SULOG_FILE /var/log/sulog
68 |
69 | #
70 | # If defined, file which maps tty line to TERM environment parameter.
71 | # Each line of the file is in a format something like "vt100 tty01".
72 | #
73 | #TTYTYPE_FILE /etc/ttytype
74 |
75 | #
76 | # If defined, login failures will be logged here in a utmp format
77 | # last, when invoked as lastb, will read /var/log/btmp, so...
78 | #
79 | FTMP_FILE /var/log/btmp
80 |
81 | #
82 | # If defined, the command name to display when running "su -". For
83 | # example, if this is defined as "su" then a "ps" will display the
84 | # command is "-su". If not defined, then "ps" would display the
85 | # name of the shell actually being run, e.g. something like "-sh".
86 | #
87 | SU_NAME su
88 |
89 | #
90 | # If defined, file which inhibits all the usual chatter during the login
91 | # sequence. If a full pathname, then hushed mode will be enabled if the
92 | # user's name or shell are found in the file. If not a full pathname, then
93 | # hushed mode will be enabled if the file exists in the user's home directory.
94 | #
95 | HUSHLOGIN_FILE .hushlogin
96 | #HUSHLOGIN_FILE /etc/hushlogins
97 |
98 | #
99 | # *REQUIRED* The default PATH settings, for superuser and normal users.
100 | #
101 | # (they are minimal, add the rest in the shell startup files)
102 | ENV_SUPATH PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
103 | ENV_PATH PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games
104 |
105 | #
106 | # Terminal permissions
107 | #
108 | # TTYGROUP Login tty will be assigned this group ownership.
109 | # TTYPERM Login tty will be set to this permission.
110 | #
111 | # If you have a "write" program which is "setgid" to a special group
112 | # which owns the terminals, define TTYGROUP to the group number and
113 | # TTYPERM to 0620. Otherwise leave TTYGROUP commented out and assign
114 | # TTYPERM to either 622 or 600.
115 | #
116 | # In Debian /usr/bin/bsd-write or similar programs are setgid tty
117 | # However, the default and recommended value for TTYPERM is still 0600
118 | # to not allow anyone to write to anyone else's console or terminal
119 |
120 | # Users can still allow other people to write them by issuing
121 | # the "mesg y" command.
122 |
123 | TTYGROUP tty
124 | TTYPERM 0600
125 |
126 | #
127 | # Login configuration initializations:
128 | #
129 | # ERASECHAR Terminal ERASE character ('\010' = backspace).
130 | # KILLCHAR Terminal KILL character ('\025' = CTRL/U).
131 | # UMASK Default "umask" value.
132 | #
133 | # The ERASECHAR and KILLCHAR are used only on System V machines.
134 | #
135 | # UMASK is the default umask value for pam_umask and is used by
136 | # useradd and newusers to set the mode of the new home directories.
137 | # 022 is the "historical" value in Debian for UMASK
138 | # 027, or even 077, could be considered better for privacy
139 | # There is no One True Answer here: each sysadmin must make up his/her
140 | # mind.
141 | #
142 | # If USERGROUPS_ENAB is set to "yes", that will modify this UMASK default value
143 | # for private user groups, i. e. the uid is the same as gid, and username is
144 | # the same as the primary group name: for these, the user permissions will be
145 | # used as group permissions, e. g. 022 will become 002.
146 | #
147 | # Prefix these values with "0" to get octal, "0x" to get hexadecimal.
148 | #
149 | ERASECHAR 0177
150 | KILLCHAR 025
151 | UMASK 022
152 |
153 | #
154 | # Password aging controls:
155 | #
156 | # PASS_MAX_DAYS Maximum number of days a password may be used.
157 | # PASS_MIN_DAYS Minimum number of days allowed between password changes.
158 | # PASS_WARN_AGE Number of days warning given before a password expires.
159 | #
160 | PASS_MAX_DAYS 90
161 | PASS_MIN_DAYS 0
162 | PASS_WARN_AGE 7
163 |
164 | #
165 | # Min/max values for automatic uid selection in useradd
166 | #
167 | UID_MIN 1000
168 | UID_MAX 60000
169 | # System accounts
170 | #SYS_UID_MIN 100
171 | #SYS_UID_MAX 999
172 |
173 | #
174 | # Min/max values for automatic gid selection in groupadd
175 | #
176 | GID_MIN 1000
177 | GID_MAX 60000
178 | # System accounts
179 | #SYS_GID_MIN 100
180 | #SYS_GID_MAX 999
181 |
182 | #
183 | # Max number of login retries if password is bad. This will most likely be
184 | # overridden by PAM, since the default pam_unix module has its own built-in
185 | # limit of 3 retries. However, this is a safe fallback in case you are using
186 | # an authentication module that does not enforce PAM_MAXTRIES.
187 | #
188 | LOGIN_RETRIES 5
189 |
190 | #
191 | # Max time in seconds for login
192 | #
193 | LOGIN_TIMEOUT 60
194 |
195 | #
196 | # Which fields may be changed by regular users using chfn - use
197 | # any combination of letters "frwh" (full name, room number, work
198 | # phone, home phone). If not defined, no changes are allowed.
199 | # For backward compatibility, "yes" = "rwh" and "no" = "frwh".
200 | #
201 | CHFN_RESTRICT rwh
202 |
203 | #
204 | # Should login be allowed if we can't cd to the home directory?
205 | # Default is no.
206 | #
207 | DEFAULT_HOME yes
208 |
209 | #
210 | # If defined, this command is run when removing a user.
211 | # It should remove any at/cron/print jobs etc. owned by
212 | # the user to be removed (passed as the first argument).
213 | #
214 | #USERDEL_CMD /usr/sbin/userdel_local
215 |
216 | #
217 | # Enable setting of the umask group bits to be the same as owner bits
218 | # (examples: 022 -> 002, 077 -> 007) for non-root users, if the uid is
219 | # the same as gid, and username is the same as the primary group name.
220 | #
221 | # If set to yes, userdel will remove the user's group if it contains no
222 | # more members, and useradd will create by default a group with the name
223 | # of the user.
224 | #
225 | USERGROUPS_ENAB yes
226 |
227 | #
228 | # Instead of the real user shell, the program specified by this parameter
229 | # will be launched, although its visible name (argv[0]) will be the shell's.
230 | # The program may do whatever it wants (logging, additional authentication,
231 | # banner, ...) before running the actual shell.
232 | #
233 | # FAKE_SHELL /bin/fakeshell
234 |
235 | #
236 | # If defined, either full pathname of a file containing device names or
237 | # a ":" delimited list of device names. Root logins will be allowed only
238 | # upon these devices.
239 | #
240 | # This variable is used by login and su.
241 | #
242 | #CONSOLE /etc/consoles
243 | #CONSOLE console:tty01:tty02:tty03:tty04
244 |
245 | #
246 | # List of groups to add to the user's supplementary group set
247 | # when logging in on the console (as determined by the CONSOLE
248 | # setting). Default is none.
249 | #
250 | # Use with caution - it is possible for users to gain permanent
251 | # access to these groups, even when not logged in on the console.
252 | # How to do it is left as an exercise for the reader...
253 | #
254 | # This variable is used by login and su.
255 | #
256 | #CONSOLE_GROUPS floppy:audio:cdrom
257 |
258 | #
259 | # If set to "yes", new passwords will be encrypted using the MD5-based
260 | # algorithm compatible with the one used by recent releases of FreeBSD.
261 | # It supports passwords of unlimited length and longer salt strings.
262 | # Set to "no" if you need to copy encrypted passwords to other systems
263 | # which don't understand the new algorithm. Default is "no".
264 | #
265 | # This variable is deprecated. You should use ENCRYPT_METHOD.
266 | #
267 | #MD5_CRYPT_ENAB no
268 |
269 | #
270 | # If set to MD5 , MD5-based algorithm will be used for encrypting password
271 | # If set to SHA256, SHA256-based algorithm will be used for encrypting password
272 | # If set to SHA512, SHA512-based algorithm will be used for encrypting password
273 | # If set to DES, DES-based algorithm will be used for encrypting password (default)
274 | # Overrides the MD5_CRYPT_ENAB option
275 | #
276 | # Note: It is recommended to use a value consistent with
277 | # the PAM modules configuration.
278 | #
279 | ENCRYPT_METHOD SHA512
280 |
281 | #
282 | # Only used if ENCRYPT_METHOD is set to SHA256 or SHA512.
283 | #
284 | # Define the number of SHA rounds.
285 | # With a lot of rounds, it is more difficult to brute-force the password.
286 | # But note also that more CPU resources will be needed to authenticate
287 | # users.
288 | #
289 | # If not specified, the libc will choose the default number of rounds (5000).
290 | # The values must be inside the 1000-999999999 range.
291 | # If only one of the MIN or MAX values is set, then this value will be used.
292 | # If MIN > MAX, the highest value will be used.
293 | #
294 | # SHA_CRYPT_MIN_ROUNDS 5000
295 | # SHA_CRYPT_MAX_ROUNDS 5000
296 |
297 | ################# OBSOLETED BY PAM ##############
298 | # #
299 | # These options are now handled by PAM. Please #
300 | # edit the appropriate file in /etc/pam.d/ to #
301 | # enable the equivalents of them. #
302 | # #
303 | #################################################
304 |
305 | #MOTD_FILE
306 | #DIALUPS_CHECK_ENAB
307 | #LASTLOG_ENAB
308 | #MAIL_CHECK_ENAB
309 | #OBSCURE_CHECKS_ENAB
310 | #PORTTIME_CHECKS_ENAB
311 | #SU_WHEEL_ONLY
312 | #CRACKLIB_DICTPATH
313 | #PASS_CHANGE_TRIES
314 | #PASS_ALWAYS_WARN
315 | #ENVIRON_FILE
316 | #NOLOGINS_FILE
317 | #ISSUE_FILE
318 | PASS_MIN_LEN 8
319 | #PASS_MAX_LEN
320 | #ULIMIT
321 | #ENV_HZ
322 | #CHFN_AUTH
323 | #CHSH_AUTH
324 | #FAIL_DELAY
325 |
326 | ################# OBSOLETED #######################
327 | # #
328 | # These options are no longer handled by shadow. #
329 | # #
330 | # Shadow utilities will display a warning if they #
331 | # still appear. #
332 | # #
333 | ###################################################
334 |
335 | # CLOSE_SESSIONS
336 | # LOGIN_STRING
337 | # NO_PASSWORD_CONSOLE
338 | # QMAIL_DIR
339 |
340 |
341 |
342 |
--------------------------------------------------------------------------------
/exampleapp/productpage/productpage-supervisor.conf:
--------------------------------------------------------------------------------
1 | [program:productpage]
2 | command=gunicorn -w 1 -b 0.0.0.0:9080 --reload --access-logfile - --error-logfile - productpage:app
3 | #python /opt/microservices/productpage.py 9080
4 | directory=/opt/microservices
5 | stdout_logfile=/dev/stdout
6 | stdout_logfile_maxbytes=0
7 | stderr_logfile=/dev/stdout
8 | stderr_logfile_maxbytes=0
9 |
--------------------------------------------------------------------------------
/exampleapp/productpage/productpage-v1.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | from flask import Flask, request, render_template
3 | from flask_bootstrap import Bootstrap
4 | import simplejson as json
5 | import requests
6 | import sys
7 | from json2html import *
8 | import logging
9 | from datetime import datetime
10 |
11 | def create_app():
12 | app = Flask(__name__)
13 | app.debug = True
14 | Bootstrap(app)
15 | return app
16 |
17 | app = create_app()
18 | log = logging.getLogger("myLogger")
19 | log.setLevel(logging.DEBUG)
20 |
21 | @app.before_first_request
22 | def setup_logging():
23 | console = logging.StreamHandler()
24 | log.addHandler(console)
25 |
26 | details = {
27 | "name" : "details",
28 | "url" : "http://localhost:9081/details",
29 | "children" : []
30 | }
31 |
32 | reviews = {
33 | "name" : "reviews",
34 | "url" : "http://localhost:9082/reviews",
35 | "children" : []
36 | }
37 |
38 | productpage = {
39 | "name" : "productpage",
40 | "children" : [details, reviews]
41 | }
42 |
43 | service_dict = {
44 | "productpage" : productpage,
45 | "details" : details,
46 | "reviews" : reviews,
47 | }
48 |
49 | # NOTE: the __main__ entry point is at the bottom of this file; app.run()
50 | # blocks, so the server must start only after all of the route handlers
51 | # below have been registered.
52 |
53 |
54 |
55 |
56 |
57 |
58 | def getGremlinHeader(request):
59 | usertype= request.args.get('u','')
60 | gremlinHeader = request.headers.get('X-Gremlin-ID')
61 |
62 | headers = {}
63 | if gremlinHeader is not None:
64 | headers = {'X-Gremlin-ID': gremlinHeader}
65 | elif usertype is not None and usertype.startswith('test'):
66 | headers = {'X-Gremlin-ID': usertype}
67 | return headers
68 |
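# (getGremlinHeader tags requests for fault injection: a request such as
# /productpage?u=testUser-timeout-1, or one already carrying an X-Gremlin-ID
# header, keeps that ID so the gremlinproxy instances can match it against
# header patterns like "testUser-timeout-*" in recipes/gremlins.json.)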
69 | @app.route('/')
70 | def index():
71 | """ Display productpage with normal user and test user buttons"""
72 | global productpage
73 |
74 | table = json2html.convert(json = json.dumps(productpage),
75 | table_attributes="class=\"table table-condensed table-bordered table-hover\"")
76 |
77 | return render_template('index.html', serviceTable=table)
78 |
79 |
80 | @app.route('/productpage')
81 | def front():
82 | headers = getGremlinHeader(request)
83 |
84 | bookdetails = getDetails(headers)
85 | bookreviews = getReviews(headers)
86 | return render_template('productpage.html', details=bookdetails, reviews=bookreviews)
87 |
88 | def getReviews(headers):
89 | ## a 10 ms timeout is available below but currently commented out
90 | try:
91 | res = requests.get(reviews['url'], headers=headers)#, timeout=0.010)
92 | except:
93 | res = None
94 |
95 | if res and res.status_code == 200:
96 | return res.text
97 | else:
98 | return """
Sorry, product reviews are currently unavailable for this book.
"""
99 |
100 |
101 | def getDetails(headers):
102 | try:
103 | res = requests.get(details['url'], headers=headers)#, timeout=0.010)
104 | except:
105 | res = None
106 |
107 | if res and res.status_code == 200:
108 | return res.text
109 | else:
110 | return """
Sorry, product details are currently unavailable for this book.
        """

111 |
112 | if __name__ == '__main__':
113 |     # To run directly: python productpage-v1.py <port>
114 |     if len(sys.argv) < 2:
115 |         print "usage: %s port" % (sys.argv[0])
116 |         sys.exit(-1)
117 |     p = int(sys.argv[1])
118 |     app.run(host='0.0.0.0', port=p, debug=True)
[Residue from the stripped productpage templates (the Tintin series description and the {{bookdetails}}/{{bookreviews}} placeholders) followed here; the markup is unrecoverable.]
--------------------------------------------------------------------------------
/exampleapp/rebuild-productpage.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #gunicorn -D -w 1 -b 0.0.0.0:10081 --reload details:app
3 | #gunicorn -D -w 1 -b 0.0.0.0:10082 --reload reviews:app
4 | #gunicorn -w 1 -b 0.0.0.0:19080 --reload --access-logfile prod.log --error-logfile prod.log productpage:app >>prod.log 2>&1 &
5 | cd productpage; docker build -t productpage .
6 |
--------------------------------------------------------------------------------
/exampleapp/recipes/checklist.json:
--------------------------------------------------------------------------------
1 | {
2 | "log_server" : "192.168.64.2:29200",
3 | "checks" : [
4 | {
5 | "name" : "bounded_response_time",
6 | "source" : "gateway",
7 | "dest" : "productpage",
8 | "max_latency" : "100ms"
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/exampleapp/recipes/gremlins.json:
--------------------------------------------------------------------------------
1 | {
2 | "gremlins" : [
3 | {
4 | "scenario" : "delay_requests",
5 | "source" : "productpage",
6 | "dest" : "reviews",
7 | "headerpattern" : "testUser-timeout-*",
8 | "delaytime" : "8s"
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
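
Together with checklist.json above, this recipe delays productpage -> reviews calls by 8s for any request whose tracking header matches testUser-timeout-*. A small sketch of the injection step (the gateway address is an assumption based on the port exposed in its Dockerfile; postman works just as well):

import requests

# Assumed address: the gateway container exposes port 9080 (see its Dockerfile).
GATEWAY = "http://localhost:9080"

# Any X-Gremlin-ID matching "testUser-timeout-*" (gremlins.json above) makes
# the productpage -> reviews call hang for 8s at the proxy; with the 100ms
# bound in checklist.json, such a request should fail the latency check.
resp = requests.get(GATEWAY + "/productpage",
                    headers={"X-Gremlin-ID": "testUser-timeout-1"},
                    timeout=30)
print(resp.status_code)
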
/exampleapp/recipes/run_recipe_json.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | from pygremlin import *
4 |
5 | import sys, requests, json, os
6 |
7 | def passOrfail(result):
8 | if result:
9 | return "PASS"
10 | else:
11 | return "FAIL"
12 |
13 | if len(sys.argv) < 4:
14 | print "usage: run_recipe.py topologySpec gremlins checklist"
15 | sys.exit(1)
16 |
17 | _, topologyFilename, gremlinFilename, checklistFilename = sys.argv
18 |
19 | debugMode = (os.getenv('GREMLINSDK_DEBUG', "") != "")
20 | if not os.path.isfile(topologyFilename):
21 | print u"Topology file {} not found".format(topologyFilename)
22 | sys.exit(2)
23 |
24 | if not os.path.isfile(gremlinFilename):
25 | print u"Gremlin file {} not found".format(gremlinFilename)
26 | sys.exit(2)
27 |
28 | if not os.path.isfile(checklistFilename):
29 | print u"Checklist file {} not found".format(checklistFilename)
30 | sys.exit(2)
31 |
32 | with open(topologyFilename) as fp:
33 | app = json.load(fp)
34 |
35 | topology = ApplicationGraph(app)
36 | if debugMode:
37 | print "Using topology:\n", topology
38 |
39 | with open(gremlinFilename) as fp:
40 | gremlins = json.load(fp)
41 |
42 | with open(checklistFilename) as fp:
43 | checklist = json.load(fp)
44 |
45 | fg = FailureGenerator(topology, debug=debugMode)
46 | fg.clear_rules_from_all_proxies()
47 | fg.setup_failures(gremlins)
48 | testID = fg.start_new_test()
49 |
50 | print ('Use `postman` to inject test requests with the HTTP header X-Gremlin-ID,\n\tthen press Enter to continue to the validation phase')
51 | a = sys.stdin.read(1)
52 |
53 |
54 | ac = AssertionChecker(checklist['log_server'], testID, debug=debugMode)
55 | results = ac.checkAssertions(checklist)
56 | exit_status = 0
57 |
58 | for check in results:
59 | print 'Check %s %s %s' % (check.name, check.info, passOrfail(check.success))
60 | if not check.success:
61 | exit_status = 1
62 |
63 | sys.exit(exit_status)
64 |
--------------------------------------------------------------------------------
/exampleapp/recipes/topology.json:
--------------------------------------------------------------------------------
1 | {
2 | "services" : [
3 | { "name": "gateway", "service_proxies": ["127.0.0.1:9877"] },
4 | { "name": "productpage", "service_proxies": ["127.0.0.1:9876"] },
5 | { "name": "reviews"},
6 | { "name": "details"}
7 | ],
8 |
9 | "dependencies" : {
10 | "gateway" : ["productpage"],
11 | "productpage" : ["reviews", "details"]
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
/exampleapp/reviews/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM ubuntu:14.04
2 |
3 | RUN apt-get update && apt-get -y upgrade
4 | RUN apt-get install python-pip python-dev -y
5 | RUN pip install flask flask_json json2html simplejson gevent
6 |
7 | RUN apt-get install -y supervisor
8 | RUN mkdir -p /var/log/supervisor
9 |
10 | ADD login.defs /etc/login.defs
11 |
12 | RUN mkdir -p /opt/microservices
13 | ADD start_all.sh /opt/microservices/start_all.sh
14 | RUN chmod a+x /opt/microservices/start_all.sh
15 |
16 | ADD templates /opt/microservices/templates
17 | ADD reviews-supervisor.conf /etc/supervisor/conf.d/reviews.conf
18 | ADD reviews.py /opt/microservices/reviews.py
19 |
20 | EXPOSE 9080
21 | WORKDIR /opt/microservices
22 |
23 | #ADD supervisord.conf /etc/supervisor/conf.d/supervisord.conf
24 |
25 | CMD ["/opt/microservices/start_all.sh"]
26 |
--------------------------------------------------------------------------------
/exampleapp/reviews/login.defs:
--------------------------------------------------------------------------------
1 | #
2 | # /etc/login.defs - Configuration control definitions for the login package.
3 | #
4 | # Three items must be defined: MAIL_DIR, ENV_SUPATH, and ENV_PATH.
5 | # If unspecified, some arbitrary (and possibly incorrect) value will
6 | # be assumed. All other items are optional - if not specified then
7 | # the described action or option will be inhibited.
8 | #
9 | # Comment lines (lines beginning with "#") and blank lines are ignored.
10 | #
11 | # Modified for Linux. --marekm
12 |
13 | # REQUIRED for useradd/userdel/usermod
14 | # Directory where mailboxes reside, _or_ name of file, relative to the
15 | # home directory. If you _do_ define MAIL_DIR and MAIL_FILE,
16 | # MAIL_DIR takes precedence.
17 | #
18 | # Essentially:
19 | # - MAIL_DIR defines the location of users mail spool files
20 | # (for mbox use) by appending the username to MAIL_DIR as defined
21 | # below.
22 | # - MAIL_FILE defines the location of the users mail spool files as the
23 | # fully-qualified filename obtained by prepending the user home
24 | # directory before $MAIL_FILE
25 | #
26 | # NOTE: This is no longer used for setting up the user's MAIL environment variable
27 | # which is, starting from shadow 4.0.12-1 in Debian, entirely the
28 | # job of the pam_mail PAM modules
29 | # See default PAM configuration files provided for
30 | # login, su, etc.
31 | #
32 | # This is a temporary situation: setting these variables will soon
33 | # move to /etc/default/useradd and the variables will then no longer
34 | # be supported
35 | MAIL_DIR /var/mail
36 | #MAIL_FILE .mail
37 |
38 | #
39 | # Enable logging and display of /var/log/faillog login failure info.
40 | # This option conflicts with the pam_tally PAM module.
41 | #
42 | FAILLOG_ENAB yes
43 |
44 | #
45 | # Enable display of unknown usernames when login failures are recorded.
46 | #
47 | # WARNING: Unknown usernames may become world readable.
48 | # See #290803 and #298773 for details about how this could become a security
49 | # concern
50 | LOG_UNKFAIL_ENAB no
51 |
52 | #
53 | # Enable logging of successful logins
54 | #
55 | LOG_OK_LOGINS no
56 |
57 | #
58 | # Enable "syslog" logging of su activity - in addition to sulog file logging.
59 | # SYSLOG_SG_ENAB does the same for newgrp and sg.
60 | #
61 | SYSLOG_SU_ENAB yes
62 | SYSLOG_SG_ENAB yes
63 |
64 | #
65 | # If defined, all su activity is logged to this file.
66 | #
67 | #SULOG_FILE /var/log/sulog
68 |
69 | #
70 | # If defined, file which maps tty line to TERM environment parameter.
71 | # Each line of the file is in a format something like "vt100 tty01".
72 | #
73 | #TTYTYPE_FILE /etc/ttytype
74 |
75 | #
76 | # If defined, login failures will be logged here in a utmp format
77 | # last, when invoked as lastb, will read /var/log/btmp, so...
78 | #
79 | FTMP_FILE /var/log/btmp
80 |
81 | #
82 | # If defined, the command name to display when running "su -". For
83 | # example, if this is defined as "su" then a "ps" will display the
84 | # command is "-su". If not defined, then "ps" would display the
85 | # name of the shell actually being run, e.g. something like "-sh".
86 | #
87 | SU_NAME su
88 |
89 | #
90 | # If defined, file which inhibits all the usual chatter during the login
91 | # sequence. If a full pathname, then hushed mode will be enabled if the
92 | # user's name or shell are found in the file. If not a full pathname, then
93 | # hushed mode will be enabled if the file exists in the user's home directory.
94 | #
95 | HUSHLOGIN_FILE .hushlogin
96 | #HUSHLOGIN_FILE /etc/hushlogins
97 |
98 | #
99 | # *REQUIRED* The default PATH settings, for superuser and normal users.
100 | #
101 | # (they are minimal, add the rest in the shell startup files)
102 | ENV_SUPATH PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
103 | ENV_PATH PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/games:/usr/games
104 |
105 | #
106 | # Terminal permissions
107 | #
108 | # TTYGROUP Login tty will be assigned this group ownership.
109 | # TTYPERM Login tty will be set to this permission.
110 | #
111 | # If you have a "write" program which is "setgid" to a special group
112 | # which owns the terminals, define TTYGROUP to the group number and
113 | # TTYPERM to 0620. Otherwise leave TTYGROUP commented out and assign
114 | # TTYPERM to either 622 or 600.
115 | #
116 | # In Debian /usr/bin/bsd-write or similar programs are setgid tty
117 | # However, the default and recommended value for TTYPERM is still 0600
118 | # to not allow anyone to write to anyone else's console or terminal
119 |
120 | # Users can still allow other people to write them by issuing
121 | # the "mesg y" command.
122 |
123 | TTYGROUP tty
124 | TTYPERM 0600
125 |
126 | #
127 | # Login configuration initializations:
128 | #
129 | # ERASECHAR Terminal ERASE character ('\010' = backspace).
130 | # KILLCHAR Terminal KILL character ('\025' = CTRL/U).
131 | # UMASK Default "umask" value.
132 | #
133 | # The ERASECHAR and KILLCHAR are used only on System V machines.
134 | #
135 | # UMASK is the default umask value for pam_umask and is used by
136 | # useradd and newusers to set the mode of the new home directories.
137 | # 022 is the "historical" value in Debian for UMASK
138 | # 027, or even 077, could be considered better for privacy
139 | # There is no One True Answer here: each sysadmin must make up his/her
140 | # mind.
141 | #
142 | # If USERGROUPS_ENAB is set to "yes", that will modify this UMASK default value
143 | # for private user groups, i. e. the uid is the same as gid, and username is
144 | # the same as the primary group name: for these, the user permissions will be
145 | # used as group permissions, e. g. 022 will become 002.
146 | #
147 | # Prefix these values with "0" to get octal, "0x" to get hexadecimal.
148 | #
149 | ERASECHAR 0177
150 | KILLCHAR 025
151 | UMASK 022
152 |
153 | #
154 | # Password aging controls:
155 | #
156 | # PASS_MAX_DAYS Maximum number of days a password may be used.
157 | # PASS_MIN_DAYS Minimum number of days allowed between password changes.
158 | # PASS_WARN_AGE Number of days warning given before a password expires.
159 | #
160 | PASS_MAX_DAYS 90
161 | PASS_MIN_DAYS 0
162 | PASS_WARN_AGE 7
163 |
164 | #
165 | # Min/max values for automatic uid selection in useradd
166 | #
167 | UID_MIN 1000
168 | UID_MAX 60000
169 | # System accounts
170 | #SYS_UID_MIN 100
171 | #SYS_UID_MAX 999
172 |
173 | #
174 | # Min/max values for automatic gid selection in groupadd
175 | #
176 | GID_MIN 1000
177 | GID_MAX 60000
178 | # System accounts
179 | #SYS_GID_MIN 100
180 | #SYS_GID_MAX 999
181 |
182 | #
183 | # Max number of login retries if password is bad. This will most likely be
184 | # overridden by PAM, since the default pam_unix module has its own built-in
185 | # limit of 3 retries. However, this is a safe fallback in case you are using
186 | # an authentication module that does not enforce PAM_MAXTRIES.
187 | #
188 | LOGIN_RETRIES 5
189 |
190 | #
191 | # Max time in seconds for login
192 | #
193 | LOGIN_TIMEOUT 60
194 |
195 | #
196 | # Which fields may be changed by regular users using chfn - use
197 | # any combination of letters "frwh" (full name, room number, work
198 | # phone, home phone). If not defined, no changes are allowed.
199 | # For backward compatibility, "yes" = "rwh" and "no" = "frwh".
200 | #
201 | CHFN_RESTRICT rwh
202 |
203 | #
204 | # Should login be allowed if we can't cd to the home directory?
205 | # Default is no.
206 | #
207 | DEFAULT_HOME yes
208 |
209 | #
210 | # If defined, this command is run when removing a user.
211 | # It should remove any at/cron/print jobs etc. owned by
212 | # the user to be removed (passed as the first argument).
213 | #
214 | #USERDEL_CMD /usr/sbin/userdel_local
215 |
216 | #
217 | # Enable setting of the umask group bits to be the same as owner bits
218 | # (examples: 022 -> 002, 077 -> 007) for non-root users, if the uid is
219 | # the same as gid, and username is the same as the primary group name.
220 | #
221 | # If set to yes, userdel will remove the user's group if it contains no
222 | # more members, and useradd will create by default a group with the name
223 | # of the user.
224 | #
225 | USERGROUPS_ENAB yes
226 |
227 | #
228 | # Instead of the real user shell, the program specified by this parameter
229 | # will be launched, although its visible name (argv[0]) will be the shell's.
230 | # The program may do whatever it wants (logging, additional authentication,
231 | # banner, ...) before running the actual shell.
232 | #
233 | # FAKE_SHELL /bin/fakeshell
234 |
235 | #
236 | # If defined, either full pathname of a file containing device names or
237 | # a ":" delimited list of device names. Root logins will be allowed only
238 | # upon these devices.
239 | #
240 | # This variable is used by login and su.
241 | #
242 | #CONSOLE /etc/consoles
243 | #CONSOLE console:tty01:tty02:tty03:tty04
244 |
245 | #
246 | # List of groups to add to the user's supplementary group set
247 | # when logging in on the console (as determined by the CONSOLE
248 | # setting). Default is none.
249 | #
250 | # Use with caution - it is possible for users to gain permanent
251 | # access to these groups, even when not logged in on the console.
252 | # How to do it is left as an exercise for the reader...
253 | #
254 | # This variable is used by login and su.
255 | #
256 | #CONSOLE_GROUPS floppy:audio:cdrom
257 |
258 | #
259 | # If set to "yes", new passwords will be encrypted using the MD5-based
260 | # algorithm compatible with the one used by recent releases of FreeBSD.
261 | # It supports passwords of unlimited length and longer salt strings.
262 | # Set to "no" if you need to copy encrypted passwords to other systems
263 | # which don't understand the new algorithm. Default is "no".
264 | #
265 | # This variable is deprecated. You should use ENCRYPT_METHOD.
266 | #
267 | #MD5_CRYPT_ENAB no
268 |
269 | #
270 | # If set to MD5 , MD5-based algorithm will be used for encrypting password
271 | # If set to SHA256, SHA256-based algorithm will be used for encrypting password
272 | # If set to SHA512, SHA512-based algorithm will be used for encrypting password
273 | # If set to DES, DES-based algorithm will be used for encrypting password (default)
274 | # Overrides the MD5_CRYPT_ENAB option
275 | #
276 | # Note: It is recommended to use a value consistent with
277 | # the PAM modules configuration.
278 | #
279 | ENCRYPT_METHOD SHA512
280 |
281 | #
282 | # Only used if ENCRYPT_METHOD is set to SHA256 or SHA512.
283 | #
284 | # Define the number of SHA rounds.
285 | # With a lot of rounds, it is more difficult to brute-force the password.
286 | # But note also that more CPU resources will be needed to authenticate
287 | # users.
288 | #
289 | # If not specified, the libc will choose the default number of rounds (5000).
290 | # The values must be inside the 1000-999999999 range.
291 | # If only one of the MIN or MAX values is set, then this value will be used.
292 | # If MIN > MAX, the highest value will be used.
293 | #
294 | # SHA_CRYPT_MIN_ROUNDS 5000
295 | # SHA_CRYPT_MAX_ROUNDS 5000
296 |
297 | ################# OBSOLETED BY PAM ##############
298 | # #
299 | # These options are now handled by PAM. Please #
300 | # edit the appropriate file in /etc/pam.d/ to #
301 | # enable the equivalents of them. #
302 | # #
303 | #################################################
304 |
305 | #MOTD_FILE
306 | #DIALUPS_CHECK_ENAB
307 | #LASTLOG_ENAB
308 | #MAIL_CHECK_ENAB
309 | #OBSCURE_CHECKS_ENAB
310 | #PORTTIME_CHECKS_ENAB
311 | #SU_WHEEL_ONLY
312 | #CRACKLIB_DICTPATH
313 | #PASS_CHANGE_TRIES
314 | #PASS_ALWAYS_WARN
315 | #ENVIRON_FILE
316 | #NOLOGINS_FILE
317 | #ISSUE_FILE
318 | PASS_MIN_LEN 8
319 | #PASS_MAX_LEN
320 | #ULIMIT
321 | #ENV_HZ
322 | #CHFN_AUTH
323 | #CHSH_AUTH
324 | #FAIL_DELAY
325 |
326 | ################# OBSOLETED #######################
327 | # #
328 | # These options are no longer handled by shadow. #
329 | # #
330 | # Shadow utilities will display a warning if they #
331 | # still appear. #
332 | # #
333 | ###################################################
334 |
335 | # CLOSE_SESSIONS
336 | # LOGIN_STRING
337 | # NO_PASSWORD_CONSOLE
338 | # QMAIL_DIR
339 |
340 |
341 |
342 |
--------------------------------------------------------------------------------
/exampleapp/reviews/reviews-supervisor.conf:
--------------------------------------------------------------------------------
1 | [program:reviews]
2 | command=python /opt/microservices/reviews.py 9080
3 | directory=/opt/microservices
4 | stdout_logfile=/dev/stdout
5 | stdout_logfile_maxbytes=0
6 | stderr_logfile=/dev/stdout
7 | stderr_logfile_maxbytes=0
8 |
--------------------------------------------------------------------------------
/exampleapp/reviews/reviews.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | from flask import Flask, request
3 | import simplejson as json
4 | import requests
5 | import sys
6 | from json2html import *
7 |
8 | app = Flask(__name__)
9 |
10 | reviews_resp="""
11 |
12 |
13 | An extremely entertaining and comic series by Herge, with expressive drawings!
14 | Reviewer1, New York Times
15 |
16 |
17 |
18 | Its well-researched plots straddle a variety of genres:
19 | swashbuckling adventures with elements of fantasy, mysteries,
20 | political thrillers, and science fiction.
21 | Reviewer2, Barnes and Noble
22 |
23 | """
24 |
25 | @app.route('/reviews')
26 | def bookReviews():
27 | global reviews_resp
28 | return reviews_resp
29 |
30 | @app.route('/')
31 | def index():
32 | """ Display frontpage with normal user and test user buttons"""
33 |
34 | top = """
[lines 35-53: page header markup stripped from this dump]
54 | Book reviews service
55 |
56 |
Hello! This is the book reviews service. My content is
[The rest of reviews.py -- the close of this string, the return from index(), and the __main__ block -- was lost when HTML markup was stripped; the residue that followed only duplicated the stripped productpage template text.]
--------------------------------------------------------------------------------
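
The tail of reviews.py above is unrecoverable, but reviews-supervisor.conf runs the file as `python /opt/microservices/reviews.py 9080`, and productpage-v1.py follows the same pattern, so the stripped ending was almost certainly equivalent to this sketch (an assumption, not recovered text):

# Hypothetical reconstruction of the stripped ending of reviews.py: the
# supervisor config passes a port argument, so the module must end by reading
# it and starting Flask, as productpage-v1.py does (sys is imported above).
if __name__ == '__main__':
    p = int(sys.argv[1])  # port passed on the command line by supervisord
    app.run(host='0.0.0.0', port=p, debug=True)
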
/exampleapp/runapps.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #gunicorn -D -w 1 -b 0.0.0.0:10081 --reload details:app
3 | #gunicorn -D -w 1 -b 0.0.0.0:10082 --reload reviews:app
4 | #gunicorn -w 1 -b 0.0.0.0:19080 --reload --access-logfile prod.log --error-logfile prod.log productpage:app >>prod.log 2>&1 &
5 | docker-compose -f compose-app.yml up -d
6 |
--------------------------------------------------------------------------------
/exampleapp/undochanges.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | cp productpage/productpage-v1.py productpage/productpage.py
3 | ./rebuild-productpage.sh
4 | ./killapps.sh
5 |
6 |
--------------------------------------------------------------------------------
/gremlin-testing-architecture.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ResilienceTesting/gremlinsdk-python/c5cc439ea1c0d6a98ff88f5604bf739f3c48d1e6/gremlin-testing-architecture.png
--------------------------------------------------------------------------------
/python/pygremlin/__init__.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | from .failuregenerator import *
4 | from .assertionchecker import *
5 | from .applicationgraph import *
6 |
--------------------------------------------------------------------------------
/python/pygremlin/applicationgraph.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | from collections import defaultdict
3 | import networkx as nx
4 |
5 | class ApplicationGraph(object):
6 | """Represent the topology of an application to be tested by Gremlin"""
7 |
8 | def __init__(self, model=None, debug=False):
9 | """
10 | @param model: dependency graph of microservices with some details
11 | {
12 | "services" : [
13 | { "name": "gateway", "service_proxies": ["127.0.0.1:9877"] },
14 | { "name": "productpage", "service_proxies": ["127.0.0.1:9876"] },
15 | { "name": "reviews"},
16 | { "name": "details"}
17 | ],
18 |
19 | "dependencies" : {
20 | "gateway" : ["productpage"],
21 | "productpage" : ["reviews", "details"]
22 | }
23 | }
24 | """
25 |
26 | assert isinstance(debug, bool)
27 | assert model is None or isinstance(model, dict)
28 |
29 | self._graph = nx.DiGraph()
30 | self.debug = debug
31 |
32 | if model:
33 | assert 'services' in model and 'dependencies' in model
34 | for service in model['services']:
35 | self.add_service(**service)
36 | for source, destinations in model['dependencies'].iteritems():
37 | assert isinstance(destinations, list)
38 | for destination in destinations:
39 | self.add_dependency(source, destination)
40 |
41 | def add_service(self, name, service_proxies=None):
42 | self._graph.add_node(name)
43 | if service_proxies is None:
44 | service_proxies = []
45 | self._graph.node[name]['instances'] = service_proxies
46 |
47 | def add_dependency(self, fromS, toS):
48 | self._graph.add_path([fromS, toS])
49 |
50 | def get_dependents(self, service):
51 | dservices = []
52 | for e in self._graph.in_edges(service):
53 | dservices.append(e[0])
54 | return dservices
55 |
56 | def get_dependencies(self, service):
57 | dservices = []
58 | for e in self._graph.out_edges(service):
59 |             dservices.append(e[1])  # e = (service, dependency); take the destination
60 | return dservices
61 |
62 | def get_services(self):
63 | return self._graph.nodes()
64 |
65 | def get_service_instances(self, service):
66 | if 'instances' in self._graph.node[service]:
67 | return self._graph.node[service]['instances']
68 | else:
69 | #print("No instances for service {}".format(service))
70 | return []
71 |
72 | def _get_networkX(self):
73 | return self._graph
74 |
75 | def __str__(self):
76 | retval = ""
77 | for node in self._graph.nodes():
78 | retval = retval + "Node: {}\n".format(node)
79 | for edge in self._graph.edges():
80 | retval = retval + "Edge: {}->{}\n".format(edge[0], edge[1])
81 | return retval
82 |
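83 | # --- Usage sketch (editor's addition, not part of the original module) ---
84 | # Builds the example topology from the __init__ docstring above and queries
85 | # it; the proxy addresses are the illustrative ones from that docstring.
86 | if __name__ == '__main__':
87 |     model = {
88 |         "services": [
89 |             {"name": "gateway", "service_proxies": ["127.0.0.1:9877"]},
90 |             {"name": "productpage", "service_proxies": ["127.0.0.1:9876"]},
91 |             {"name": "reviews"},
92 |             {"name": "details"}
93 |         ],
94 |         "dependencies": {
95 |             "gateway": ["productpage"],
96 |             "productpage": ["reviews", "details"]
97 |         }
98 |     }
99 |     graph = ApplicationGraph(model)
100 |     print graph                                  # all nodes and edges
101 |     print graph.get_dependencies("productpage")  # ['reviews', 'details']
102 |     print graph.get_dependents("productpage")    # ['gateway']
103 |     print graph.get_service_instances("gateway") # ['127.0.0.1:9877']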
--------------------------------------------------------------------------------
/python/pygremlin/assertionchecker.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 | # -*- coding: utf-8 -*-
3 | import json
4 |
5 | from elasticsearch import Elasticsearch
6 | import datetime
7 | import pprint
8 | import warnings
9 | import isodate
10 | import sys
11 |
12 | import re
13 | from collections import defaultdict, namedtuple
14 | import time
15 |
16 |
17 |
18 | GremlinTestResult = namedtuple('GremlinTestResult', ['success','errormsg'])
19 | AssertionResult = namedtuple('AssertionResult', ['name','info','success','errormsg'])
20 |
21 | max_query_results = 2**31-1
22 |
23 | def _parse_duration(s):
24 | r = re.compile(r"(([0-9]*(\.[0-9]*)?)(\D+))", re.UNICODE)
25 | start=0
26 | m = r.search(s, start)
27 | vals = defaultdict(lambda: 0)
28 | while m is not None:
29 | unit = m.group(4)
30 | try:
31 | value = float(m.group(2))
32 | except ValueError:
33 | print(s, unit, m.group(2))
34 | return datetime.timedelta()
35 | if unit == "h":
36 | vals["hours"] = value
37 | elif unit == 'm':
38 | vals["minutes"] = value
39 | elif unit == 's':
40 | vals["seconds"] = value
41 | elif unit == "ms":
42 | vals["milliseconds"] = value
43 | elif unit == "us" or unit == "µs":
44 | vals["microseconds"] = value
45 | else:
46 | raise("Unknown time unit")
47 | start = m.end(1)
48 | m = r.search(s, start)
49 | return datetime.timedelta(**vals)
50 |
51 | def _since(timestamp):
52 | return time.time()-timestamp
53 |
54 | def _check_value_recursively(key, val, haystack):
55 | """
56 | Check if there is key _key_ with value _val_ in the given dictionary.
57 | .. warning::
58 | This is geared at JSON dictionaries, so some corner cases are ignored,
59 | we assume all iterables are either arrays or dicts
60 | """
61 | if isinstance(haystack, list):
62 | return any([_check_value_recursively(key, val, l) for l in haystack])
63 | elif isinstance(haystack, dict):
64 | if key not in haystack:
65 | return any([_check_value_recursively(key, val, d) for k, d in haystack.items()
66 | if isinstance(d, list) or isinstance(d, dict)])
67 | else:
68 | return haystack[key] == val
69 | else:
70 | return False
71 |
72 |
73 | def _get_by(key, val, l):
74 | """
75 | Out of list *l* return all elements that have *key=val*
76 | This comes in handy when you are working with aggregated/bucketed queries
77 | """
78 | return [x for x in l if _check_value_recursively(key, val, x)]
79 |
80 |
81 | def _get_by_id(ID, l):
82 | """
83 | A convenience wrapper over _get_by
84 | that fetches things based on the "reqID" field
85 | """
86 | return _get_by("reqID", ID, l)
87 |
88 |
89 | class AssertionChecker(object):
90 |
91 | """
92 | The assertion checker
93 | """
94 |
95 | def __init__(self, host, test_id, debug=False):
96 | """
97 | @param host: the Elasticsearch host
98 | @param test_id: id of the test to which we are restricting the queries
99 | """
100 | self._es = Elasticsearch(host)
101 | self._id = test_id
102 | self.debug=debug
103 | self.functiondict = {
104 | 'no_proxy_errors' : self.check_no_proxy_errors,
105 | 'bounded_response_time' : self.check_bounded_response_time,
106 | 'http_success_status' : self.check_http_success_status,
107 | 'http_status' : self.check_http_status,
108 | # 'reachability' : self.check_reachability,
109 | 'bounded_retries' : self.check_bounded_retries,
110 | 'circuit_breaker' : self.check_circuit_breaker,
111 | 'at_most_requests': self.check_at_most_requests
112 | }
113 |
114 | def _check_non_zero_results(self, data):
115 | """
116 | Checks whether the output we got from Elasticsearch is empty or not
117 | """
118 | return data["hits"]["total"] != 0 and len(data["hits"]["hits"]) != 0
119 |
120 | #was ProxyErrorsBad
121 | def check_no_proxy_errors(self, **kwargs):
122 | """
123 | Helper method to determine if the proxies logged any major errors related to the functioning of the proxy itself
124 | """
125 | data = self._es.search(body={
126 | "size": max_query_results,
127 | "query": {
128 | "filtered": {
129 | "query": {
130 | "match_all": {}
131 | },
132 | "filter": {
133 | "term": {
134 | "level": "error"
135 | }
136 | }
137 | }
138 | }
139 | })
140 | # if self.debug:
141 | # print(data)
142 | return GremlinTestResult(data["hits"]["total"] == 0, data)
143 |
144 | #was ProxyErrors
145 | def get_requests_with_errors(self):
146 | """ Helper method to determine if proxies logged any error related to the requests passing through"""
147 | data = self._es.search(body={
148 | "size": max_query_results,
149 | "query": {
150 | "filtered": {
151 | "query": {
152 | "match_all": {}
153 | },
154 | "filter": {
155 | "exists": {
156 | "field": "errmsg"
157 | }
158 | }
159 | }
160 | }
161 | })
162 | return GremlinTestResult(False, data)
163 |
164 | def check_bounded_response_time(self, **kwargs):
165 | assert 'source' in kwargs and 'dest' in kwargs and 'max_latency' in kwargs
166 | dest = kwargs['dest']
167 | source = kwargs['source']
168 | max_latency = _parse_duration(kwargs['max_latency'])
169 | data = self._es.search(body={
170 | "size": max_query_results,
171 | "query": {
172 | "filtered": {
173 | "query": {
174 | "match_all": {}
175 | },
176 | "filter": {
177 | "bool": {
178 | "must": [
179 | {"term": {"msg": "Response"}},
180 | {"term": {"source": source}},
181 | {"term": {"dest": dest}},
182 | {"term": {"testid": self._id}}
183 | ]
184 | }
185 | }
186 | }
187 | }
188 | })
189 | if self.debug:
190 | pprint.pprint(data)
191 |
192 | result = True
193 | errormsg = ""
194 | if not self._check_non_zero_results(data):
195 | result = False
196 | errormsg = "No log entries found"
197 | return GremlinTestResult(result, errormsg)
198 |
199 | for message in data["hits"]["hits"]:
200 | if _parse_duration(message['_source']["duration"]) > max_latency:
201 | result = False
202 | # The response for this request took longer than max_latency
203 | errormsg = "{} did not reply in time for request {}, {}".format(
204 | dest, message['_source']["reqID"], message['_source']["duration"])
205 | if self.debug:
206 | print errormsg
207 | return GremlinTestResult(result, errormsg)
208 |
209 | def check_http_success_status(self, **kwargs):
210 | data = self._es.search(body={
211 | "size": max_query_results,
212 | "query": {
213 | "filtered": {
214 | "query": {
215 | "match_all": {}
216 | },
217 | "filter": {
218 | "exists": {
219 | "field": "status"
220 | }
221 | }
222 | }
223 | }})
224 | result = True
225 | errormsg = ""
226 | if not self._check_non_zero_results(data):
227 | result = False
228 | errormsg = "No log entries found"
229 | return GremlinTestResult(result, errormsg)
230 |
231 | for message in data["hits"]["hits"]:
232 | if message['_source']["status"] != 200:
233 | if self.debug:
234 | print(message['_source'])
235 | result = False
236 | return GremlinTestResult(result, errormsg)
237 |
238 | ##check if the interaction between a given pair of services resulted in the required response status
239 | def check_http_status(self, **kwargs):
240 | assert 'source' in kwargs and 'dest' in kwargs and 'status' in kwargs and 'req_id' in kwargs
241 | source = kwargs['source']
242 | dest = kwargs['dest']
243 | status = kwargs['status']
244 | req_id = kwargs['req_id']
245 | data = self._es.search(body={
246 | "size": max_query_results,
247 | "query": {
248 | "filtered": {
249 | "query": {
250 | "match_all": {}
251 | },
252 | "filter": {
253 | "bool" : {
254 | "must": [
255 | {"term": {"msg": "Response"}},
256 | {"term": {"source": source}},
257 | {"term": {"dest": dest}},
258 | {"term": {"req_id": req_id}},
259 | {"term": {"protocol" : "http"}},
260 | {"term": {"testid": self._id}}
261 | ]
262 | }
263 | }
264 | }
265 | }})
266 |
267 | result = True
268 | errormsg = ""
269 | if not self._check_non_zero_results(data):
270 | result = False
271 | errormsg = "No log entries found"
272 | return GremlinTestResult(result, errormsg)
273 |
274 | for message in data["hits"]["hits"]:
275 | if message['_source']["status"] != status:
276 | if self.debug:
277 | print(message['_source'])
278 | result = False
279 | return GremlinTestResult(result, errormsg)
280 |
281 | def check_at_most_requests(self, source, dest, num_requests, **kwargs):
282 | """
283 | Check that the source service sent at most num_requests requests to the dest service
284 | :param source the source service name
285 | :param dest the destination service name
286 | :param num_requests the maximum number of requests that we expect
287 | :return:
288 | """
289 | # TODO: Does the proxy support logging of instances so that grouping by instance is possible?
290 |
291 | if self.debug:
292 | print 'in check_at_most_requests (%s, %s, %s, %s)' % (source, dest, num_requests, self._id)
293 |
294 | # Fetch requests for src->dst
295 | data = self._es.search(body={
296 | "size": max_query_results,
297 | "query": {
298 | "filtered": {
299 | "query": {
300 | "match_all": {}
301 | },
302 | "filter": {
303 | "bool": {
304 | "must": [
305 | {"term": {"msg": "Request"}},
306 | {"term": {"source": source}},
307 | {"term": {"dest": dest}},
308 | {"term": {"protocol": "http"}},
309 | {"term": {"testid": self._id}}
310 | ]
311 | }
312 | }
313 | }
314 | },
315 | "aggs": {
316 | # Need size, otherwise only top buckets are returned
317 | #"size": max_query_results,
318 | "byid": {
319 | "terms": {
320 | "field": "reqID",
321 | }
322 | }
323 | }
324 | })
325 |
326 | if self.debug:
327 | pprint.pprint(data)
328 |
329 | result = True
330 | errormsg = ""
331 | if not self._check_non_zero_results(data):
332 | result = False
333 | errormsg = "No log entries found"
334 | return GremlinTestResult(result, errormsg)
335 |
336 | # Check number of requests in each bucket
337 | for bucket in data["aggregations"]["byid"]["buckets"]:
338 | if bucket["doc_count"] > (num_requests + 1):
339 | errormsg = "{} -> {} - expected {} requests, but found {} "\
340 | "requests for id {}".format(
341 | source, dest, num_requests, bucket['doc_count'] - 1,
342 | bucket['key'])
343 | result = False
344 | if self.debug:
345 | print errormsg
346 | return GremlinTestResult(result, errormsg)
347 | return GremlinTestResult(result, errormsg)
348 |
349 | def check_bounded_retries(self, **kwargs):
350 | assert 'source' in kwargs and 'dest' in kwargs and 'retries' in kwargs
351 | source = kwargs['source']
352 | dest = kwargs['dest']
353 | retries = kwargs['retries']
354 | wait_time = kwargs.pop('wait_time', None)
355 | errdelta = kwargs.pop('errdelta', datetime.timedelta(milliseconds=10))
356 | by_uri = kwargs.pop('by_uri', False)
357 |
358 | if self.debug:
359 | print 'in bounded retries (%s, %s, %s)' % (source, dest, retries)
360 |
361 | data = self._es.search(body={
362 | "size": max_query_results,
363 | "query": {
364 | "filtered": {
365 | "query": {
366 | "match_all": {}
367 | },
368 | "filter": {
369 | "bool": {
370 | "must": [
371 | {"term": {"source": source}},
372 | {"term": {"msg": "Request"}},
373 | {"term": {"dest": dest}},
374 | {"term": {"testid": self._id}}
375 | ]
376 | }
377 | }
378 | }
379 | },
380 | "aggs": {
381 | "byid": {
382 | "terms": {
383 | "field": "reqID" if not by_uri else "uri",
384 | }
385 | }
386 | }
387 | })
388 |
389 | if self.debug:
390 | pprint.pprint(data)
391 |
392 | result = True
393 | errormsg = ""
394 | if not self._check_non_zero_results(data):
395 | result = False
396 | errormsg = "No log entries found"
397 | return GremlinTestResult(result, errormsg)
398 |
399 | # Check number of req first
400 | for bucket in data["aggregations"]["byid"]["buckets"]:
401 | if bucket["doc_count"] > (retries + 1):
402 | errormsg = "{} -> {} - expected {} retries, but found {} retries for request {}".format(
403 | source, dest, retries, bucket['doc_count']-1, bucket['key'])
404 | result = False
405 | if self.debug:
406 | print errormsg
407 | return GremlinTestResult(result, errormsg)
408 | if wait_time is None:
409 | return GremlinTestResult(result, errormsg)
410 |
411 | wait_time = _parse_duration(wait_time)
412 | # Now we have to check the timestamps
413 | for bucket in data["aggregations"]["byid"]["buckets"]:
414 | req_id = bucket["key"]
415 | req_seq = _get_by_id(req_id, data["hits"]["hits"])
416 | req_seq.sort(key=lambda x: isodate.parse_datetime(x['_source']["ts"]))
417 | for i in range(len(req_seq) - 1):
418 | observed = isodate.parse_datetime(
419 | req_seq[i + 1]['_source']["ts"]) - isodate.parse_datetime(req_seq[i]['_source']["ts"])
420 | if not (((wait_time - errdelta) <= observed) and (observed <= (wait_time + errdelta))):
421 | errormsg = "{} -> {} - expected {}+/-{}ms spacing for retry attempt {}, but request {} had a spacing of {}ms".format(
422 | source, dest, wait_time, errdelta.microseconds/1000, i+1, req_id, observed.microseconds/1000)
423 | result = False
424 | if self.debug:
425 | print errormsg
426 | break
427 | return GremlinTestResult(result, errormsg)
428 |
429 | # remove_retries is a boolean argument. Set to true if retries are attempted inside the circuit breaker logic, else set to false
430 | def check_circuit_breaker(self, **kwargs): # expects source, dest, closed_attempts, reset_time, headerprefix; optionally halfopen_attempts, remove_retries
431 | assert 'dest' in kwargs and 'source' in kwargs and 'closed_attempts' in kwargs and 'reset_time' in kwargs and 'headerprefix' in kwargs
432 |
433 | dest = kwargs['dest']
434 | source = kwargs['source']
435 | closed_attempts = kwargs['closed_attempts']
436 | reset_time = kwargs['reset_time']
437 | headerprefix = kwargs['headerprefix']
438 | if 'halfopen_attempts' not in kwargs:
439 | halfopen_attempts = 1
440 | else:
441 | halfopen_attempts = kwargs['halfopen_attempts']
442 | if 'remove_retries' not in kwargs:
443 | remove_retries = False
444 | else:
445 | remove_retries = kwargs['remove_retries']
446 |
447 |
448 | # TODO: this has been tested for thresholds but not for recovery
449 | # timeouts
450 | data = self._es.search(body={
451 | "size": max_query_results,
452 | "query": {
453 | "filtered": {
454 | "query": {
455 | "match_all": {}
456 | },
457 | "filter": {
458 | "bool": {
459 | "must": [
460 | {"term": {"source": source}},
461 | {"term": {"dest": dest}},
462 | {"prefix": {"reqID": headerprefix}},
463 | {"term": {"testid": self._id}}
464 | ],
465 | "should": [
466 | {"term": {"msg": "Request"}},
467 | {"term": {"msg": "Response"}},
468 | ]
469 | }
470 | }
471 | }
472 | },
473 | "aggs": {
474 | "bysource": {
475 | "terms": {
476 | "field": "source",
477 | }
478 | }
479 | }
480 | })
481 |
482 | if(self.debug):
483 | #pprint.pprint(data)
484 | pprint.pprint(data["aggregations"]["bysource"]["buckets"])
485 |
486 |
487 |
488 | result = True
489 | errormsg = ""
490 | if not self._check_non_zero_results(data):
491 | result = False
492 | errormsg = "No log entries found"
493 | return GremlinTestResult(result, errormsg)
494 |
495 | reset_time = _parse_duration(reset_time)
496 | circuit_mode = "closed"
497 |
498 | # TODO - remove aggregations
499 | for bucket in data["aggregations"]["bysource"]["buckets"]:
500 | req_seq = _get_by("source", source, data["hits"]["hits"])
501 | req_seq.sort(key=lambda x: isodate.parse_datetime(x['_source']["ts"]))
502 |
503 | #Remove duplicate retries
504 | if(remove_retries):
505 | req_seq_dup = []
506 | for i in range(len(req_seq)):
507 | if(i == len(req_seq)-1):
508 | req_seq_dup.append(req_seq[i])
509 |
510 |
511 | elif(req_seq[i]['_source']['reqID'] != req_seq[i+1]['_source']['reqID']):
512 | req_seq_dup.append(req_seq[i])
513 |
514 | req_seq = req_seq_dup
515 |
516 | failures = 0
517 | circuit_open_ts = None
518 | successes = 0
519 | print "starting " + circuit_mode
520 | for req in req_seq:
521 | if circuit_mode is "open": #circuit_open_ts is not None:
522 | req_spacing = isodate.parse_datetime(req['_source']["ts"]) - circuit_open_ts
523 |
524 | # Restore to half-open
525 | if req_spacing >= reset_time:
526 | circuit_open_ts = None
527 | circuit_mode = "half-open"
528 | if self.debug:
529 | print "%d: open -> half-open" %(failures +1)
530 | failures = 0 #-1
531 | else: # We are in open state
532 | # this is an assertion fail, no requests in open state
533 | if req['_source']["msg"] == "Request":
534 | if self.debug:
535 | print "%d: open -> failure" % (failures + 1)
536 | if self.debug:
537 | print "Service %s failed to trip circuit breaker" % source
538 | errormsg = "{} -> {} - new request was issued at ({}s) before reset_timer ({}s)expired".format(source,
539 | dest,
540 | req_spacing,
541 | reset_time) #req['_source'])
542 | result = False
543 | break
544 | if circuit_mode is "half-open":
545 | if ((req['_source']["msg"] == "Response" and req['_source']["status"] != 200)
546 | or (req['_source']["msg"] == "Request" and ("abort" in req['_source']["actions"]))):
547 | if self.debug:
548 | print "half-open -> open"
549 | circuit_mode = "open"
550 | circuit_open_ts = isodate.parse_datetime(req['_source']["ts"])
551 | successes = 0
552 | elif (req['_source']["msg"] == "Response" and req['_source']["status"] == 200):
553 | successes += 1
554 | if self.debug:
555 | print "half-open -> half-open (%d)" % successes
556 | # If over threshold, return to closed state
557 | if successes > halfopen_attempts:
558 | if self.debug:
559 | print "half-open -> closed"
560 | circuit_mode = "closed"
561 | failures = 0
562 | circuit_open_ts = None
563 |
564 | elif circuit_mode == "closed":
565 |
566 | if ((req['_source']["msg"] == "Response" and req['_source']["status"] != 200)
567 | or (req['_source']["msg"] == "Request" and len(req['_source']["actions"]) > 0)):
568 | # Increment failures
569 | failures += 1
570 | if self.debug:
571 | print "%d: closed->closed" % failures
572 | # print(failures)
573 | # Trip CB, go to open state
574 | if failures > closed_attempts:
575 | if self.debug:
576 | print "%d: closed->open" % failures
577 | circuit_open_ts = isodate.parse_datetime(req['_source']["ts"])
578 | successes = 0
579 | circuit_mode = "open"
580 |
581 | # pprint.pprint(data)
582 | return GremlinTestResult(result, errormsg)
583 |
584 |
585 | def check_num_requests(self, source, dest, num_requests, **kwargs):
586 | """
587 | Check that the source service sent exactly num_requests requests to the dest service, in total, across all request headers
588 | :param source the source service name
589 | :param dest the destination service name
590 | :param num_requests the exact number of requests that we expect
591 | :return:
592 | """
593 |
594 | if self.debug:
595 | print 'in check_num_requests (%s, %s, %s, %s)' % (source, dest, num_requests, self._id)
596 |
597 | # Fetch requests for src->dst
598 | data = self._es.search(body={
599 | "size": max_query_results,
600 | "query": {
601 | "filtered": {
602 | "query": {
603 | "match_all": {}
604 | },
605 | "filter": {
606 | "bool": {
607 | "must": [
608 | {"term": {"msg": "Request"}},
609 | {"term": {"source": source}},
610 | {"term": {"dest": dest}},
611 | {"term": {"protocol": "http"}},
612 | {"term": {"testid": self._id}}
613 | ]
614 | }
615 | }
616 | }
617 | },
618 | "aggs": {
619 | "byid": {
620 | "terms": {
621 | "field": "testid",
622 | }
623 | }
624 | }
625 | })
626 |
627 | if self.debug:
628 | pprint.pprint(data)
629 | pprint.pprint(data["aggregations"])
630 |
631 | result = True
632 | errormsg = ""
633 | if not self._check_non_zero_results(data):
634 | result = False
635 | errormsg = "No log entries found"
636 | return GremlinTestResult(result, errormsg)
637 |
638 | # Check number of requests in each bucket
639 | for bucket in data["aggregations"]["byid"]["buckets"]:
640 | if bucket["doc_count"] is not (num_requests):
641 | errormsg = "{} -> {} - expected {} requests, but found {} "\
642 | "requests for id {}".format(
643 | source, dest, num_requests, bucket['doc_count'],
644 | bucket['key'])
645 | result = False
646 | if self.debug:
647 | print errormsg
648 | return GremlinTestResult(result, errormsg)
649 | return GremlinTestResult(result, errormsg)
650 |
651 |
652 |
653 | def check_bulkhead(self, source, dependencies, slow_dest, rate):
654 | """
655 | Asserts bulkheads by ensuring that the rate of requests to other dests is kept when slow_dest is slow
656 | :param source the source service name
657 | :param dependencies list of dependency names of source
658 | :param slow_dest the name of the dependency that independence is being tested for
659 | :param rate number of requests per second that should occur to each dependency
660 | :return:
661 | """
662 | #Remove slow dest
663 | dependencies.remove(slow_dest)
664 |
665 |
666 | s = str(1.0 / float(rate))
667 | max_spacing = _parse_duration(s+'s')
668 |
669 |
670 |
671 | for dest in dependencies:
672 | data = self._es.search(body={
673 | "size": max_query_results,
674 | "query": {
675 | "filtered": {
676 | "query": {
677 | "match_all": {}
678 | },
679 | "filter": {
680 | "bool": {
681 | "must": [
682 | {"term": {"msg": "Request"}},
683 | {"term": {"source": source}},
684 | {"term": {"dest": dest}},
685 | {"term": {"testid": self._id}}
686 | ]
687 | }
688 | }
689 | }
690 | }
691 | })
692 |
693 | if self.debug:
694 | pprint.pprint(data)
695 |
696 | result = True
697 | errormsg = ""
698 | if not self._check_non_zero_results(data):
699 | result = False
700 | errormsg = "No log entries found"
701 | return GremlinTestResult(result, errormsg)
702 |
703 | req_seq = _get_by("source", source, data["hits"]["hits"])
704 | req_seq.sort(key=lambda x: isodate.parse_datetime(x['_source']["ts"]))
705 |
706 | last_request = isodate.parse_datetime(req_seq[0]['_source']["ts"])
707 |
708 | for req in req_seq:
709 | req_spacing = isodate.parse_datetime(req['_source']["ts"]) - last_request
710 | last_request = isodate.parse_datetime(req['_source']["ts"])
711 | if self.debug:
712 | print "spacing", req_spacing, max_spacing
713 | if req_spacing > max_spacing:
714 |
715 | errormsg = "{} -> {} - new request was issued at ({}s) but max spacing should be ({}s)".format(source,
716 | dest,
717 | req_spacing,
718 | max_spacing)
719 | result = False
720 | return GremlinTestResult(result, errormsg)
721 |
722 |
723 |
724 |
725 | return GremlinTestResult(result, errormsg)
726 |
727 |
728 | def check_assertion(self, name=None, **kwargs):
729 | # assertion is something like {"name": "bounded_response_time",
730 | # "service": "productpage",
731 | # "max_latency": "100ms"}
732 |
733 | assert name is not None and name in self.functiondict
734 | gremlin_test_result = self.functiondict[name](**kwargs)
735 |
736 | if self.debug and not gremlin_test_result.success:
737 | print gremlin_test_result.errormsg
738 |
739 |
740 | return AssertionResult(name, str(kwargs), gremlin_test_result.success, gremlin_test_result.errormsg)
741 |
742 | def check_assertions(self, checklist, all=False):
743 | """Check a set of assertions
744 | @param all boolean if False, stop at first failure
745 | @return: list of AssertionResult tuples; stops after the first failure unless all=True
746 | """
747 |
748 | assert isinstance(checklist, dict) and 'checks' in checklist
749 |
750 | retval = None
751 | retlist = []
752 |
753 | for assertion in checklist['checks']:
754 | retval = self.check_assertion(**assertion)
755 | retlist.append(retval)
756 | if not retval.success and not all:
757 | print "Error message:", retval[3]
758 | return retlist
759 |
760 | return retlist
761 |
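762 | # --- Usage sketch (editor's addition, not part of the original module) ---
763 | # Shows how AssertionChecker is typically driven. The Elasticsearch host and
764 | # test id are placeholders: in practice the id comes from
765 | # FailureGenerator.start_new_test() and the host from a checklist's
766 | # "log_server" field (see recipes/json_recipes/example_checklist.json).
767 | if __name__ == '__main__':
768 |     checker = AssertionChecker("192.168.64.2:29200", "mytestid", debug=True)
769 |     result = checker.check_assertion(name="bounded_response_time",
770 |                                      source="gateway",
771 |                                      dest="productpage",
772 |                                      max_latency="100ms")
773 |     print result.name, "PASS" if result.success else "FAIL"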
--------------------------------------------------------------------------------
/python/pygremlin/failuregenerator.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 |
3 | import requests
4 | import json
5 | from collections import defaultdict
6 | import uuid
7 | import logging
8 | import httplib
9 | logging.basicConfig()
10 | requests_log = logging.getLogger("requests.packages.urllib3")
11 |
12 | class FailureGenerator(object):
13 |
14 | def __init__(self, app, debug=False):
15 | """
16 | Create a new failure generator
17 | @param app ApplicationGraph: instance of ApplicationGraph object
18 | """
19 | self.app = app
20 | self.debug = debug
21 | self._id = None
22 | self._queue = []
23 | #some common scenarios
24 | self.functiondict = {
25 | 'delay_requests' : self.delay_requests,
26 | 'delay_responses' : self.delay_responses,
27 | 'abort_requests' : self.abort_requests,
28 | 'abort_responses' : self.abort_responses,
29 | 'partition_services' : self.partition_services,
30 | 'crash_service' : self.crash_service,
31 | 'overload_service' : self.overload_service
32 | }
33 | if debug:
34 | httplib.HTTPConnection.debuglevel = 1
35 | logging.getLogger().setLevel(logging.DEBUG)
36 | requests_log.setLevel(logging.DEBUG)
37 | requests_log.propagate = True
38 |
39 | def _notify_proxies(self):
40 | if self.debug:
41 | print 'in _notifyProxies'
42 | # TODO: modify this so we can support more than one test at a time
43 | for service in self.app.get_services():
44 | if self.debug:
45 | print(service)
46 | for instance in self.app.get_service_instances(service):
47 | resp = requests.get("http://{}/gremlin/v1/test/{}".format(instance,self._id))
48 | resp.raise_for_status()
49 |
50 |
51 | def start_new_test(self):
52 | self._id = uuid.uuid4().hex
53 | for service in self.app.get_services():
54 | if self.debug:
55 | print(service)
56 | for instance in self.app.get_service_instances(service):
57 | resp = requests.put("http://{}/gremlin/v1/test/{}".format(instance,self._id))
58 | resp.raise_for_status()
59 | return self._id
60 |
61 | def get_test_id(self):
62 | return self._id
63 |
64 |
65 | def add_rule(self, **args):
66 | """
67 | @param args keyword argument list, consisting of:
68 |
69 | source: source service name
70 | dest: destination service name
71 | messagetype: request|response|publish|subscribe
72 |
73 | headerpattern: regex to match against the X-Gremlin-ID request header, or *
74 | bodypattern: regex to match against the message body, or *
75 |
76 | delayprobability: probability (0.0-1.0) of delaying a message
77 | delaydistribution: probability distribution function (uniform|normal|exponential)
78 | mangleprobability: probability (0.0-1.0) of mangling a message
79 | mangledistribution: probability distribution function (uniform|normal|exponential)
80 |
81 | abortprobability: probability (0.0-1.0) of aborting a message
82 | abortdistribution: probability distribution function (uniform|normal|exponential)
83 |
84 | delaytime: latency to inject into requests
85 | errorcode: HTTP error code or -1 to reset TCP connection
86 | searchstring: string to replace when Mangle is enabled
87 | replacestring: string to replace with for Mangle fault
88 | """
89 | #The defaults are indicated below
90 | myrule = {
91 | "source": "",
92 | "dest": "",
93 | "messagetype": "request",
94 | "headerpattern": "*",
95 | "bodypattern": "*",
96 | "delayprobability": 0.0,
97 | "delaydistribution": "uniform",
98 | "mangleprobability": 0.0,
99 | "mangledistribution": "uniform",
100 | "abortprobability": 0.0,
101 | "abortdistribution": "uniform",
102 | "delaytime": "0s",
103 | "errorcode": -1,
104 | "searchstring": "",
105 | "replacestring": ""
106 | }
107 | rule = args.copy()
108 | #copy
109 | for i in rule.keys():
110 | if i not in myrule:
111 | continue
112 | myrule[i] = rule[i]
113 | #check defaults
114 | services = self.app.get_services()
115 | assert myrule["source"] != "" and myrule["dest"] != ""
116 | assert myrule["source"] in services and myrule["dest"] in services
117 | assert myrule['headerpattern'] != "" or myrule["bodypattern"] != ""
118 | assert myrule['delayprobability'] >0.0 or myrule['abortprobability'] >0.0 or myrule['mangleprobability'] >0.0
119 | if myrule["delayprobability"] > 0.0:
120 | assert myrule["delaytime"] != ""
121 | if myrule["abortprobability"] > 0.0:
122 | assert myrule["errorcode"] >= -1
123 | assert myrule["messagetype"] in ["request", "response", "publish", "subscribe"]
124 | self._queue.append(myrule)
125 |
126 | def clear_rules_from_all_proxies(self):
127 | """
128 | Clear fault injection rules from all known service proxies.
129 | """
130 | self._queue = []
131 | if self.debug:
132 | print 'Clearing rules'
133 | for service in self.app.get_services():
134 | for instance in self.app.get_service_instances(service):
135 | if self.debug:
136 | print 'Clearing rules for %s - instance %s' % (service, instance)
137 | resp = requests.delete("http://{}/gremlin/v1/rules".format(instance))
138 | if resp.status_code != 200:
139 | print 'Failed to clear rules for %s - instance %s' % (service, instance)
140 |
141 | def list_rules(self, service=None):
142 | """
143 | List fault injection rules installed on instances of a given service (or all services)
144 | returns a JSON dictionary
145 | """
146 | rules = {}
147 | for service in ([service] if service else self.app.get_services()):
148 | rules[service] = {}
149 | for instance in self.app.get_service_instances(service):
150 | rules[service][instance] = {}
151 | resp = requests.get("http://{}/gremlin/v1/rules/list".format(instance))
152 | if resp.status_code != 200:
153 | print 'Failed to fetch rules from %s - instance %s' % (service, instance)
154 | continue
155 | rules[service][instance] = resp.json()
156 | return rules
157 |
158 | #TODO: Create a plugin model here, to support gremlinproxy and nginx
159 | def push_rules(self, continue_on_errors=False):
160 | for rule in self._queue:
161 | instances = self.app.get_service_instances(rule["source"])
162 | for instance in instances:
163 | try:
164 | resp = requests.post("http://{}/gremlin/v1/rules/add".format(instance),
165 | headers = {"Content-Type" : "application/json"},
166 | data=json.dumps(rule))
167 | resp.raise_for_status()
168 | except requests.exceptions.ConnectionError, e:
169 | print "FAILURE: Could not add rule to instance %s of service %s" % (instance, rule["source"])
170 | print e
171 | if not continue_on_errors:
172 | raise e
173 |
174 | def _generate_rules(self, rtype, **args):
175 | rule = args.copy()
176 | assert rtype is not None and rtype != "" and (rtype == "delay" or rtype == "abort")
177 |
178 | if rtype is "abort":
179 | rule['abortprobability']=rule.pop('abortprobability',1) or 1
180 | rule['errorcode']=rule.pop('errorcode',-1) or -1
181 | else:
182 | rule['delayprobability']=rule.pop('delayprobability',1) or 1
183 | rule['delaytime']=rule.pop('delaytime',"1s") or "1s"
184 |
185 | assert 'source' in rule or 'dest' in rule
186 | if 'source' in rule:
187 | assert rule['source'] != ""
188 | if 'dest' in rule:
189 | assert rule['dest'] != ""
190 |
191 | rule['headerpattern'] = rule.pop('headerpattern', '*') or '*'
192 | rule['bodypattern'] = rule.pop('bodypattern', '*') or '*'
193 | sources = []
194 | destinations = []
195 | if 'source' not in rule:
196 | sources = self.app.get_dependents(rule['dest'])
197 | else:
198 | sources.append(rule['source'])
199 |
200 | if 'dest' not in rule:
201 | destinations = self.app.get_dependencies(rule['source'])
202 | else:
203 | destinations.append(rule['dest'])
204 |
205 | for s in sources:
206 | for d in destinations:
207 | rule["source"] = s
208 | rule["dest"] = d
209 | self.add_rule(**rule)
210 | if self.debug:
211 | print '%s - %s' % (rtype, str(rule))
212 |
213 | def abort_requests(self, **args):
214 | args['messagetype']='request'
215 | self._generate_rules('abort', **args)
216 |
217 | def abort_responses(self, **args):
218 | args['messagetype']='response'
219 | self._generate_rules('abort', **args)
220 |
221 | def delay_requests(self, **args):
222 | args['messagetype']='request'
223 | self._generate_rules('delay', **args)
224 |
225 | def delay_responses(self, **args):
226 | args['messagetype']='response'
227 | self._generate_rules('delay', **args)
228 |
229 | """
230 | Gives the impression of an overloaded service. If no probability is given
231 | 50% of requests will be delayed by 10s (default) and rest 50% will get HTTP 503.
232 | """
233 | def overload_service(self, **args):
234 | rule = args.copy()
235 | assert 'dest' in rule
236 |
237 | rule['delayprobability'] = rule.pop('delayprobability', 0.5) or 0.5
238 | rule['abortprobability'] = rule.pop('abortprobability', 0.5) or 0.5
239 | rule['delaytime'] = rule.pop('delaytime', "10s") or "10s"
240 | rule['errorcode'] = rule.pop("errorcode", 503) or 503
241 | rule['messagetype'] = rule.pop('messagetype', 'request') or 'request'
242 | rule['headerpattern'] = rule.pop('headerpattern', '*') or '*'
243 | rule['bodypattern'] = rule.pop('bodypattern','*') or '*'
244 |
245 | sources = []
246 | if 'source' not in rule or rule['source'] == "":
247 | sources = self.app.get_dependents(rule['dest'])
248 | else:
249 | sources.append(rule['source'])
250 |
251 | for s in sources:
252 | rule["source"] = s
253 | self.add_rule(**rule)
254 | if self.debug:
255 | print 'Overload %s ' % str(rule)
256 |
257 | def partition_services(self, **args):
258 | """Partitions two connected services. Not two sets of services (TODO)
259 | Expects usual arguments and srcprobability and dstprobability, that indicates probability of
260 | terminating connections from source to dest and vice versa
261 | """
262 | rule = args.copy()
263 | assert 'source' in rule and 'dest' in rule
264 | #assert 'srcprobability' in rule and 'dstprobability' in rule
265 | assert rule['source'] != "" and rule['dest'] != ""
266 | #check if the two services are connected
267 | assert rule['dest'] in self.app.get_dependencies(rule['source'])
268 |
269 | rule['errorcode'] = rule.pop('errorcode', -1) or -1
270 | rule['abortprobability'] = rule.pop('srcprobability', 1) or 1
271 | self.abort_requests(**rule)
272 |
273 | rule['abortprobability'] = rule.pop('dstprobability', 1) or 1
274 | temp = rule['source']
275 | rule['source'] = rule['dest']
276 | rule['dest'] = temp
277 | self.abort_requests(**rule)
278 |
279 | """
280 | Causes the dest service to become unavailable to all callers
281 | """
282 | def crash_service(self, **args):
283 | rule = args.copy()
284 | rule['source']=''
285 | rule['errorcode']=rule.pop('errorcode', -1) or -1
286 | self.abort_requests(**rule)
287 |
288 | def setup_failure(self, scenario=None, **args):
289 | """Add a given failure scenario
290 | @param scenario: one of the scenario names in functiondict, e.g. 'delay_requests' or 'crash_service'
291 | """
292 | assert scenario is not None and scenario in self.functiondict
293 | self.functiondict[scenario](**args)
294 |
295 | def setup_failures(self, gremlins):
296 | """Add gremlins to environment"""
297 |
298 | assert isinstance(gremlins, dict) and 'gremlins' in gremlins
299 | for gremlin in gremlins['gremlins']:
300 | self.setup_failure(**gremlin)
301 | self.push_rules()
302 |
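303 | # --- Usage sketch (editor's addition, not part of the original module) ---
304 | # Wires a FailureGenerator to a two-service topology and injects a delay
305 | # fault; the topology and proxy address are illustrative placeholders, and
306 | # the keyword arguments mirror the rule fields documented in add_rule above.
307 | if __name__ == '__main__':
308 |     from applicationgraph import ApplicationGraph
309 |     topo = ApplicationGraph({
310 |         "services": [
311 |             {"name": "gateway", "service_proxies": ["127.0.0.1:9877"]},
312 |             {"name": "productpage"}
313 |         ],
314 |         "dependencies": {"gateway": ["productpage"]}
315 |     })
316 |     fg = FailureGenerator(topo, debug=True)
317 |     fg.clear_rules_from_all_proxies()
318 |     fg.delay_requests(source="gateway", dest="productpage",
319 |                       delayprobability=1.0, delaytime="3s",
320 |                       headerpattern="gremlin-test-delay-*")
321 |     fg.push_rules()                # install the queued rule on gateway's proxy
322 |     print "test id:", fg.start_new_test()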
--------------------------------------------------------------------------------
/python/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | setup(
4 | name = 'pygremlin',
5 | version = '0.1',
6 | description = 'Python SDK for Gremlin framework',
7 | author = 'Shriram Rajagopalan',
8 | author_email = 'shriram@us.ibm.com',
9 | license = 'Apache 2.0',
10 | packages = ['pygremlin'],
11 | install_requires=[
12 | 'requests',
13 | 'networkx',
14 | 'elasticsearch',
15 | 'isodate'
16 | ],
17 | zip_safe = False
18 | )
19 |
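20 | # Editor's note: install from this directory with `pip install .` (or
21 | # `pip install -e .` for development); setuptools pulls in the
22 | # dependencies listed in install_requires automatically.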
--------------------------------------------------------------------------------
/recipes/json_recipes/example_checklist.json:
--------------------------------------------------------------------------------
1 | {
2 | "log_server" : "192.168.64.2:29200",
3 | "checks" : [
4 | {
5 | "name" : "bounded_response_time",
6 | "source" : "Client1",
7 | "dest" : "Server",
8 | "max_latency" : "100ms"
9 | }
10 | ]
11 | }
12 |
--------------------------------------------------------------------------------
/recipes/json_recipes/example_gremlins.json:
--------------------------------------------------------------------------------
1 | {
2 | "gremlins" : [
3 | {
4 | "scenario" : "delay_requests",
5 | "source" : "Client1",
6 | "dest" : "Server",
7 | "headerpattern" : "test-delay-*",
8 | "delaytime" : "3s",
9 | "delayprobability" : 0.7,
10 | "delaydistribution" : "exponential"
11 | },
12 | {
13 | "scenario" : "overload_service",
14 | "source" : "Client2",
15 | "dest" : "Server",
16 | "headerpattern" : "test-overload-*",
17 | "delaytime" : "8s",
18 | "errorcode" : 500,
19 | "delayprobability" : 0.7,
20 | "abortprobability" : 0.5
21 | }
22 | ]
23 | }
24 |
--------------------------------------------------------------------------------
/recipes/json_recipes/example_topology.json:
--------------------------------------------------------------------------------
1 | {
2 | "services" : [
3 | { "name": "Client1", "service_proxies": ["127.0.0.1:9876"] },
4 | { "name": "Client2", "service_proxies": ["127.0.0.1:9877"] },
5 | { "name": "Server"}
6 | ],
7 | "dependencies" : {
8 | "Client1" : ["Server"],
9 | "Client2" : ["Server"]
10 | }
11 | }
12 |
--------------------------------------------------------------------------------
/recipes/json_recipes/generic_gremlin_template.json:
--------------------------------------------------------------------------------
1 | [
2 | {
3 | "scenario": "crash_service|partition_services|overload_service|delay_requests|delay_responses|abort_requests|abort_responses",
4 | "comments" : "JSON schema for expressing a failure",
5 | "source": "servicename;IF NULL, DEST SHOULD BE NOT NULL",
6 | "dest": "servicename;IF NULL, DEST SHOULD BE NOT NULL",
7 | "messagetype": "request|response|publish|subscribe",
8 | "headerpattern": "regexpattern|*",
9 | "bodypattern": "regexpattern|*",
10 | "abortprobability": 0.0,
11 | "abortdistribution": "uniform|normal|exponential",
12 | "delayprobability": 0.0,
13 | "delaydistribution": "uniform|normal|exponential",
14 | "mangleprobability": 0.0,
15 | "mangledistribution": "uniform|normal|exponential",
16 | "delaytime": "1ms|1s|1m|1h",
17 | "errorcode": -1,
18 | "srcprobability" : 0.0,
19 | "dstprobability" : 0.0,
20 | "searchstring": "IGNORED",
21 | "replacestring": "IGNORED",
22 | "YOURCUSTOMFIELD" : "IGNORED",
23 | "ANOTHERCUSTOMFIELD" : "IGNORED. atleast one of the abort|delay probability should be non 0. for partition_services, if srcprobability|dstprobability are not supplied, they default to 1. Errorcode for abort defaults to 1. Default delay is 1s"
24 | },
25 | {
26 | "scenario" : "abort_requests",
27 | "comments" : "Return error on all requests from source to destination. Atleast one of source or destination should be present. errorcode represents the HTTP errorcode to return to the caller. If errorcode is -1, the tcp connection to dest will be reset immediately. If source is omitted, requests from all services to given destination will be aborted with given errorcode. If destination is omitted, requests to any destination from source service will be aborted.",
28 | "more_comments" : "You can customize the default error behavior by providing values for the fields above",
29 | "source" : "webapp",
30 | "dest" : "backend_service",
31 | "errorcode" : 429,
32 | "abortprobability" : 0.7,
33 | "abortdistribution" : "normal",
34 | "headerpattern" : "gremlin-test-abortreq-*"
35 | },
36 | {
37 | "scenario" : "abort_responses",
38 | "comments" : "Same as abort_requests, but acts after receiving the response from dest, but before passing it on to source. This might come in handy, if the requests are non idempotent, where you may want a change to be made in the backend, but fake a failure that happens after the backend has completed the change",
39 | "more_comments" : "You can customize the default error behavior by providing values for the fields above",
40 | "source" : "webapp",
41 | "dest" : "backend_service",
42 | "errorcode" : 429,
43 | "abortprobability" : 0.7,
44 | "abortdistribution" : "normal",
45 | "headerpattern" : "gremlin-test-abortresp-*"
46 | },
47 | {
48 | "scenario" : "delay_requests",
49 | "comments" : "Delay all requests from source to destination. Atleast one of source or destination should be present. delaytime is expressed as a string based on Golang's ParseDuration function. (https://golang.org/pkg/time/#ParseDuration). A duration string is a possibly signed sequence of decimal numbers, each with optional fraction and a unit suffix, such as '300ms'. Valid time units are 'ns', 'us', 'ms', 's', 'm', 'h'. If source is omitted, requests from all services to given destination will be delayed with given delaytime. If destination is omitted, requests to any destination from source service will be delayed.",
50 | "more_comments" : "You can customize the default error behavior by providing values for the fields above",
51 | "source" : "webapp",
52 | "dest" : "backend_service",
53 | "delayprobability" : 0.7,
54 | "delaydistribution" : "uniform",
55 | "headerpattern" : "gremlin-test-delayreq-*"
56 | },
57 | {
58 | "scenario" : "delay_responses",
59 | "comments" : "Same as abort_requests, but acts after receiving the response from dest, but before passing it on to source",
60 | "more_comments" : "You can customize the default error behavior by providing values for the fields above",
61 | "source" : "webapp",
62 | "dest" : "backend_service",
63 | "delayprobability" : 0.7,
64 | "delaydistribution" : "uniform",
65 | "headerpattern" : "gremlin-test-delayresp-*"
66 | },
67 | {
68 | "scenario" : "crash_service",
69 | "comments" : "crash a destination service. All connections will be reset.",
70 | "more_comments" : "You can customize the default error behavior by providing values for the fields above",
71 | "dest" : "backend_service",
72 | "headerpattern" : "gremlin-test-crash-*"
73 | },
74 | {
75 | "scenario" : "partition_services",
76 | "comments" : "Fail all communications between a given pair of services. srcprobability and dstprobability are optional",
77 | "more_comments" : "You can customize the default error behavior by providing values for the fields above",
78 | "source" : "src_service",
79 | "dest" : "dst_service",
80 | "srcprobability" : 0.8,
81 | "dstprobability" : 0.6,
82 | "headerpattern" : "gremlin-test-partition-*"
83 | },
84 | {
85 | "scenario" : "overload_service",
86 | "comments" : "Fake overload of a target service. Requests will be delayed, or will return error or both, based on the probability values provided. Default probability is 0.5 for delay and abort. If source is provided, only emulate overload scenario for the source service to target",
87 | "more_comments" : "You can customize the default error behavior by providing values for the fields above",
88 | "source" : "src_service or OMIT field",
89 | "dest" : "target_service",
90 | "delaytime" : "8s",
91 | "delayprobability" : 0.7,
92 | "errorcode" : 503,
93 | "abortprobability" : 0.6,
94 | "headerpattern" : "gremlin-test-overload-*"
95 | }
96 | ]
97 |
--------------------------------------------------------------------------------
/recipes/json_recipes/run_recipe_json.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | from pygremlin import *
4 |
5 | import sys, requests, json, os
6 |
7 | def passOrfail(result):
8 | if result:
9 | return "PASS"
10 | else:
11 | return "FAIL"
12 |
13 | if len(sys.argv) < 4:
14 | print "usage: run_recipe.py topologySpec gremlins checklist"
15 | sys.exit(1)
16 |
17 | _, topologyFilename, gremlinFilename, checklistFilename = sys.argv
18 |
19 | debugMode = (os.getenv('GREMLINSDK_DEBUG', "") != "")
20 | if not os.path.isfile(topologyFilename):
21 | print u"Topology file {} not found".format(topologyFilename)
22 | sys.exit(2)
23 |
24 | if not os.path.isfile(gremlinFilename):
25 | print u"Gremlin file {} not found".format(gremlinFilename)
26 | sys.exit(2)
27 |
28 | if not os.path.isfile(checklistFilename):
29 | print u"Checklist file {} not found".format(checklistFilename)
30 | sys.exit(2)
31 |
32 | with open(topologyFilename) as fp:
33 | app = json.load(fp)
34 |
35 | topology = ApplicationGraph(app)
36 | if debugMode:
37 | print "Using topology:\n", topology
38 |
39 | with open(gremlinFilename) as fp:
40 | gremlins = json.load(fp)
41 |
42 | with open(checklistFilename) as fp:
43 | checklist = json.load(fp)
44 |
45 | fg = FailureGenerator(topology, debug=debugMode)
46 | fg.clear_rules_from_all_proxies()
47 | fg.setup_failures(gremlins)
48 | testID = fg.start_new_test()
49 |
50 | print ('Use `postman` to inject test requests with an HTTP header X-Gremlin-ID matching the headerpattern in your gremlin spec,\n\tthen press the Enter key to continue to the validation phase')
51 | a = sys.stdin.read(1)
52 |
53 |
54 | ac = AssertionChecker(checklist['log_server'], testID, debug=debugMode)
55 | results = ac.check_assertions(checklist)
56 | exit_status = 0
57 |
58 | for check in results:
59 | print 'Check %s %s %s' % (check.name, check.info, passOrfail(check.success))
60 | if not check.success:
61 | exit_status = 1
62 |
63 | sys.exit(exit_status)
64 |
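65 | # --- Usage sketch (editor's addition) ---
66 | # Run against the example specs in this directory; the service proxies and
67 | # the Elasticsearch log server named in the specs must be reachable:
68 | #
69 | #   GREMLINSDK_DEBUG=1 python run_recipe_json.py example_topology.json \
70 | #       example_gremlins.json example_checklist.json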
--------------------------------------------------------------------------------