├── .github ├── solutionid_validator.sh └── workflows │ └── maintainer_workflows.yml ├── .gitignore ├── .gitlab-ci.yml ├── CODEOWNERS ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── cellularctl ├── clientctl ├── content └── images │ └── arch_diagram.png ├── requirements.txt └── source ├── cdk ├── .npmignore ├── README.md ├── bin │ ├── cdk.ts │ ├── cell-stack.ts │ ├── repos-stack.ts │ └── router-stack.ts ├── canary │ ├── canary-script-2.py │ └── requirements.txt ├── cdk.json ├── jest.config.js ├── package-lock.json ├── package.json ├── statemachines │ ├── sfn_check_canary.asl.json │ ├── sfn_create_cell.asl.json │ └── sfn_update_cell.asl.json ├── templates │ └── .dummy ├── test │ └── cdk.test.ts └── tsconfig.json ├── cell-container ├── Dockerfile ├── app.py └── requirements.txt ├── cells_for_codepipeline ├── __init__.py ├── cell_lib.py ├── handler.py ├── handler_test.py └── requirements.txt ├── cellularctl ├── cellular.py └── cellularctl.py ├── client ├── __init__.py ├── client_lib.py ├── clientctl.py ├── test_cell.py ├── test_integration.py ├── test_router.py └── testlibs.py └── routing-container ├── Dockerfile ├── app.py ├── requirements.txt └── routing.py /.github/solutionid_validator.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #set -e 3 | 4 | echo "checking solution id $1" 5 | echo "grep -nr --exclude-dir='.github' "$1" ./.." 6 | result=$(grep -nr --exclude-dir='.github' "$1" ./..) 7 | if [ $? -eq 0 ] 8 | then 9 | echo "Solution ID $1 found\n" 10 | echo "$result" 11 | exit 0 12 | else 13 | echo "Solution ID $1 not found" 14 | exit 1 15 | fi 16 | 17 | export result 18 | -------------------------------------------------------------------------------- /.github/workflows/maintainer_workflows.yml: -------------------------------------------------------------------------------- 1 | # Workflows managed by aws-solutions-library-samples maintainers 2 | name: Maintainer Workflows 3 | on: 4 | # Triggers the workflow on push or pull request events but only for the "main" branch 5 | push: 6 | branches: [ "main" ] 7 | pull_request: 8 | branches: [ "main" ] 9 | types: [opened, reopened, edited] 10 | 11 | jobs: 12 | CheckSolutionId: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - uses: actions/checkout@v4 16 | - name: Run solutionid validator 17 | run: | 18 | chmod u+x ./.github/solutionid_validator.sh 19 | ./.github/solutionid_validator.sh ${{ vars.SOLUTIONID }} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.js 2 | !jest.config.js 3 | *.d.ts 4 | node_modules 5 | venv 6 | .venv 7 | __pycache__ 8 | cdk.context.json 9 | 10 | .vscode 11 | .idea 12 | out 13 | 14 | # CDK asset staging directory 15 | .cdk.staging 16 | cdk.out 17 | 18 | source/cdk/templates/template_cell.yaml 19 | 20 | source/package.json 21 | source/package-lock.json 22 | .rtx.toml 23 | -------------------------------------------------------------------------------- /.gitlab-ci.yml: -------------------------------------------------------------------------------- 1 | #(SO9093)#) 2 | # You can override the included template(s) by including variable overrides 3 | # SAST customization: https://docs.gitlab.com/ee/user/application_security/sast/#customizing-the-sast-settings 4 | # Secret Detection customization: https://docs.gitlab.com/ee/user/application_security/secret_detection/#customizing-settings 5 | # Dependency Scanning customization: https://docs.gitlab.com/ee/user/application_security/dependency_scanning/#customizing-the-dependency-scanning-settings 6 | # Container Scanning customization: https://docs.gitlab.com/ee/user/application_security/container_scanning/#customizing-the-container-scanning-settings 7 | # Note that environment variables can be set in several places 8 | # See https://docs.gitlab.com/ee/ci/variables/#cicd-variable-precedence 9 | stages: 10 | - test 11 | sast: 12 | stage: test 13 | include: 14 | - template: Security/SAST.gitlab-ci.yml 15 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | CODEOWNERS @aws-solutions-library-samples/maintainers 2 | /.github/workflows/maintainer_workflows.yml @aws-solutions-library-samples/maintainers 3 | /.github/solutionid_validator.sh @aws-solutions-library-samples/maintainers 4 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | ## Code of Conduct 2 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 3 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 4 | opensource-codeofconduct@amazon.com with any additional questions or comments. 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | Thank you for your interest in contributing to our project. Whether it's a bug report, new feature, correction, or additional 4 | documentation, we greatly value feedback and contributions from our community. 5 | 6 | Please read through this document before submitting any issues or pull requests to ensure we have all the necessary 7 | information to effectively respond to your bug report or contribution. 8 | 9 | 10 | ## Reporting Bugs/Feature Requests 11 | 12 | We welcome you to use the GitHub issue tracker to report bugs or suggest features. 13 | 14 | When filing an issue, please check existing open, or recently closed, issues to make sure somebody else hasn't already 15 | reported the issue. Please try to include as much information as you can. Details like these are incredibly useful: 16 | 17 | * A reproducible test case or series of steps 18 | * The version of our code being used 19 | * Any modifications you've made relevant to the bug 20 | * Anything unusual about your environment or deployment 21 | 22 | 23 | ## Contributing via Pull Requests 24 | Contributions via pull requests are much appreciated. Before sending us a pull request, please ensure that: 25 | 26 | 1. You are working against the latest source on the *main* branch. 27 | 2. You check existing open, and recently merged, pull requests to make sure someone else hasn't addressed the problem already. 28 | 3. You open an issue to discuss any significant work - we would hate for your time to be wasted. 29 | 30 | To send us a pull request, please: 31 | 32 | 1. Fork the repository. 33 | 2. Modify the source; please focus on the specific change you are contributing. If you also reformat all the code, it will be hard for us to focus on your change. 34 | 3. Ensure local tests pass. 35 | 4. Commit to your fork using clear commit messages. 36 | 5. Send us a pull request, answering any default questions in the pull request interface. 37 | 6. Pay attention to any automated CI failures reported in the pull request, and stay involved in the conversation. 38 | 39 | GitHub provides additional document on [forking a repository](https://help.github.com/articles/fork-a-repo/) and 40 | [creating a pull request](https://help.github.com/articles/creating-a-pull-request/). 41 | 42 | 43 | ## Finding contributions to work on 44 | Looking at the existing issues is a great way to find something to contribute on. As our projects, by default, use the default GitHub issue labels (enhancement/bug/duplicate/help wanted/invalid/question/wontfix), looking at any 'help wanted' issues is a great place to start. 45 | 46 | 47 | ## Code of Conduct 48 | This project has adopted the [Amazon Open Source Code of Conduct](https://aws.github.io/code-of-conduct). 49 | For more information see the [Code of Conduct FAQ](https://aws.github.io/code-of-conduct-faq) or contact 50 | opensource-codeofconduct@amazon.com with any additional questions or comments. 51 | 52 | 53 | ## Security issue notifications 54 | If you discover a potential security issue in this project we ask that you notify AWS/Amazon Security via our [vulnerability reporting page](http://aws.amazon.com/security/vulnerability-reporting/). Please do **not** create a public github issue. 55 | 56 | 57 | ## Licensing 58 | 59 | See the [LICENSE](LICENSE) file for our project's licensing. We will ask you to confirm the licensing of your contribution. 60 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 12 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 13 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 14 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 15 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 16 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Cellular Architecture 2 | 3 | Reducing the blast radius of failures. 4 | 5 | --- 6 | 7 | # Introduction 8 | 9 | Blast radius is defined as the maximum impact that might be sustained in the event of a system failure. Using a cellular architecture we can manage the size of the blast radius of any individual failure in a system. 10 | 11 | A cell-based architecture, is an advanced resiliency architecture which creates partitions of a system, a collective of replicas each of which serves only a subset of the system's overall set of clients. In this way, a failure which disrupts any one of the partitions is unlikely to affect the other partitions and the clients they support. 12 | 13 | Other infrastructure techniques such as redundancy across AWS Availability Zones or across AWS Regions will protect against disruptions to individual server instances, disruption to an entire availability zone, or disruption to a regional AWS service. But what about other types of disruption such as a malformed request, operator error, or a bad code deployment? 14 | 15 | These types of failures can be partially mitigated using techniques like canary deployments, static tests, code reviews and automation. But they only reduce the likelihood and at large scale even unlikely events can occur regularly. Using a cell-based architecture to implement fault isolation will reduce the blast radius of these kinds of failures. 16 | 17 | For example, if due to a human error a main database is wiped, this will only affect one cell. For 1000 users, that would be only around 0.1% of the users. Plus, recovering a database with 0.1% of the data is much faster than recovering one with 100% of the data. 18 | 19 | To learn more about cell-based architectures please watch [How to scale beyond limits with cell-based architectures](https://youtu.be/HUwz8uko7HY). 20 | 21 | # Architecture Overview 22 | 23 | The cell-based key value store deployed creates a simple router which is responsible for assigning users to individual cells. The client first authenticates to the router and is then given the address of the user's cell. To then get or set key / value pairs the client connects to their assigned cell using the session token provided by the router. 24 | 25 | ![Cell-based Architecture Diagram](content/images/arch_diagram.png "Architecture diagram of cell-based key/value store") 26 | 27 | 28 | # Solution Components 29 | 30 | ## Cells 31 | 32 | A key characteristic of cell-based architectures is that every cell in the system is standalone, isolated from the other cells, with no dependency on other cells. This results in multiple, smaller, copies of the entire application being deployed, each as an individual cell. 33 | 34 | The data required for or generated by each cell is also partitioned, so that there is no replication of data between cells. This complete isolation of cells from one another is what limits the blast radius of a failure. 35 | 36 | An additional benefit, aside from resilience to failure, is that the system naturally has an ability to linearly scale. To add additional clients you need only deploy additional cells. 37 | 38 | ## Router 39 | 40 | In this solution, clients first connect to the cell router. The router authenticates the user (or creates a new user if it's a registration request) and returns the DNS name of the customer's assigned cell. The client then makes any subsequent requests to through this DNS name. Note that this is only one way to implement a cell router. Different options are possible, for example ones that are transparent to the client or that work through DNS routing. 41 | 42 | # Design Considerations 43 | 44 | ## Number of cells 45 | 46 | There is a tradeoff to be managed when determining the size of your cells. Smaller cells have a reduced blast radius as they support fewer customers per cell. Smaller cells are easier to test and deploy. And smaller cells are easier to operate for their simplicity. However, they increase complexity as you are likely to have a greater number of small cells which could lead to operational burden if not well automated. 47 | 48 | Larger cells however provide cost efficiency, dedicating more users to any individual cell. This reduction in splits between the cells also means that there are fewer cells to manage, reducing operational burden. 49 | 50 | Larger cells are more easily operated and reduce operating costs but increase the blast radius of a failure while smaller cells decrease blast radius but increase operational burden if not well automated. A key point to remember is that many who adopt cell-based architectures start with a small number of large cells and over time decrease their cell size as their tooling and automation develops. 51 | 52 | ## Monitoring 53 | 54 | Creating multiple copies of a system to produce multiple, smaller cells will increase monitoring requirements. Each cell will individually need to be monitored for health, using both white-box and black-box monitoring and business metrics. You will also want to create aggregate metrics into healthy and unhealthy cells per metric. Finally, you will want to ensure that messages and errors can be easily correlated with the cells. 55 | 56 | ## Deployment of cells 57 | 58 | To manage the blast radius with respect to software changes within the cells we can think about release engineering concepts like fractional deployments. 59 | 60 | A blue / green deployment strategy is an example of a fractional deployment. Canary releases are another strategy - the key concept being making small, gradual changes to observe whether stability is maintained over the course of the release. 61 | 62 | It is important to allow for a 'bake time' during the release cycle. After every incremental step in the release process allow a period of time to observe whether the release is causing any adverse behavior. This bake time could be 15 min, an hour, or multiple hours depending on the characteristics of the system and when you have enough confidence that the release is working as expected. 63 | 64 | If the system stability looks like it may be waning don't hesitate to roll back before you hit a failure point. It is better to have a fractional failure than to have a whole sub-system fail. And be particularly careful with stateful changes that can't be sharded or partitioned as those tend to be one-way doors that are more challenging to recover from. 65 | 66 | ## Routing 67 | 68 | Any cell-based architecture will have a requirement to direct client traffic to individual cells. This requirement is implemented using a routing layer that sits between the client and the cells. 69 | 70 | There are different ways to implement the routing layer. Each one has advantages and disadvantages and different scenarios to which each is best suited. Below are three examples of how a routing layer can be implemented. 71 | 72 | The first type of routing layer acts as a load balancer. I.e., the router accepts connections and and forwards them to the cell, relaying any packages then back to the clients. This is transparent to clients as the clients are always speaking to a single fully qualified domain name resource. However, it puts the routing layer in the critical path for all transactions with the cells as they pass through the routing layer. 73 | 74 | The second type of routing layer will have the routing layer forwarding requests to the cells. This has the benefit of being simple to implement. Also, the routing layer is only needed during the initial connection of the client to the cell. After that any disruption to the routing layer will not disrupt already established client-cell connections. However, custom logic is required in the client to communicate with the routing layer to obtain the FQDN of the cell to which the client is assigned. This could also lead to a potentially higher latency. 75 | 76 | The third type of routing layer is to use the Domain Naming Service (DNS). This has the benefit that the DNS system is highly reliable and is simple to set up. However, it puts a requirement on the client to be able to map users to the right cell through DNS. 77 | 78 | ## Routing layer resilience 79 | 80 | As suggested above, the routing layer has the risk of being a single point of failure. To improve the resilience of the routing layer we can use patterns like separating the control plane from the data plane. We can also deploy multiple copies of the data plane behind a load balancer. And employ advanced patterns like constant work to improve the predictability of the routing layer data plane. 81 | 82 | ## Other considerations 83 | 84 | Moving to a cell-based architecture may require that the data architecture be re-examined, in order to partition the data and split it across multiple databases. 85 | 86 | Other considerations include whether cells should be in individual AWS accounts. Creating many AWS accounts can become unwieldy. They need to integrate with the billing, monitoring, etc. But on the other hand multiple accounts limit the blast radius of events such as compromised account credentials or service limits. 87 | 88 | Consider whether authentication should occur at the cell or the router, and where should the credentials be stored? Authentication can happen in the router or in the cells. Having it in the cells limits the blast radius while performing authentication and authorization at the routing layer adds simplicity. 89 | 90 | With multi-layered architectures cells need to talk to cells in other layers. A single Transit Gateway can orchestrate this but becomes a single point of failure. If frontend cells can be limited to a small set of backend cells then bilateral connections can be used. 91 | 92 | SSL Certificates can also be single points of failure if they are allowed to expire or if a bad update occurs making the certificate invalid for its host. Ideally each cell uses a unique certificate with a unique expiration date. 93 | 94 | Team boundaries should also be considered. Each layer of cells is handled by a single team. This team might provide the cell infrastructure for multiple teams, deploying services in the cell. Or teams can provide libraries that the cell owner uses. 95 | 96 | Disaster Recovery also needs to be reexamined. Failover to a different region (if required) can be handled by each cell or centrally. A failover strategy per cell can also help with catastrophic failures within a cell, for example when a database table is accidentally deleted. 97 | 98 | # Deployment 99 | 100 | ## Prerequisites 101 | 102 | 1. You will need a Linux or macOS environment in which to run commands. This can also be an EC2 instance or Cloud9 environment. 103 | 1. Docker, python and npm need to be installed. 104 | 1. You need to have an AWS account and have permissions to run commands in it. 105 | 1. The region needs to be set. (For example, by setting the environment variable AWS_REGION) 106 | 1. The routing layer and each cell will create an Elastic IP, VPC and NAT Gateway. You may need to raise the limits on these resources. (An initial limit increase often is auto-approved within a few seconds.) See https://docs.aws.amazon.com/general/latest/gr/aws_service_limits.html for details. Any stacks that fail with a limit being exceed you will need to destroy and manually recreate. It is always recommended to monitor AWS service limits and proactively increase them when you start approaching one of them. 107 | 108 | ## Folder Structure 109 | 110 | - cdk: Contains CDK files to build three stacks: One for ECR repos, one for the central routing components and one for cells. The last one will be deployed multiple times. 111 | - cell-container: The container and code that runs within a cell. 112 | - router-container: The container that runs within the cell router. 113 | - cellularctl: The cellularctl command line. 114 | - client: A python client. Mainly used for automated test of router functionality. 115 | - cells_for_codepipeline: Lambda function that runs as part of CodePipeline to orchestrate updates. 116 | 117 | ## Configure the `cellularctl` 118 | 119 | Most actions can be done via cellularctl. You need to install the required modules. 120 | 121 | A python venv can be created like this: 122 | ``` 123 | python -m venv .venv 124 | source .venv/bin/activate 125 | pip install -r requirements.txt 126 | ``` 127 | 128 | CDK can be installed globally like this: 129 | ``` 130 | npm install -g aws-cdk 131 | ``` 132 | 133 | 134 | Afterwards you can run cellularctl: 135 | 136 | ``` 137 | ./cellularctl 138 | ``` 139 | 140 | ## Deploy the Cell-based Architecture 141 | 142 | Using the commands below configure CDK to proceed without requiring interactive approval. Then instruct the `cellularctl` tool to deploy the cell-based architecture using CloudFormation and AWS CDK: 143 | 144 | ``` 145 | export cdkRequireApproval=never 146 | ./cellularctl setup deploy --createcells=True 147 | ``` 148 | 149 | For more about AWS CDK and its environment variables see https://docs.aws.amazon.com/cdk/v2/guide/cli.html#cli-deploy. 150 | 151 | The solution may take a number of minutes to fully deploy after the above steps. Check in Step Functions State machines view to look for completed execution on the `Cellular-CheckCanary` job. 152 | 153 | ### Updating the solution 154 | 155 | After updating the router CDK component run the following. This will trigger a CDK build but not deploy the routing container. 156 | 157 | ``` 158 | ./cellularctl router deploy 159 | ``` 160 | 161 | After updating the router CDK component run the following. This will trigger generate a new CDK template and upload it to S3. This in turn will trigger a CodePipeline that first updates the sandbox cell, checks it for aliveness and then updates all other cells. 162 | 163 | ``` 164 | ./cellularctl cell generate_template 165 | ``` 166 | 167 | To build and push a new container for the cell router run the following. '--deploy' will trigger a refresh in ECS, so that the current deployment uses the image. 168 | 169 | ``` 170 | ./cellularctl router build --deploy 171 | ``` 172 | 173 | To build and push a new container for the cells run the following. '--deploy' will trigger a refresh in ECS, so that the current deployment in each cell uses the image. 174 | 175 | ``` 176 | ./cellularctl cell build --deploy 177 | ``` 178 | 179 | To update everything run the following: 180 | 181 | ``` 182 | ./cellularctl router build --deploy 183 | ./cellularctl cell build --deploy 184 | ./cellularctl router deploy 185 | ./cellularctl cell generate_template 186 | ``` 187 | 188 | Note that most of these commands return before the update has fully finished deploying. You can observe the deployment in CodePipeline or ECS. 189 | 190 | ## Using the solution 191 | 192 | ### Allowing ingress 193 | 194 | Inbound traffic is blocked per default to the public load balancers of the solution. In order to use the solution, you need to allow traffic from your public IP address into the solution. (Opening the solution to the public would also be an option, but is not encouraged for security reasons.) There is a managed prefix list that is referenced by all public load balancers. You can add your public IP address using this command. 195 | 196 | ```bash 197 | # This will use checkip.amazonaws.com to retrieve your public IP address 198 | ./cellularctl setup allowingress 199 | 200 | # Or, if you want to specify your IP manually: 201 | myip=$(curl -4 checkip.amazonaws.com) 202 | ./cellularctl setup allowingress $myip 203 | ``` 204 | 205 | ### Authorisation and Authentification 206 | 207 | Note that the solution does not include any functionality for authorisation and authentification. For use in a production envirnoment, we strongly recommend adding identity and access management. Cf., for example https://docs.aws.amazon.com/wellarchitected/latest/security-pillar/identity-and-access-management.html. 208 | 209 | 210 | ### Using the `clientctl` convenience tool 211 | 212 | The cell-based architecture operates as a key / value store where it partitions users across the cells. To interact with the system begin by creating a couple of users: 213 | 214 | ```bash 215 | ./cellularctl router getdnsname 216 | export routerurl=$(./cellularctl router getdnsname) 217 | ./clientctl register user1 218 | ./clientctl register user2 219 | ``` 220 | 221 | Then put and get some key / value pairs using the different users: 222 | 223 | ```bash 224 | ./clientctl exec put user1 foo bar 225 | ./clientctl exec put user2 foo baz 226 | ./clientctl exec get user1 foo 227 | ./clientctl exec get user2 foo 228 | ``` 229 | 230 | To observe which cell a user is allocated to, you can use the `getcell` parameter 231 | ```bash 232 | ./clientctl exec getcell user1 233 | ./clientctl exec getcell user2 234 | ``` 235 | 236 | ### Using the `curl` tool 237 | 238 | To see how the communication is handled you can use the `curl` command to interact with the cell-based architecture. Using the same `routerurl` environment variable defined above try the following: 239 | 240 | ```bash 241 | # register a user3 with the system 242 | result=$(curl -X POST "$routerurl/register" -H 'Content-Type: application/json' -d '{"username": "user3"}') 243 | # store the api key for next steps 244 | user3apikey=$(echo $result | jq .apikey) 245 | 246 | # next login to the system to see which cell the user is assigned to 247 | result=$(curl -X POST "$routerurl/login" -H 'Content-Type: application/json' -d "{\"username\": \"user3\", \"apikey\": $user3apikey}") 248 | 249 | # save the cell DNS name and bearer token 250 | export CELL="http://$(echo $result | jq .dns_name_cell -r)" 251 | export TOKEN="$(echo $result | jq .token -r)" 252 | 253 | # put a value into the cell 254 | curl -X POST $CELL/put -H 'Content-Type: application/json' -H 'Authorization: Bearer '$TOKEN -d '{"key": "foo", "value": "biz"}' 255 | 256 | # get the same value back out 257 | curl -X POST $CELL/get -H 'Content-Type: application/json' -H 'Authorization: Bearer '$TOKEN -d '{"key": "foo"}' 258 | {"value":"biz"} 259 | ``` 260 | 261 | ### Getting cell assignments 262 | 263 | To list currently registered users run the following: 264 | 265 | ``` 266 | ./cellularctl user list 267 | ``` 268 | 269 | To get the cell assignment for a user (with name "username") run the following: 270 | 271 | ``` 272 | ./cellularctl user cell username 273 | ``` 274 | 275 | ## Uninstalling the Solution 276 | 277 | In order to uninstall the solution, delete all CloudFormation stacks that were created. You can use the following command to trigger a destroy action on all those stacks. 278 | 279 | ```bash 280 | ./cellularctl setup destroy 281 | ``` 282 | 283 | Note that the command does not wait for the actions to finish. Be sure to go to the CloudFormation console and clean up any stacks that failed to destroy automatically. 284 | 285 | # Advanced Patterns 286 | 287 | ## Decoupling storage and compute 288 | 289 | This solution bundles the storage and compute into an individual cell. However a cell-based architecture can be designed to have a stateless front layer and a stateful backend layer. The frontend can the be treated as disposable. If a deployment to a cell breaks, it can be discarded. 290 | 291 | In the backend we can implement recovery between cells. For example by replicating data in an active-passive fashion. Or even writing to multiple cells and using a quorum to guarantee consistency. 292 | 293 | Using a stateless frontend also opens the door to other patterns such as shuffle sharding. 294 | 295 | ## Cost Optimization 296 | 297 | Some resources in your architecture will have a fixed cost. Adding these resources to each cell, reduces the failure scenarios but increases their cost. Allocating the resources centrally means we need to be careful about making changes to them. 298 | 299 | We can also think about a tiered approach where we have some parts of the infrastructure where we have a small blast radius and therefore need fewer controls and central resources for which we need tighter controls. Or cells can be bundled. For example one VPC and the associated resources for 10 cells. This means that we still reduce the blast radius but at a tenth of the cost. 300 | 301 | ## Multi-cell Transactions 302 | 303 | Consider whether requests should span cells. This is a map-reduce approach where an external service coordinates a workflow across the cells. This service should go to the cells through their external endpoints and not access any internals directly. This may become necessary but will significantly increase the complexity of the solution so leverage this pattern with care. 304 | 305 | ## Scaling out for Complex Systems 306 | 307 | Larger, more complex systems can look like microservices. Many of the patterns from that world can apply here as well. A cell may internally still be composed of multiple microservices. The size of the cells is important and remember Conway's law which would have the system architecture reflect the organization's structure; this may not be the optimal system architecture. Complexity will increase as more layers are added. 308 | 309 | # Contributors 310 | 311 | The following individuals contributed to this document: 312 | 313 | - Robert Himmelmann 314 | - Jason Barto 315 | 316 | # Related Documentation 317 | 318 | https://docs.aws.amazon.com/wellarchitected/latest/reducing-scope-of-impact-with-cell-based-architecture/reducing-scope-of-impact-with-cell-based-architecture.html 319 | 320 | https://docs.aws.amazon.com/wellarchitected/latest/reliability-pillar/rel_fault_isolation_use_bulkhead.html 321 | 322 | # Revisions 323 | 324 | | Date | Change | 325 | | ---- | ------ | 326 | | Dec 2023 | Initial release | 327 | 328 | # Notices 329 | 330 | Customers are responsible for making their own independent assessment of the information in this document. This document: (a) is for informational purposes only, (b) represents AWS current product offerings and practices, which are subject to change without notice, and (c) does not create any commitments or assurances from AWS and its affiliates, suppliers or licensors. AWS products or services are provided “as is” without warranties, representations, or conditions of any kind, whether express or implied. AWS responsibilities and liabilities to its customers are controlled by AWS agreements, and this document is not part of, nor does it modify, any agreement between AWS and its customers. 331 | 332 | # AWS Glossary 333 | 334 | For the latest AWS terminology, see the [AWS glossary](https://docs.aws.amazon.com/general/latest/gr/glos-chap.html) in the AWS General Reference. 335 | 336 | --- 337 | 338 | # MARK FOR DELETION 339 | 340 | ## Scenarios 341 | 342 | This architecture is designed to limit the blast radius of bad deployments, human error and poison pill scenarios. Cells don't share resources and failure of once cell won't affect other cells. 343 | 344 | In order to simulate a bad deployment, uncomment the line 345 | ``` 346 | this.create_denyNacls(vpc) 347 | ``` 348 | 349 | in cell-stack.ts. Then execute the following: 350 | 351 | ``` 352 | ./cellularctl generate_template 353 | ``` 354 | 355 | This will generate and upload a new version of the cell template which now contains a Network ACL that denies all outgoing traffic for all resources in the cell VPC. 356 | The new template will trigger an execution of the codepipeline. The deployment to the sandbox cell will be successful but the canaries will fail afterwards and halt the deployment. 357 | Here, the canaries simulate a business metrics. For example, after a new deployment to a cell, a increase in support cases or a decrease in sales would be an indication that something is wrong and that the new change shouldn't be deployed to more cells. 358 | -------------------------------------------------------------------------------- /cellularctl: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python source/cellularctl/cellularctl.py $* -------------------------------------------------------------------------------- /clientctl: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | python source/client/clientctl.py $* -------------------------------------------------------------------------------- /content/images/arch_diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-cell-based-architecture-on-aws/a86892b4760adf1812b0fcec4b10eae84959e788/content/images/arch_diagram.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | fire 3 | boto3 4 | flask 5 | flask_httpauth 6 | pyyaml 7 | -------------------------------------------------------------------------------- /source/cdk/.npmignore: -------------------------------------------------------------------------------- 1 | *.ts 2 | !*.d.ts 3 | 4 | # CDK asset staging directory 5 | .cdk.staging 6 | cdk.out 7 | -------------------------------------------------------------------------------- /source/cdk/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to your CDK TypeScript project 2 | 3 | This is a blank project for CDK development with TypeScript. 4 | 5 | The `cdk.json` file tells the CDK Toolkit how to execute your app. 6 | 7 | ## Useful commands 8 | 9 | * `npm run build` compile typescript to js 10 | * `npm run watch` watch for changes and compile 11 | * `npm run test` perform the jest unit tests 12 | * `cdk deploy` deploy this stack to your default AWS account/region 13 | * `cdk diff` compare deployed stack with current state 14 | * `cdk synth` emits the synthesized CloudFormation template 15 | -------------------------------------------------------------------------------- /source/cdk/bin/cdk.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import 'source-map-support/register'; 3 | import * as cdk from 'aws-cdk-lib'; 4 | import {CellStack} from './cell-stack'; 5 | import {ReposStack} from './repos-stack'; 6 | import {RouterStack} from './router-stack'; 7 | import {Tags} from 'aws-cdk-lib'; 8 | 9 | const app = new cdk.App(); 10 | 11 | new ReposStack(app, 'Cellular-Repos', { 12 | env: {account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION}, 13 | }); 14 | 15 | const cellStack = new CellStack(app, 'Cellular-Cell-sandbox', { 16 | env: {account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION}, 17 | }); 18 | Tags.of(cellStack).add('auto-delete', 'no') 19 | 20 | const routerStack = new RouterStack(app, 'Cellular-Router', { 21 | env: {account: process.env.CDK_DEFAULT_ACCOUNT, region: process.env.CDK_DEFAULT_REGION}, 22 | }); 23 | Tags.of(routerStack).add('auto-delete', 'no') -------------------------------------------------------------------------------- /source/cdk/bin/cell-stack.ts: -------------------------------------------------------------------------------- 1 | import * as cdk from 'aws-cdk-lib'; 2 | import { 3 | aws_dynamodb as dynamodb, 4 | aws_ec2 as ec2, 5 | aws_ecs as ecs, 6 | aws_ecs_patterns as ecsPatterns, 7 | aws_iam as iam, 8 | aws_kinesis as kinesis, 9 | Stack, 10 | StackProps 11 | } from 'aws-cdk-lib'; 12 | import {Construct} from 'constructs'; 13 | import * as synthetics from 'aws-cdk-lib/aws-synthetics'; 14 | import * as fs from 'fs'; 15 | import {AclCidr, AclTraffic, Action, TrafficDirection} from "aws-cdk-lib/aws-ec2"; 16 | 17 | export class CellStack extends Stack { 18 | constructor(scope: Construct, id: string, props?: StackProps) { 19 | super(scope, id, props); 20 | 21 | const cell_id = new cdk.CfnParameter(this, "cell_id", { 22 | type: "String", 23 | description: "The unique ID of this cell. (Will be appended to all resources)", 24 | // Necessary, e.g., for synthetics name. 25 | allowedPattern: '[a-z\\-0-9]*', 26 | constraintDescription: 'A name consists of lowercase letters, numbers, ' + 27 | 'hyphens or underscores with no spaces.' 28 | }); 29 | 30 | const image_uri = new cdk.CfnParameter(this, "image_uri", { 31 | type: "String", 32 | description: "URI of the repository", 33 | }); 34 | 35 | const datalakeStream = kinesis.Stream.fromStreamArn(this, 'dataLakeStream', 36 | cdk.Fn.importValue('cellDataLakeArn').toString()) 37 | const ddb_table = new dynamodb.Table(this, 'Cell-Table', { 38 | tableName: 'Cell-' + cell_id.valueAsString, 39 | partitionKey: {name: 'username', type: dynamodb.AttributeType.STRING}, 40 | sortKey: {name: 'key', type: dynamodb.AttributeType.STRING}, 41 | billingMode: dynamodb.BillingMode.PAY_PER_REQUEST, 42 | removalPolicy: cdk.RemovalPolicy.DESTROY, 43 | kinesisStream: datalakeStream, 44 | }) 45 | 46 | new cdk.CfnOutput(this, 'ddbTableName', { 47 | value: ddb_table.tableName, 48 | //exportName: 'ddbTableName' 49 | }); 50 | 51 | const vpc = new ec2.Vpc(this, 'Cell-VPC', { 52 | vpcName: "Cell-VPC-" + cell_id.valueAsString, 53 | ipAddresses: ec2.IpAddresses.cidr("10.0.0.0/16"), 54 | natGateways: 1 // For testing only 55 | }) 56 | // Uncomment this to simulate a bad deployment. 57 | // this.create_denyNacls(vpc) 58 | 59 | const service = this.ecs_service(vpc, cell_id, image_uri, ddb_table); 60 | 61 | this.create_canary(cell_id, service) 62 | } 63 | 64 | create_denyNacls(vpc: ec2.Vpc) { 65 | const nacl = new ec2.NetworkAcl(this, 'DenyAllNacl', { 66 | vpc: vpc, 67 | networkAclName: 'denyAllNacl', 68 | subnetSelection: { 69 | subnetType: ec2.SubnetType.PUBLIC, 70 | onePerAz: false, 71 | } 72 | }) 73 | nacl.addEntry('BlockAllOutgoing', { 74 | cidr: AclCidr.anyIpv4(), 75 | ruleNumber: 99, 76 | traffic: AclTraffic.allTraffic(), 77 | direction: TrafficDirection.EGRESS, 78 | ruleAction: Action.DENY 79 | }) 80 | } 81 | 82 | create_canary(cell_id: cdk.CfnParameter, service: ecsPatterns.ApplicationLoadBalancedFargateService) { 83 | const file = fs.readFileSync('./canary/canary-script-2.py') 84 | 85 | const canary = new synthetics.Canary(this, 'Inline Canary', { 86 | canaryName: 'cell-canary-' + cell_id.valueAsString, 87 | test: synthetics.Test.custom({ 88 | code: synthetics.Code.fromInline(file.toString()), 89 | handler: 'index.handler', 90 | }), 91 | environmentVariables: { 92 | dnsName: service.loadBalancer.loadBalancerDnsName, 93 | cellid: cell_id.valueAsString, 94 | }, 95 | schedule: synthetics.Schedule.rate(cdk.Duration.minutes(1)), 96 | startAfterCreation: true, 97 | runtime: new synthetics.Runtime('syn-python-selenium-4.0', synthetics.RuntimeFamily.PYTHON), 98 | timeToLive: cdk.Duration.hours(1), 99 | }); 100 | } 101 | 102 | ecs_service(vpc: ec2.Vpc, cell_id: cdk.CfnParameter, image_uri: cdk.CfnParameter, ddb_table: dynamodb.Table) 103 | : ecsPatterns.ApplicationLoadBalancedFargateService { 104 | 105 | const taskRole = new iam.Role(this, 'CellularRouterTaskRole', { 106 | assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'), 107 | inlinePolicies: { 108 | 'policy': new iam.PolicyDocument({ 109 | statements: [new iam.PolicyStatement({ 110 | actions: [ 111 | "dynamodb:BatchGet*", 112 | "dynamodb:DescribeStream", 113 | "dynamodb:DescribeTable", 114 | "dynamodb:Get*", 115 | "dynamodb:Query", 116 | "dynamodb:Scan", 117 | "dynamodb:BatchWrite*", 118 | "dynamodb:Delete*", 119 | "dynamodb:Update*", 120 | "dynamodb:PutItem" 121 | ], 122 | resources: ['arn:aws:dynamodb:*:*:table/' + ddb_table.tableName], 123 | })], 124 | }) 125 | } 126 | }); 127 | const executionRole = new iam.Role(this, 'CellularRouterExecutionRole', { 128 | assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'), 129 | inlinePolicies: { 130 | 'policy': new iam.PolicyDocument({ 131 | statements: [new iam.PolicyStatement({ 132 | actions: [ 133 | "ecr:GetAuthorizationToken", 134 | "ecr:BatchCheckLayerAvailability", 135 | "ecr:GetDownloadUrlForLayer", 136 | "ecr:BatchGetImage", 137 | "logs:CreateLogStream", 138 | "logs:PutLogEvents" 139 | ], 140 | resources: ['*'], 141 | })], 142 | }) 143 | } 144 | }); 145 | const cluster = new ecs.Cluster(this, 'Cell-Cluster', { 146 | vpc: vpc, 147 | clusterName: 'Cell-Cluster-' + cell_id.valueAsString, 148 | }); 149 | const service = new ecsPatterns.ApplicationLoadBalancedFargateService( 150 | this, 'Cell-Service', { 151 | cluster, 152 | loadBalancerName: "Cell-LoadBalancer-" + cell_id.valueAsString, 153 | serviceName: "Cell-Service-" + cell_id.valueAsString, 154 | memoryLimitMiB: 1024, 155 | cpu: 512, 156 | minHealthyPercent: 0, // Helps with faster deployments for testing 157 | taskImageOptions: { 158 | image: ecs.ContainerImage.fromRegistry(image_uri.valueAsString), 159 | environment: { 160 | cellId: cell_id.valueAsString, 161 | tableName: ddb_table.tableName, 162 | tableArn: ddb_table.tableArn, 163 | }, 164 | containerPort: 8080, 165 | taskRole: taskRole, 166 | executionRole: executionRole, 167 | }, 168 | openListener: false, 169 | }); 170 | service.targetGroup.setAttribute('deregistration_delay.timeout_seconds', '10'); 171 | 172 | const lbSecurityGroup = new ec2.SecurityGroup(this, 'lb-security-group', { 173 | vpc, 174 | description: 'Allow inbound prefix from the cell prefix list', 175 | allowAllOutbound: false, 176 | }); 177 | lbSecurityGroup.addIngressRule( 178 | ec2.Peer.prefixList(cdk.Fn.importValue('cellsInboundPrefixListId')), 179 | ec2.Port.tcp(80)); 180 | service.targetGroup.setAttribute('deregistration_delay.timeout_seconds', '10'); 181 | service.loadBalancer.addSecurityGroup(lbSecurityGroup) 182 | 183 | new cdk.CfnOutput(this, 'dnsName', { 184 | value: service.loadBalancer.loadBalancerDnsName, 185 | }); 186 | new cdk.CfnOutput(this, 'serviceName', { 187 | value: service.service.serviceName, 188 | }); 189 | new cdk.CfnOutput(this, 'clusterName', { 190 | value: service.cluster.clusterName, 191 | }); 192 | return service; 193 | } 194 | } 195 | -------------------------------------------------------------------------------- /source/cdk/bin/repos-stack.ts: -------------------------------------------------------------------------------- 1 | import { 2 | Stack, StackProps, aws_ecr as ecr, aws_ec2 as ec2, aws_secretsmanager as secretsmanager, 3 | } from 'aws-cdk-lib'; 4 | import {Construct} from 'constructs'; 5 | import * as cdk from 'aws-cdk-lib'; 6 | 7 | export class ReposStack extends Stack { 8 | constructor(scope: Construct, id: string, props?: StackProps) { 9 | super(scope, id, props); 10 | 11 | const repoCell = new ecr.Repository(this, 'RepoCell', { 12 | repositoryName: 'cellular_cell', 13 | imageScanOnPush: true, 14 | }); 15 | new cdk.CfnOutput(this, 'repoCellUri', { 16 | value: repoCell.repositoryUri, 17 | exportName: 'repoCellUri' 18 | }); 19 | 20 | const repoRouting = new ecr.Repository(this, 'RepoRouting', { 21 | repositoryName: 'cellular_routing', 22 | imageScanOnPush: true, 23 | }); 24 | new cdk.CfnOutput(this, 'repoRoutingUri', { 25 | value: repoRouting.repositoryUri, 26 | exportName: 'repoRoutingUri' 27 | }); 28 | 29 | const prefixList = new ec2.PrefixList(this, 'inboundPrefixList', { 30 | maxEntries: 20, 31 | }); 32 | new cdk.CfnOutput(this, 'inboundPrefixListId', { 33 | value: prefixList.prefixListId, 34 | exportName: 'cellsInboundPrefixListId' 35 | }); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /source/cdk/bin/router-stack.ts: -------------------------------------------------------------------------------- 1 | import { 2 | Stack, StackProps, aws_ecs_patterns as ecsPatterns, aws_ecs as ecs, aws_ec2 as ec2, 3 | aws_s3 as s3, aws_stepfunctions as sfn, aws_codepipeline as codepipeline, 4 | aws_codepipeline_actions as codepipeline_actions, aws_iam as iam, aws_dynamodb as dynamodb, aws_lambda as lambda, 5 | aws_kinesis as kinesis, 6 | } from 'aws-cdk-lib'; 7 | import * as firehose from '@aws-cdk/aws-kinesisfirehose-alpha'; 8 | import * as firehose_destinations from '@aws-cdk/aws-kinesisfirehose-destinations-alpha'; 9 | import {Construct} from 'constructs'; 10 | import * as cdk from 'aws-cdk-lib'; 11 | import * as fs from 'fs'; 12 | import * as path from 'path'; 13 | import { DefinitionBody } from 'aws-cdk-lib/aws-stepfunctions'; 14 | 15 | export class RouterStack extends Stack { 16 | constructor(scope: Construct, id: string, props?: StackProps) { 17 | super(scope, id, props); 18 | 19 | const image_uri = new cdk.CfnParameter(this, "image_uri", { 20 | type: "String", 21 | description: "URI of the repository", 22 | }); 23 | 24 | const vpc = new ec2.Vpc(this, 'Router-VPC', { 25 | vpcName: "CellRouter-VPC", 26 | ipAddresses: ec2.IpAddresses.cidr("10.0.0.0/16"), 27 | natGateways: 1 // For testing only 28 | }) 29 | 30 | const cells_table = new dynamodb.Table(this, 'Cells-Table', { 31 | tableName: 'Cellular-Routing-Cells', 32 | partitionKey: {name: 'cell_id', type: dynamodb.AttributeType.STRING}, 33 | billingMode: dynamodb.BillingMode.PAY_PER_REQUEST, 34 | removalPolicy: cdk.RemovalPolicy.DESTROY, 35 | }) 36 | new cdk.CfnOutput(this, 'cellsTable', { 37 | value: cells_table.tableName, 38 | }); 39 | 40 | const users_table = new dynamodb.Table(this, 'Users-Table', { 41 | tableName: 'Cellular-Routing-Users', 42 | partitionKey: {name: 'username', type: dynamodb.AttributeType.STRING}, 43 | billingMode: dynamodb.BillingMode.PAY_PER_REQUEST, 44 | removalPolicy: cdk.RemovalPolicy.DESTROY, 45 | }) 46 | new cdk.CfnOutput(this, 'usersTable', { 47 | value: users_table.tableName, 48 | }); 49 | 50 | const bucket = this.bucket(); 51 | 52 | const updateCellsSm = this.createAndUpdateStatemachines() 53 | 54 | this.ecsService(vpc, image_uri, cells_table, users_table) 55 | 56 | const checkCanarySm = this.checkcanaryStatemachine() 57 | 58 | const pipelineLambda = this.pipelineLambda(cells_table, bucket, updateCellsSm) 59 | 60 | this.codePipeline(bucket, updateCellsSm, checkCanarySm, pipelineLambda) 61 | 62 | this.dataLakeStream(bucket) 63 | } 64 | 65 | bucket(): s3.Bucket { 66 | const bucket = new s3.Bucket(this, 'bucket', { 67 | bucketName: 'cellular-arch-' + cdk.Stack.of(this).account + '-' + cdk.Stack.of(this).region, 68 | removalPolicy: cdk.RemovalPolicy.DESTROY, 69 | autoDeleteObjects: true, 70 | versioned: true, 71 | }); 72 | 73 | new cdk.CfnOutput(this, 'bucketName', { 74 | value: bucket.bucketName, 75 | }); 76 | new cdk.CfnOutput(this, 'bucketRegionalDomainName', { 77 | value: bucket.bucketRegionalDomainName, 78 | }); 79 | return bucket 80 | } 81 | 82 | ecsService(vpc: ec2.Vpc, image_uri: cdk.CfnParameter, cells_table: dynamodb.Table, users_table: dynamodb.Table) 83 | : ecsPatterns.ApplicationLoadBalancedFargateService { 84 | const executionRole = new iam.Role(this, 'CellularRouterExecutionRole', { 85 | assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'), 86 | inlinePolicies: { 87 | 'policy': new iam.PolicyDocument({ 88 | statements: [new iam.PolicyStatement({ 89 | actions: [ 90 | "ecr:GetAuthorizationToken", 91 | "ecr:BatchCheckLayerAvailability", 92 | "ecr:GetDownloadUrlForLayer", 93 | "ecr:BatchGetImage", 94 | "logs:CreateLogStream", 95 | "logs:PutLogEvents" 96 | ], 97 | resources: ['*'], 98 | })], 99 | }) 100 | } 101 | }); 102 | 103 | const taskRole = new iam.Role(this, 'CellularRouterTaskRole', { 104 | assumedBy: new iam.ServicePrincipal('ecs-tasks.amazonaws.com'), 105 | inlinePolicies: { 106 | 'policy': new iam.PolicyDocument({ 107 | statements: [new iam.PolicyStatement({ 108 | actions: [ 109 | "dynamodb:BatchGet*", 110 | "dynamodb:DescribeStream", 111 | "dynamodb:DescribeTable", 112 | "dynamodb:Get*", 113 | "dynamodb:Query", 114 | "dynamodb:Scan", 115 | "dynamodb:BatchWrite*", 116 | "dynamodb:Delete*", 117 | "dynamodb:Update*", 118 | "dynamodb:PutItem" 119 | ], 120 | resources: [ 121 | 'arn:aws:dynamodb:*:*:table/' + cells_table.tableName, 122 | 'arn:aws:dynamodb:*:*:table/' + users_table.tableName, 123 | ], 124 | }), new iam.PolicyStatement({ 125 | actions: [ 126 | "cloudformation:DescribeStacks", 127 | ], 128 | resources: [ 129 | '*' 130 | ], 131 | })], 132 | }) 133 | } 134 | }); 135 | 136 | 137 | const cluster = new ecs.Cluster(this, 'Cell-Cluster', { 138 | vpc: vpc, 139 | clusterName: 'Cell-Router', 140 | }); 141 | const service = new ecsPatterns.ApplicationLoadBalancedFargateService( 142 | this, 'CellRouter-Service', { 143 | cluster, 144 | loadBalancerName: "CellRouter-LoadBalancer", 145 | serviceName: "CellRouter-Service", 146 | memoryLimitMiB: 1024, 147 | cpu: 512, 148 | minHealthyPercent: 0, // Helps with faster deployments for testing 149 | taskImageOptions: { 150 | image: ecs.ContainerImage.fromRegistry(image_uri.valueAsString), 151 | environment: { 152 | cellsTableName: cells_table.tableName, 153 | usersTableName: users_table.tableName, 154 | }, 155 | containerPort: 8080, 156 | executionRole, 157 | taskRole, 158 | }, 159 | openListener: false, 160 | }); 161 | 162 | const lbSecurityGroup = new ec2.SecurityGroup(this, 'lb-security-group', { 163 | vpc, 164 | description: 'Allow inbound prefix from the cell prefix list', 165 | allowAllOutbound: false, 166 | }); 167 | lbSecurityGroup.addIngressRule( 168 | ec2.Peer.prefixList(cdk.Fn.importValue('cellsInboundPrefixListId')), 169 | ec2.Port.tcp(80)); 170 | service.targetGroup.setAttribute('deregistration_delay.timeout_seconds', '10'); 171 | service.loadBalancer.addSecurityGroup(lbSecurityGroup) 172 | 173 | new cdk.CfnOutput(this, 'serviceName', { 174 | value: service.service.serviceName, 175 | }); 176 | new cdk.CfnOutput(this, 'clusterName', { 177 | value: service.cluster.clusterName, 178 | }); 179 | new cdk.CfnOutput(this, 'dnsName', { 180 | value: service.loadBalancer.loadBalancerDnsName, 181 | }); 182 | 183 | return service; 184 | } 185 | 186 | codePipeline(bucket: s3.Bucket, 187 | updateCellsSm: sfn.StateMachine, 188 | checkCanarySm: sfn.StateMachine, 189 | pipelineLambda: lambda.Function) { 190 | const pipeline = new codepipeline.Pipeline(this, 'CellPipeline', { 191 | pipelineName: 'CellPipeline' 192 | }) 193 | 194 | const sourceOutput = new codepipeline.Artifact(); 195 | pipeline.addStage({ 196 | stageName: 'Source', 197 | actions: [new codepipeline_actions.S3SourceAction({ 198 | actionName: 'S3Source', 199 | bucket: bucket, 200 | bucketKey: 'template_cell.yaml', 201 | output: sourceOutput, 202 | })], 203 | }); 204 | 205 | pipeline.addStage({ 206 | stageName: 'DeployToSandbox', 207 | actions: [new codepipeline_actions.StepFunctionInvokeAction({ 208 | actionName: 'Invoke', 209 | stateMachine: updateCellsSm, 210 | stateMachineInput: codepipeline_actions.StateMachineInput.literal( 211 | { 212 | cellIds: ['sandbox'], 213 | templateUrl: 'https://' + bucket.bucketRegionalDomainName + '/template_cell.yaml', 214 | }), 215 | })], 216 | }); 217 | 218 | pipeline.addStage({ 219 | stageName: 'CheckCanaryForSandbox', 220 | actions: [new codepipeline_actions.StepFunctionInvokeAction({ 221 | actionName: 'Invoke', 222 | stateMachine: checkCanarySm, 223 | stateMachineInput: codepipeline_actions.StateMachineInput.literal( 224 | { 225 | cellIds: ['sandbox'], 226 | waitseconds: 360, 227 | }), 228 | })], 229 | }); 230 | 231 | pipeline.addStage({ 232 | stageName: 'DeployToOtherCells', 233 | actions: [new codepipeline_actions.LambdaInvokeAction({ 234 | actionName: 'Invoke', 235 | lambda: pipelineLambda, 236 | variablesNamespace: 'cells', 237 | userParameters: { 238 | 'action': 'ALL_ACTIVE_EXCEPT_SANDBOX' 239 | } 240 | })], 241 | }); 242 | } 243 | 244 | pipelineLambda(cells_table: dynamodb.Table, bucket: s3.Bucket, updateCellsFunction: sfn.StateMachine) { 245 | const role = new iam.Role(this, 'CellLambdaRole', { 246 | assumedBy: new iam.ServicePrincipal('lambda.amazonaws.com'), 247 | inlinePolicies: { 248 | 'policy': new iam.PolicyDocument({ 249 | statements: [new iam.PolicyStatement({ 250 | actions: [ 251 | "dynamodb:BatchGet*", 252 | "dynamodb:DescribeStream", 253 | "dynamodb:DescribeTable", 254 | "dynamodb:Get*", 255 | "dynamodb:Query", 256 | "dynamodb:Scan", 257 | "dynamodb:BatchWrite*", 258 | "dynamodb:Delete*", 259 | "dynamodb:Update*", 260 | "dynamodb:PutItem" 261 | ], 262 | resources: [ 263 | 'arn:aws:dynamodb:*:*:table/' + cells_table.tableName, 264 | ], 265 | }), new iam.PolicyStatement({ 266 | actions: [ 267 | "logs:CreateLogGroup", 268 | "logs:CreateLogStream", 269 | "logs:PutLogEvents", 270 | ], 271 | resources: [ 272 | '*', 273 | ], 274 | }), new iam.PolicyStatement({ 275 | actions: [ 276 | "states:StartExecution", 277 | ], 278 | resources: [ 279 | updateCellsFunction.stateMachineArn 280 | ], 281 | }) 282 | ], 283 | }) 284 | } 285 | }); 286 | 287 | return new lambda.Function(this, 'CellPipelineLambda', { 288 | functionName: 'cellPipelineLambda', 289 | //code: lambda.Code.fromAsset(path.join(__dirname, '../lambda/cells_for_codepipeline'), { 290 | code: lambda.Code.fromAsset(path.join(__dirname, '../../cells_for_codepipeline'), { 291 | bundling: { 292 | image: lambda.Runtime.PYTHON_3_9.bundlingImage, 293 | command: [ 294 | 'bash', '-c', 295 | 'pip install -r requirements.txt -t /asset-output && cp -au . /asset-output' 296 | ], 297 | }, 298 | }), 299 | runtime: lambda.Runtime.PYTHON_3_9, 300 | handler: 'handler.handler', 301 | role: role, 302 | environment: { 303 | 'cellsTable': cells_table.tableName, 304 | 'templateUrl': 'https://' + bucket.bucketRegionalDomainName + '/template_cell.yaml', 305 | 'updateCellsFunctionArn': updateCellsFunction.stateMachineArn, 306 | 'templateBucketName': bucket.bucketRegionalDomainName, 307 | } 308 | }); 309 | } 310 | 311 | createAndUpdateStatemachines(): sfn.StateMachine { 312 | const deploymentRole = new iam.Role(this, 'CellularRouterSfnRole', { 313 | assumedBy: new iam.ServicePrincipal('states.amazonaws.com'), 314 | roleName: 'CellBasedSfnRole', 315 | inlinePolicies: { 316 | 'policy': new iam.PolicyDocument({ 317 | statements: [new iam.PolicyStatement({ 318 | actions: [ 319 | "cloudformation:*", 320 | "s3:*", 321 | "ssm:GetParameters", 322 | "ec2:*", 323 | "dynamodb:*", 324 | "ecs:*", 325 | "iam:*", 326 | "logs:*", 327 | "elasticloadbalancingv2:*", 328 | "elasticloadbalancing:*", 329 | "states:*", 330 | "synthetics:*", 331 | "lambda:*", 332 | "kinesis:*", 333 | "codepipeline:PutJobSuccessResult", 334 | "codepipeline:PutJobFailureResult", 335 | ], 336 | resources: ['*'], 337 | })], 338 | }) 339 | } 340 | }); 341 | 342 | const createCellFunction = this.create_statemachine( 343 | 'CreateCellSfn', 344 | 'Cellular-Create-Cell', 345 | 'sfn_create_cell.asl.json', 346 | deploymentRole); 347 | new cdk.CfnOutput(this, 'create-cell-function', { 348 | value: createCellFunction.stateMachineArn, 349 | }); 350 | 351 | const updateStatemachine = this.create_statemachine( 352 | 'UpdateCellsSfn', 353 | 'Cellular-Update-Cells', 354 | 'sfn_update_cell.asl.json', 355 | deploymentRole); 356 | 357 | new cdk.CfnOutput(this, 'update-cells-function', { 358 | value: updateStatemachine.stateMachineArn, 359 | }); 360 | 361 | return updateStatemachine 362 | } 363 | 364 | checkcanaryStatemachine() { 365 | const role = new iam.Role(this, 'CellularRouterSfnCanaryRole', { 366 | assumedBy: new iam.ServicePrincipal('states.amazonaws.com'), 367 | roleName: 'CellBasedSfnCanaryRole', 368 | inlinePolicies: { 369 | 'policy': new iam.PolicyDocument({ 370 | statements: [new iam.PolicyStatement({ 371 | actions: [ 372 | "logs:*", 373 | "states:*", 374 | "synthetics:*", 375 | "codepipeline:PutJobSuccessResult", 376 | "codepipeline:PutJobFailureResult", 377 | ], 378 | resources: ['*'], 379 | })], 380 | }) 381 | } 382 | }); 383 | 384 | const statemachine = this.create_statemachine( 385 | 'CellsCanarySfn', 386 | 'Cellular-CheckCanary', 387 | 'sfn_check_canary.asl.json', 388 | role); 389 | 390 | new cdk.CfnOutput(this, 'check-canary-statemachine', { 391 | value: statemachine.stateMachineArn, 392 | }); 393 | 394 | return statemachine 395 | } 396 | 397 | create_statemachine(constructName: string, sfnName: string, fileName: string, 398 | role: iam.Role) { 399 | const file = fs.readFileSync('./statemachines/' + fileName); 400 | const statemachine = new sfn.StateMachine(this, constructName, { 401 | stateMachineName: sfnName, 402 | definitionBody: DefinitionBody.fromChainable(new sfn.Pass(this, constructName + 'StartState')), 403 | role: role, 404 | }); 405 | const cfnStatemachine = statemachine.node.defaultChild as sfn.CfnStateMachine; 406 | cfnStatemachine.definitionString = file.toString(); 407 | return statemachine; 408 | } 409 | 410 | dataLakeStream(bucket: s3.Bucket) { 411 | const stream = new kinesis.Stream(this, 'cellDataLakeStream', { 412 | streamName: 'cell-datalake-stream', 413 | }); 414 | 415 | new cdk.CfnOutput(this, 'dataLakeStream', { 416 | value: stream.streamArn, 417 | exportName: 'cellDataLakeArn' 418 | }); 419 | 420 | new firehose.DeliveryStream(this, 'Delivery Stream', { 421 | sourceStream: stream, 422 | destinations: [new firehose_destinations.S3Bucket(bucket, { 423 | dataOutputPrefix: 'datalake', 424 | })], 425 | }); 426 | } 427 | } 428 | -------------------------------------------------------------------------------- /source/cdk/canary/canary-script-2.py: -------------------------------------------------------------------------------- 1 | import json 2 | import http.client 3 | import urllib.parse 4 | import boto3 5 | import os 6 | from aws_synthetics.selenium import synthetics_webdriver as syn_webdriver 7 | from aws_synthetics.common import synthetics_logger as logger 8 | 9 | dnsNameCell = os.environ.get('dnsName') 10 | cellid = os.environ.get('cellid') 11 | 12 | def verify_request(method, url, post_data=None, headers={}): 13 | parsed_url = urllib.parse.urlparse(url) 14 | #user_agent = str(syn_webdriver.get_canary_user_agent_string()) 15 | #if "User-Agent" in headers: 16 | # headers["User-Agent"] = " ".join([user_agent, headers["User-Agent"]]) 17 | #else: 18 | # headers["User-Agent"] = "{}".format(user_agent) 19 | 20 | logger.info("Making request with Method: '%s' URL: %s: Data: %s Headers: %s" % ( 21 | method, url, post_data, json.dumps(headers))) 22 | 23 | if parsed_url.scheme == "https": 24 | conn = http.client.HTTPSConnection(parsed_url.hostname, parsed_url.port) 25 | else: 26 | conn = http.client.HTTPConnection(parsed_url.hostname, parsed_url.port) 27 | 28 | conn.request(method, url, json.dumps(post_data), headers) 29 | response = conn.getresponse() 30 | logger.info("Status Code: %s " % response.status) 31 | logger.info("Response Headers: %s" % json.dumps(response.headers.as_string())) 32 | 33 | if not response.status or response.status < 200 or response.status > 299: 34 | try: 35 | logger.error("Response: %s" % response.read().decode()) 36 | finally: 37 | if response.reason: 38 | conn.close() 39 | raise Exception("Failed: %s" % response.reason) 40 | else: 41 | conn.close() 42 | raise Exception("Failed with status code: %s" % response.status) 43 | 44 | logger.info("Response: %s" % response.read().decode()) 45 | logger.info("HTTP request successfully executed") 46 | conn.close() 47 | 48 | def main(): 49 | url1 = 'http://' + dnsNameCell + '/put' 50 | method1 = 'POST' 51 | postData1 = {'key': 'canary_test', 'value': 'value'} 52 | headers1 = { 53 | "Authorization": "Bearer canary", 54 | 'Content-type': 'application/json', 55 | } 56 | verify_request(method1, url1, postData1, headers1) 57 | 58 | logger.info("Canary successfully executed") 59 | 60 | def handler(event, context): 61 | logger.info("Selenium Python API canary") 62 | main() -------------------------------------------------------------------------------- /source/cdk/canary/requirements.txt: -------------------------------------------------------------------------------- 1 | pyjwt 2 | requests -------------------------------------------------------------------------------- /source/cdk/cdk.json: -------------------------------------------------------------------------------- 1 | { 2 | "app": "npx ts-node --prefer-ts-exts bin/cdk.ts", 3 | "requireApproval": "never", 4 | "watch": { 5 | "include": [ 6 | "**" 7 | ], 8 | "exclude": [ 9 | "README.md", 10 | "cdk*.json", 11 | "**/*.d.ts", 12 | "**/*.js", 13 | "tsconfig.json", 14 | "package*.json", 15 | "yarn.lock", 16 | "node_modules", 17 | "test" 18 | ] 19 | }, 20 | "context": { 21 | "@aws-cdk/aws-apigateway:usagePlanKeyOrderInsensitiveId": true, 22 | "@aws-cdk/core:stackRelativeExports": true, 23 | "@aws-cdk/aws-rds:lowercaseDbIdentifier": true, 24 | "@aws-cdk/aws-lambda:recognizeVersionProps": true, 25 | "@aws-cdk/aws-lambda:recognizeLayerVersion": true, 26 | "@aws-cdk/aws-cloudfront:defaultSecurityPolicyTLSv1.2_2021": true, 27 | "@aws-cdk-containers/ecs-service-extensions:enableDefaultLogDriver": true, 28 | "@aws-cdk/aws-ec2:uniqueImdsv2TemplateName": true, 29 | "@aws-cdk/core:checkSecretUsage": true, 30 | "@aws-cdk/aws-iam:minimizePolicies": true, 31 | "@aws-cdk/core:validateSnapshotRemovalPolicy": true, 32 | "@aws-cdk/aws-codepipeline:crossAccountKeyAliasStackSafeResourceName": true, 33 | "@aws-cdk/core:target-partitions": [ 34 | "aws", 35 | "aws-cn" 36 | ] 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /source/cdk/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | testEnvironment: 'node', 3 | roots: ['/test'], 4 | testMatch: ['**/*.test.ts'], 5 | transform: { 6 | '^.+\\.tsx?$': 'ts-jest' 7 | } 8 | }; 9 | -------------------------------------------------------------------------------- /source/cdk/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "cdk", 3 | "version": "0.1.0", 4 | "bin": { 5 | "cdk": "bin/cdk.js" 6 | }, 7 | "scripts": { 8 | "build": "tsc", 9 | "watch": "tsc -w", 10 | "test": "jest", 11 | "cdk": "cdk" 12 | }, 13 | "devDependencies": { 14 | "@aws-cdk/aws-kinesisfirehose-destinations-alpha": "^2.100.0-alpha.0", 15 | "@types/jest": "^29.5.5", 16 | "@types/node": "^16.0.0", 17 | "@types/prettier": "^2.6.0", 18 | "aws-cdk": "^2.100.0", 19 | "constructs": "^10.3.0", 20 | "jest": "^29.7.0", 21 | "source-map-support": "^0.5.21", 22 | "ts-jest": "^29.0.0", 23 | "ts-node": "^10.9.1", 24 | "typescript": "^5.2.2" 25 | }, 26 | "dependencies": { 27 | "@aws-cdk/aws-kinesisfirehose-alpha": "^2.100.0-alpha.0", 28 | "@aws-cdk/aws-kinesisfirehose-destinations-alpha": "^2.100.0-alpha.0", 29 | "aws-cdk-lib": "^2.100.0", 30 | "constructs": "^10.3.0", 31 | "source-map-support": "^0.5.21" 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /source/cdk/statemachines/sfn_check_canary.asl.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "A description of my state machine", 3 | "StartAt": "Map", 4 | "States": { 5 | "Map": { 6 | "Type": "Map", 7 | "Next": "Choice (3)", 8 | "Parameters": { 9 | "cellId.$": "$$.Map.Item.Value", 10 | "waitseconds.$": "$.waitseconds" 11 | }, 12 | "Iterator": { 13 | 14 | "StartAt": "Pass", 15 | "States": { 16 | "Pass": { 17 | "Type": "Pass", 18 | "Next": "GetCanary", 19 | "Parameters": { 20 | "cellId.$": "$.cellId", 21 | "canaryname.$": "States.Format('cell-canary-{}', $.cellId)", 22 | "waitseconds.$": "$.waitseconds" 23 | } 24 | }, 25 | "GetCanary": { 26 | "Type": "Task", 27 | "Next": "Choice (1)", 28 | "Parameters": { 29 | "Name.$": "$.canaryname" 30 | }, 31 | "Resource": "arn:aws:states:::aws-sdk:synthetics:getCanary", 32 | "ResultPath": "$.canary", 33 | "ResultSelector": { 34 | "status.$": "$.Canary.Status.State" 35 | } 36 | }, 37 | "Choice (1)": { 38 | "Type": "Choice", 39 | "Choices": [ 40 | { 41 | "Variable": "$.canary.status", 42 | "StringEquals": "RUNNING", 43 | "Next": "Wait" 44 | } 45 | ], 46 | "Default": "StartCanary" 47 | }, 48 | "StartCanary": { 49 | "Type": "Task", 50 | "Next": "Wait", 51 | "Parameters": { 52 | "Name.$": "$.canaryname" 53 | }, 54 | "Resource": "arn:aws:states:::aws-sdk:synthetics:startCanary", 55 | "ResultPath": null 56 | }, 57 | "Wait": { 58 | "Type": "Wait", 59 | "Next": "GetCanaryRuns", 60 | "SecondsPath": "$.waitseconds" 61 | }, 62 | "GetCanaryRuns": { 63 | "Type": "Task", 64 | "Parameters": { 65 | "Name.$": "$.canaryname", 66 | "MaxResults": 1 67 | }, 68 | "Resource": "arn:aws:states:::aws-sdk:synthetics:getCanaryRuns", 69 | "ResultPath": "$.lastrun", 70 | "ResultSelector": { 71 | "Status.$": "$.CanaryRuns[0].Status", 72 | "Timeline.$": "$.CanaryRuns[0].Timeline" 73 | }, 74 | "Next": "Choice" 75 | }, 76 | "Choice": { 77 | "Type": "Choice", 78 | "Choices": [ 79 | { 80 | "Variable": "$.lastrun.Status.State", 81 | "StringEquals": "PASSED", 82 | "Next": "Success" 83 | } 84 | ], 85 | "Default": "Fail" 86 | }, 87 | "Success": { 88 | "Type": "Succeed" 89 | }, 90 | "Fail": { 91 | "Type": "Fail" 92 | } 93 | } 94 | }, 95 | "ItemsPath": "$.cellIds", 96 | "Catch": [ 97 | { 98 | "ErrorEquals": [ 99 | "States.ALL" 100 | ], 101 | "Next": "Choice (2)", 102 | "ResultPath": "$.error" 103 | } 104 | ], 105 | "ResultPath": "$.map" 106 | }, 107 | "Choice (3)": { 108 | "Type": "Choice", 109 | "Choices": [ 110 | { 111 | "Variable": "$.pipeline_id", 112 | "IsPresent": true, 113 | "Next": "PutJobSuccessResult" 114 | } 115 | ], 116 | "Default": "Success (1)" 117 | }, 118 | "PutJobSuccessResult": { 119 | "Type": "Task", 120 | "Parameters": { 121 | "JobId.$": "$.pipeline_id" 122 | }, 123 | "Resource": "arn:aws:states:::aws-sdk:codepipeline:putJobSuccessResult", 124 | "Next": "Success (1)" 125 | }, 126 | "Choice (2)": { 127 | "Type": "Choice", 128 | "Choices": [ 129 | { 130 | "Variable": "$.pipeline_id", 131 | "IsPresent": true, 132 | "Next": "PutJobFailureResult (1)" 133 | } 134 | ], 135 | "Default": "Fail (1)" 136 | }, 137 | "PutJobFailureResult (1)": { 138 | "Type": "Task", 139 | "Next": "Fail (1)", 140 | "Parameters": { 141 | "FailureDetails": { 142 | "Message": "Failed", 143 | "Type": "JobFailed" 144 | }, 145 | "JobId.$": "$.pipeline_id" 146 | }, 147 | "Resource": "arn:aws:states:::aws-sdk:codepipeline:putJobFailureResult" 148 | }, 149 | "Fail (1)": { 150 | "Type": "Fail" 151 | }, 152 | "Success (1)": { 153 | "Type": "Succeed" 154 | } 155 | } 156 | } -------------------------------------------------------------------------------- /source/cdk/statemachines/sfn_create_cell.asl.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "A description of my state machine", 3 | "StartAt": "Pass", 4 | "States": { 5 | "Pass": { 6 | "Type": "Pass", 7 | "Next": "DynamoDB PutItem", 8 | "Parameters": { 9 | "stackName.$": "States.Format('Cellular-Cell-{}', $.cellId)", 10 | "cellId.$": "$.cellId", 11 | "templateUrl.$": "$.templateUrl", 12 | "stage.$": "$.stage", 13 | "imageuri.$": "$.imageuri" 14 | } 15 | }, 16 | "DynamoDB PutItem": { 17 | "Type": "Task", 18 | "Resource": "arn:aws:states:::dynamodb:putItem", 19 | "Parameters": { 20 | "TableName": "Cellular-Routing-Cells", 21 | "Item": { 22 | "cell_id": { 23 | "S.$": "$.cellId" 24 | }, 25 | "stackName": { 26 | "S.$": "$.stackName" 27 | }, 28 | "stackStatus": { 29 | "S": "creating" 30 | }, 31 | "sfn-execution-id": { 32 | "S.$": "$$.Execution.Id" 33 | }, 34 | "stage": { 35 | "S.$": "$.stage" 36 | } 37 | } 38 | }, 39 | "Next": "CreateStack", 40 | "ResultPath": null 41 | }, 42 | "CreateStack": { 43 | "Type": "Task", 44 | "Next": "Wait", 45 | "Parameters": { 46 | "StackName.$": "$.stackName", 47 | "TemplateURL.$": "$.templateUrl", 48 | "Capabilities": [ 49 | "CAPABILITY_NAMED_IAM" 50 | ], 51 | "Parameters": [ 52 | { 53 | "ParameterKey": "cellid", 54 | "ParameterValue.$": "$.cellId" 55 | }, 56 | { 57 | "ParameterKey": "imageuri", 58 | "ParameterValue.$": "$.imageuri" 59 | } 60 | ] 61 | }, 62 | "Resource": "arn:aws:states:::aws-sdk:cloudformation:createStack", 63 | "ResultPath": null, 64 | "Catch": [ 65 | { 66 | "ErrorEquals": [ 67 | "States.TaskFailed" 68 | ], 69 | "Next": "DynamoDB UpdateItem (1)", 70 | "ResultPath": "$.error" 71 | } 72 | ] 73 | }, 74 | "Wait": { 75 | "Type": "Wait", 76 | "Seconds": 5, 77 | "Next": "DescribeStacks" 78 | }, 79 | "DescribeStacks": { 80 | "Type": "Task", 81 | "Parameters": { 82 | "StackName.$": "$.stackName" 83 | }, 84 | "Resource": "arn:aws:states:::aws-sdk:cloudformation:describeStacks", 85 | "Next": "Choice", 86 | "ResultSelector": { 87 | "state.$": "$.Stacks[0].StackStatus" 88 | }, 89 | "ResultPath": "$.result" 90 | }, 91 | "Choice": { 92 | "Type": "Choice", 93 | "Choices": [ 94 | { 95 | "Variable": "$.result.state", 96 | "StringEquals": "CREATE_IN_PROGRESS", 97 | "Next": "Wait" 98 | }, 99 | { 100 | "Variable": "$.result.state", 101 | "StringEquals": "CREATE_COMPLETE", 102 | "Next": "DynamoDB UpdateItem" 103 | } 104 | ], 105 | "Default": "DynamoDB UpdateItem (1)" 106 | }, 107 | "DynamoDB UpdateItem (1)": { 108 | "Type": "Task", 109 | "Resource": "arn:aws:states:::dynamodb:updateItem", 110 | "Parameters": { 111 | "TableName": "Cellular-Routing-Cells", 112 | "Key": { 113 | "cell_id": { 114 | "S.$": "$.cellId" 115 | } 116 | }, 117 | "UpdateExpression": "SET stackStatus = :s", 118 | "ExpressionAttributeValues": { 119 | ":s": { 120 | "S": "create_failed" 121 | } 122 | } 123 | }, 124 | "Next": "Fail" 125 | }, 126 | "DynamoDB UpdateItem": { 127 | "Type": "Task", 128 | "Resource": "arn:aws:states:::dynamodb:updateItem", 129 | "Parameters": { 130 | "TableName": "Cellular-Routing-Cells", 131 | "Key": { 132 | "cell_id": { 133 | "S.$": "$.cellId" 134 | } 135 | }, 136 | "UpdateExpression": "SET stackStatus = :s", 137 | "ExpressionAttributeValues": { 138 | ":s": { 139 | "S": "active" 140 | } 141 | } 142 | }, 143 | "Next": "Success" 144 | }, 145 | "Fail": { 146 | "Type": "Fail" 147 | }, 148 | "Success": { 149 | "Type": "Succeed" 150 | } 151 | } 152 | } -------------------------------------------------------------------------------- /source/cdk/statemachines/sfn_update_cell.asl.json: -------------------------------------------------------------------------------- 1 | { 2 | "Comment": "A description of my state machine", 3 | "StartAt": "Map", 4 | "States": { 5 | "Map": { 6 | "Type": "Map", 7 | "Next": "Choice (3)", 8 | "Parameters": { 9 | "cellId.$": "$$.Map.Item.Value", 10 | "templateUrl.$": "$.templateUrl" 11 | }, 12 | "Iterator": { 13 | "StartAt": "Pass", 14 | "States": { 15 | "Pass": { 16 | "Type": "Pass", 17 | "Next": "DynamoDB UpdateItem", 18 | "Parameters": { 19 | "stackName.$": "States.Format('Cellular-Cell-{}', $.cellId)", 20 | "cellId.$": "$.cellId", 21 | "templateUrl.$": "$.templateUrl" 22 | } 23 | }, 24 | "DynamoDB UpdateItem": { 25 | "Type": "Task", 26 | "Resource": "arn:aws:states:::dynamodb:updateItem", 27 | "Parameters": { 28 | "TableName": "Cellular-Routing-Cells", 29 | "Key": { 30 | "cell_id": { 31 | "S.$": "$.cellId" 32 | } 33 | }, 34 | "UpdateExpression": "SET stackStatus = :s", 35 | "ExpressionAttributeValues": { 36 | ":s": { 37 | "S": "updating" 38 | } 39 | }, 40 | "ConditionExpression": "attribute_exists(cell_id)" 41 | }, 42 | "Next": "UpdateStack", 43 | "ResultPath": null 44 | }, 45 | "UpdateStack": { 46 | "Type": "Task", 47 | "Next": "Wait", 48 | "Parameters": { 49 | "StackName.$": "$.stackName", 50 | "TemplateURL.$": "$.templateUrl", 51 | "Capabilities": [ 52 | "CAPABILITY_NAMED_IAM" 53 | ], 54 | "Parameters": [ 55 | { 56 | "ParameterKey": "cellid", 57 | "ParameterValue.$": "$.cellId" 58 | }, 59 | { 60 | "ParameterKey": "imageuri", 61 | "UsePreviousValue": "true" 62 | } 63 | ] 64 | }, 65 | "Resource": "arn:aws:states:::aws-sdk:cloudformation:updateStack", 66 | "ResultPath": null, 67 | "Catch": [ 68 | { 69 | "ErrorEquals": [ 70 | "States.TaskFailed" 71 | ], 72 | "Next": "Choice (1)", 73 | "ResultPath": "$.error" 74 | } 75 | ] 76 | }, 77 | "Choice (1)": { 78 | "Type": "Choice", 79 | "Choices": [ 80 | { 81 | "Variable": "$.error.Cause", 82 | "StringMatches": "No updates are to be performed*", 83 | "Next": "DDB Update Success" 84 | } 85 | ], 86 | "Default": "DDB Update Failed" 87 | }, 88 | "Wait": { 89 | "Type": "Wait", 90 | "Seconds": 5, 91 | "Next": "DescribeStacks" 92 | }, 93 | "DescribeStacks": { 94 | "Type": "Task", 95 | "Parameters": { 96 | "StackName.$": "$.stackName" 97 | }, 98 | "Resource": "arn:aws:states:::aws-sdk:cloudformation:describeStacks", 99 | "Next": "Choice", 100 | "ResultSelector": { 101 | "state.$": "$.Stacks[0].StackStatus" 102 | }, 103 | "ResultPath": "$.result" 104 | }, 105 | "Choice": { 106 | "Type": "Choice", 107 | "Choices": [ 108 | { 109 | "Or": [ 110 | { 111 | "Variable": "$.result.state", 112 | "StringEquals": "UPDATE_IN_PROGRESS" 113 | }, 114 | { 115 | "Variable": "$.result.state", 116 | "StringEquals": "UPDATE_COMPLETE_CLEANUP_IN_PROGRESS" 117 | } 118 | ], 119 | "Next": "Wait" 120 | }, 121 | { 122 | "Variable": "$.result.state", 123 | "StringEquals": "UPDATE_COMPLETE", 124 | "Next": "DDB Update Success" 125 | } 126 | ], 127 | "Default": "DDB Update Failed" 128 | }, 129 | "DDB Update Failed": { 130 | "Type": "Task", 131 | "Resource": "arn:aws:states:::dynamodb:updateItem", 132 | "Parameters": { 133 | "TableName": "Cellular-Routing-Cells", 134 | "Key": { 135 | "cell_id": { 136 | "S.$": "$.cellId" 137 | } 138 | }, 139 | "UpdateExpression": "SET stackStatus = :s", 140 | "ExpressionAttributeValues": { 141 | ":s": { 142 | "S": "update_failed" 143 | } 144 | }, 145 | "ConditionExpression": "attribute_exists(cell_id)" 146 | }, 147 | "Next": "Pass (1)", 148 | "ResultPath": null 149 | }, 150 | "Pass (1)": { 151 | "Type": "Pass", 152 | "Next": "Fail", 153 | "Parameters": { 154 | "cellId.$": "$.cellId", 155 | "result": "failure" 156 | } 157 | }, 158 | "DDB Update Success": { 159 | "Type": "Task", 160 | "Resource": "arn:aws:states:::dynamodb:updateItem", 161 | "Parameters": { 162 | "TableName": "Cellular-Routing-Cells", 163 | "Key": { 164 | "cell_id": { 165 | "S.$": "$.cellId" 166 | } 167 | }, 168 | "UpdateExpression": "SET stackStatus = :s", 169 | "ExpressionAttributeValues": { 170 | ":s": { 171 | "S": "active" 172 | } 173 | } 174 | }, 175 | "Next": "Pass (2)", 176 | "ResultPath": null 177 | }, 178 | "Pass (2)": { 179 | "Type": "Pass", 180 | "Next": "Success", 181 | "Result": { 182 | "cellId.$": "$.cellId", 183 | "result": "success" 184 | } 185 | }, 186 | "Fail": { 187 | "Type": "Fail" 188 | }, 189 | "Success": { 190 | "Type": "Succeed" 191 | } 192 | } 193 | }, 194 | "ItemsPath": "$.cellIds", 195 | "Catch": [ 196 | { 197 | "ErrorEquals": [ 198 | "States.ALL" 199 | ], 200 | "Next": "Choice (2)", 201 | "ResultPath": "$.error" 202 | } 203 | ], 204 | "ResultPath": "$.map" 205 | }, 206 | "Choice (3)": { 207 | "Type": "Choice", 208 | "Choices": [ 209 | { 210 | "Variable": "$.pipeline_id", 211 | "IsPresent": true, 212 | "Next": "PutJobSuccessResult" 213 | } 214 | ], 215 | "Default": "Success (1)" 216 | }, 217 | "PutJobSuccessResult": { 218 | "Type": "Task", 219 | "Parameters": { 220 | "JobId.$": "$.pipeline_id" 221 | }, 222 | "Resource": "arn:aws:states:::aws-sdk:codepipeline:putJobSuccessResult", 223 | "Next": "Success (1)" 224 | }, 225 | "Choice (2)": { 226 | "Type": "Choice", 227 | "Choices": [ 228 | { 229 | "Variable": "$.pipeline_id", 230 | "IsPresent": true, 231 | "Next": "PutJobFailureResult (1)" 232 | } 233 | ], 234 | "Default": "Fail (1)" 235 | }, 236 | "PutJobFailureResult (1)": { 237 | "Type": "Task", 238 | "Next": "Fail (1)", 239 | "Parameters": { 240 | "FailureDetails": { 241 | "Message": "Failed", 242 | "Type": "JobFailed" 243 | }, 244 | "JobId.$": "$.pipeline_id" 245 | }, 246 | "Resource": "arn:aws:states:::aws-sdk:codepipeline:putJobFailureResult" 247 | }, 248 | "Fail (1)": { 249 | "Type": "Fail" 250 | }, 251 | "Success (1)": { 252 | "Type": "Succeed" 253 | } 254 | } 255 | } -------------------------------------------------------------------------------- /source/cdk/templates/.dummy: -------------------------------------------------------------------------------- 1 | Needed for CDK deployment -------------------------------------------------------------------------------- /source/cdk/test/cdk.test.ts: -------------------------------------------------------------------------------- 1 | // import * as cdk from 'aws-cdk-lib'; 2 | // import { Template } from 'aws-cdk-lib/assertions'; 3 | // import * as Cdk from '../lib/cdk-stack'; 4 | 5 | // example test. To run these tests, uncomment this file along with the 6 | // example resource in lib/cdk-stack.ts 7 | test('SQS Queue Created', () => { 8 | // const app = new cdk.App(); 9 | // // WHEN 10 | // const stack = new Cdk.CdkStack(app, 'MyTestStack'); 11 | // // THEN 12 | // const template = Template.fromStack(stack); 13 | 14 | // template.hasResourceProperties('AWS::SQS::Queue', { 15 | // VisibilityTimeout: 300 16 | // }); 17 | }); 18 | -------------------------------------------------------------------------------- /source/cdk/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2018", 4 | "module": "commonjs", 5 | "lib": [ 6 | "es2018" 7 | ], 8 | "declaration": true, 9 | "strict": true, 10 | "noImplicitAny": true, 11 | "strictNullChecks": true, 12 | "noImplicitThis": true, 13 | "alwaysStrict": true, 14 | "noUnusedLocals": false, 15 | "noUnusedParameters": false, 16 | "noImplicitReturns": true, 17 | "noFallthroughCasesInSwitch": false, 18 | "inlineSourceMap": true, 19 | "inlineSources": true, 20 | "experimentalDecorators": true, 21 | "strictPropertyInitialization": false, 22 | "typeRoots": [ 23 | "./node_modules/@types" 24 | ] 25 | }, 26 | "exclude": [ 27 | "node_modules", 28 | "cdk.out" 29 | ] 30 | } 31 | -------------------------------------------------------------------------------- /source/cell-container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 public.ecr.aws/docker/library/python:3.11-slim 2 | # https://gallery.ecr.aws/docker/library/python 3 | 4 | RUN apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | curl \ 7 | && apt-get upgrade \ 8 | && apt-get clean \ 9 | && rm -rf /var/lib/apt/lists/* 10 | 11 | RUN groupadd --gid 9999 app \ 12 | && useradd --uid 9999 --gid 9999 -m app 13 | USER app 14 | WORKDIR /cell 15 | 16 | COPY requirements.txt . 17 | RUN pip3 install -r requirements.txt --no-cache-dir 18 | 19 | COPY app.py . 20 | 21 | HEALTHCHECK --interval=1m --timeout=30s \ 22 | CMD curl -f http://localhost:8080/ || exit 1 23 | CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=8080"] 24 | -------------------------------------------------------------------------------- /source/cell-container/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify 2 | import os 3 | import boto3 4 | from flask_httpauth import HTTPTokenAuth 5 | 6 | app = Flask(__name__) 7 | auth = HTTPTokenAuth(scheme='Bearer') 8 | 9 | table_name = os.environ.get('tableName') 10 | cell_id = os.environ.get('cellId') 11 | dynamodb = boto3.resource('dynamodb') 12 | ddb_table = dynamodb.Table(table_name) 13 | 14 | @auth.verify_token 15 | def verify_token(token): 16 | # Token is the user name. For a production environment, replace this with an authorisation mechanism such as JWT 17 | return token 18 | 19 | 20 | @app.route('/') 21 | def hello_world(): 22 | return 'Hey, we have Flask in a Docker container! (V2)' 23 | 24 | 25 | @app.route('/put', methods=['POST']) 26 | @auth.login_required 27 | def put(): 28 | print(request.get_data(), flush=True) 29 | r = request.get_json() 30 | print(r, flush=True) 31 | ddb_table.put_item( 32 | Item={ 33 | 'username': auth.current_user(), 34 | 'key': r['key'], 35 | 'value': r['value'], 36 | } 37 | ) 38 | return "Success" 39 | 40 | 41 | @app.route('/get', methods=['POST']) 42 | @auth.login_required 43 | def get(): 44 | r = request.get_json() 45 | item = ddb_table.get_item(Key={ 46 | 'username': auth.current_user(), 47 | 'key': r['key'], 48 | }) 49 | if 'Item' not in item: 50 | return 'Item not found', 404 51 | return jsonify({ 52 | 'value': item['Item']['value'] 53 | }) 54 | 55 | 56 | @app.route('/delete', methods=['POST']) 57 | @auth.login_required 58 | def delete(): 59 | r = request.get_json() 60 | ddb_table.delete_item(Key={ 61 | 'username': auth.current_user(), 62 | 'key': r['key'], 63 | }) 64 | return "Success" 65 | 66 | 67 | @app.route('/validate', methods=['POST', 'GET']) 68 | @auth.login_required 69 | def validate(): 70 | return jsonify({ 71 | 'username': auth.current_user(), 72 | 'cellid': cell_id, 73 | }) 74 | 75 | 76 | @app.route('/env') 77 | def env(): 78 | return table_name 79 | 80 | 81 | if __name__ == '__main__': 82 | app.run(host='0.0.0.0', port=80) -------------------------------------------------------------------------------- /source/cell-container/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask 2 | boto3 3 | Flask-HTTPAuth -------------------------------------------------------------------------------- /source/cells_for_codepipeline/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-cell-based-architecture-on-aws/a86892b4760adf1812b0fcec4b10eae84959e788/source/cells_for_codepipeline/__init__.py -------------------------------------------------------------------------------- /source/cells_for_codepipeline/cell_lib.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import os 3 | import json 4 | 5 | dynamodb = boto3.resource('dynamodb') 6 | codepipeline = boto3.client('codepipeline') 7 | stepfunction = boto3.client('stepfunctions') 8 | 9 | 10 | class CellClient(): 11 | def __init__(self): 12 | self.init_config() 13 | 14 | def init_config(self): 15 | self.cells_table = dynamodb.Table(os.environ['cellsTable']) 16 | self.step_function_arn = os.environ['updateCellsFunctionArn'] 17 | self.template_bucket_name = os.environ['templateBucketName'] 18 | 19 | def list_cells(self): 20 | cells = self.cells_table.scan()['Items'] 21 | return [c['cell_id'] for c in cells if c['cell_id'] != 'Sandbox'] 22 | 23 | def update_cells(self, job_id): 24 | stepfunction.start_execution( 25 | stateMachineArn=self.step_function_arn, 26 | name='update-{}'.format(job_id), 27 | input=json.dumps({ 28 | 'cellIds': self.list_cells(), 29 | 'templateUrl': 'https://{}/template_cell.yaml'.format(self.template_bucket_name), 30 | 'pipeline_id': job_id 31 | }), 32 | ) 33 | -------------------------------------------------------------------------------- /source/cells_for_codepipeline/handler.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import boto3 4 | from cell_lib import CellClient 5 | 6 | logger = logging.getLogger() 7 | logger.setLevel(logging.INFO) 8 | 9 | codepipeline = boto3.client('codepipeline') 10 | stepfunction = boto3.client('stepfunctions') 11 | 12 | cellClient = CellClient() 13 | 14 | 15 | def handler(event, context): 16 | logger.info(json.dumps(event)) 17 | job_id = event['CodePipeline.job']['id'] 18 | try: 19 | cellClient.update_cells(job_id) 20 | except Exception as error: 21 | logger.exception(error) 22 | response = codepipeline.put_job_failure_result( 23 | jobId=job_id, 24 | failureDetails={ 25 | 'type': 'JobFailed', 26 | 'message': f'{error.__class__.__name__}: {str(error)}' 27 | } 28 | ) 29 | logger.debug(response) 30 | -------------------------------------------------------------------------------- /source/cells_for_codepipeline/handler_test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | import boto3 3 | from cells_for_codepipeline.cell_lib import CellClient 4 | 5 | dynamodb = boto3.resource('dynamodb') 6 | cloudformation = boto3.client('cloudformation') 7 | stepfunction = boto3.client('stepfunctions') 8 | 9 | 10 | def get_cf_output(stackname, key): 11 | response = cloudformation.describe_stacks(StackName=stackname) 12 | outputs = response["Stacks"][0]["Outputs"] 13 | for output in outputs: 14 | if output["OutputKey"] == key: 15 | return output["OutputValue"] 16 | raise Exception( 17 | '"{}" does not exist for stack "{}"'.format(key, stackname)) 18 | 19 | 20 | class TestClient(CellClient): 21 | def init_config(self): 22 | self.cells_table = dynamodb.Table( 23 | get_cf_output('Cellular-Router', 'cellsTable')) 24 | self.step_function_arn = get_cf_output( 25 | 'Cellular-Router', 'updatecellsfunction') 26 | self.template_bucket_name = get_cf_output( 27 | 'Cellular-Router', 'bucketName') 28 | 29 | 30 | class Test_Handler(unittest.TestCase): 31 | def test_test(self): 32 | client = TestClient() 33 | cells = client.list_cells() 34 | self.assertTrue('Sandbox' not in cells) 35 | self.assertTrue(len(cells) >= 2) 36 | 37 | 38 | if __name__ == '__main__': 39 | unittest.main() 40 | -------------------------------------------------------------------------------- /source/cells_for_codepipeline/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-cell-based-architecture-on-aws/a86892b4760adf1812b0fcec4b10eae84959e788/source/cells_for_codepipeline/requirements.txt -------------------------------------------------------------------------------- /source/cellularctl/cellular.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import boto3 3 | import json 4 | import subprocess 5 | import os 6 | from pathlib import Path 7 | 8 | if 'AWS_REGION' in os.environ: 9 | region = os.environ['AWS_REGION'] 10 | # print('Using region ' + region) 11 | boto3.setup_default_session(region_name=region) 12 | 13 | if 'CDK_DOCKER' in os.environ: 14 | docker = os.environ['CDK_DOCKER'] 15 | # print('Using docker alternative ' + docker) 16 | else: 17 | docker = 'docker' 18 | 19 | 20 | cloudformation = boto3.client('cloudformation') 21 | dynamodb = boto3.resource('dynamodb') 22 | stepfunction = boto3.client('stepfunctions') 23 | s3 = boto3.client('s3') 24 | ecs = boto3.client('ecs') 25 | ec2 = boto3.client('ec2') 26 | aws_lambda = boto3.client('lambda') 27 | synthetics = boto3.client('synthetics') 28 | secretsmanager = boto3.client('secretsmanager') 29 | 30 | 31 | def run_cmd(cmd, dir=''): 32 | print() 33 | print('+ "{}" in directory "{}"'.format(cmd, dir)) 34 | cwd = str(Path(__file__).parent.parent) + '/' + dir 35 | try: 36 | subprocess.run(cmd, shell=True, check=True, cwd=cwd) 37 | except subprocess.CalledProcessError: 38 | print('Command returned non-zero exit status.') 39 | sys.exit(1) 40 | 41 | 42 | def get_cf_output(stackname, key): 43 | response = cloudformation.describe_stacks(StackName=stackname) 44 | outputs = response["Stacks"][0]["Outputs"] 45 | for output in outputs: 46 | if output["OutputKey"] == key: 47 | return output["OutputValue"] 48 | raise Exception( 49 | '"{}" does not exist for stack "{}"'.format(key, stackname)) 50 | 51 | 52 | def recreate_containers(stack_name): 53 | ecs.update_service( 54 | cluster=get_cf_output(stack_name, 'clusterName'), 55 | service=get_cf_output(stack_name, 'serviceName'), 56 | forceNewDeployment=True 57 | ) 58 | 59 | 60 | def get_cells(): 61 | cells_table = dynamodb.Table( 62 | get_cf_output('Cellular-Router', 'cellsTable')) 63 | return cells_table.scan()['Items'] 64 | 65 | 66 | def build_repo(repo, dir): 67 | run_cmd( 68 | f'''aws ecr get-login-password | 69 | {docker} login --username AWS --password-stdin {repo}''') 70 | run_cmd(f'{docker} build -t {repo}:latest .', dir=dir) 71 | run_cmd(f'{docker} push {repo}:latest') 72 | 73 | 74 | def get_aws_env(): 75 | env = dict() 76 | for key in ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 77 | 'AWS_SESSION_TOKEN']: 78 | if key in os.environ.keys(): 79 | env[key] = os.environ[key] 80 | return env 81 | 82 | 83 | def run_local(dir, env): 84 | run_cmd(f'{docker} build --tag {dir} .', dir) 85 | command = [docker, 'run', '-i', '-t', '-p', '8080:8080'] 86 | env['AWS_DEFAULT_REGION'] = boto3.session.Session().region_name 87 | env.update(get_aws_env()) 88 | for k, v in env.items(): 89 | command.append('-e') 90 | command.append('{}={}'.format(k, v)) 91 | command.append(dir) 92 | try: 93 | run_cmd(' '.join(command), dir) 94 | except KeyboardInterrupt: 95 | pass 96 | 97 | 98 | def start_sfn(arn, input={}, name=None): 99 | res = stepfunction.start_execution( 100 | stateMachineArn=arn, 101 | name=name, 102 | input=json.dumps(input), 103 | ) 104 | print(res) 105 | 106 | 107 | def delete_cell(cell_id): 108 | cells_table = dynamodb.Table( 109 | get_cf_output('Cellular-Router', 'cellsTable')) 110 | item = cells_table.get_item(Key={'cell_id': cell_id}) 111 | if 'Item' not in item: 112 | raise Exception('Cell "{}" does not exist'.format(cell_id)) 113 | print('Deleting DDB table item for cell') 114 | cells_table.delete_item(Key={'cell_id': cell_id}) 115 | stack_name = item['Item']['stackName'] 116 | print('Requesting deletion of stack "{}"'.format(stack_name)) 117 | cloudformation.delete_stack(StackName=stack_name) 118 | 119 | 120 | def generate_template(upload=True): 121 | run_cmd('cdk synth Cellular-Cell-sandbox > templates/template_cell.yaml', 122 | 'cdk') 123 | bucket = get_cf_output('Cellular-Router', 'bucketName') 124 | if upload: 125 | for p in Path('source/cdk/templates').glob('*.yaml'): 126 | print('Uploading ' + p.name) 127 | s3.upload_file(str(p), bucket, p.name) 128 | 129 | 130 | def tagnodelete(): 131 | res = aws_lambda.list_functions() 132 | for f in res['Functions']: 133 | name = f['FunctionName'] 134 | if name.startswith('cwsyn-cell-canary-'): 135 | print('Tagging function "{}"'.format(name)) 136 | aws_lambda.tag_resource( 137 | Resource=f['FunctionArn'], 138 | Tags={ 139 | "auto-delete": "no" 140 | } 141 | ) 142 | 143 | 144 | def invoke_lambda(): 145 | r = aws_lambda.invoke( 146 | FunctionName='cellCanaryToken', 147 | Payload=json.dumps({"cellid": "test"}), 148 | # Payload='{ "cellid": "test" }', 149 | ) 150 | print(json.load(r['Payload'])) 151 | 152 | 153 | def start_stop_canaries(action): 154 | res = synthetics.describe_canaries() 155 | for c in res['Canaries']: 156 | name = c['Name'] 157 | if name.startswith('cell-canary-'): 158 | if action == 'start': 159 | print('Starting canary "{}"'.format(name)) 160 | synthetics.start_canary(Name=name) 161 | elif action == 'stop': 162 | print('Stopping canary "{}"'.format(name)) 163 | synthetics.stop_canary(Name=name) 164 | else: 165 | raise Exception('Unknown action "{}"'.format(action)) 166 | 167 | 168 | def destroy_stack(stack_name): 169 | cloudformation.delete_stack(StackName=stack_name) 170 | 171 | 172 | def get_user(username): 173 | users_table = dynamodb.Table( 174 | get_cf_output('Cellular-Router', 'usersTable')) 175 | result = users_table.get_item(Key={'username': username}) 176 | if 'Item' in result: 177 | return result['Item'] 178 | return None 179 | 180 | 181 | def get_users(): 182 | users_table = dynamodb.Table( 183 | get_cf_output('Cellular-Router', 'usersTable')) 184 | users = users_table.scan() 185 | return users 186 | 187 | def allow_ingress(ip): 188 | prefixListID = get_cf_output('Cellular-Repos', 'inboundPrefixListId') 189 | lists = ec2.describe_managed_prefix_lists( 190 | Filters=[{ 191 | 'Name': 'prefix-list-id', 192 | 'Values': [ 193 | prefixListID, 194 | ] 195 | }, 196 | ],) 197 | version = lists['PrefixLists'][0]['Version'] 198 | print(version+1) 199 | ec2.modify_managed_prefix_list( 200 | PrefixListId=prefixListID, 201 | AddEntries=[ 202 | { 203 | 'Cidr': ip+'/32', 204 | }, 205 | ], 206 | CurrentVersion = version 207 | ) -------------------------------------------------------------------------------- /source/cellularctl/cellularctl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import fire 3 | import yaml 4 | import cellular 5 | import requests 6 | from datetime import datetime 7 | 8 | cdkRequireApproval = 'broadening' 9 | if 'cdkRequireApproval' in os.environ: 10 | cdkRequireApproval = os.environ['cdkRequireApproval'] 11 | 12 | # The following classes build the CLI actions. Each class and function in a 13 | # class corresponds to an action. 14 | 15 | 16 | class Cell(object): 17 | def list(self): 18 | cells = cellular.get_cells() 19 | print(yaml.dump(cells)) 20 | 21 | def create(self, name, stage='prod'): 22 | """Calls the stepfunction to create a new cell""" 23 | arn = cellular.get_cf_output('Cellular-Router', 'createcellfunction') 24 | bucket = cellular.get_cf_output( 25 | 'Cellular-Router', 'bucketRegionalDomainName') 26 | imageuri = cellular.get_cf_output('Cellular-Repos', 'repoCellUri') 27 | now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 28 | cellular.start_sfn(arn, { 29 | 'cellId': name, 30 | 'templateUrl': 'https://{}/template_cell.yaml'.format(bucket), 31 | 'stage': stage, 32 | 'imageuri': imageuri, 33 | }, name='create-cell-{}-{}'.format(name, now)) 34 | 35 | def delete(self, name): 36 | cellular.delete_cell(name) 37 | 38 | def update(self, *cells): 39 | """Calls the stepfunction to update multiple cells""" 40 | if len(cells) == 0: 41 | print('No cells to update') 42 | return 43 | arn = cellular.get_cf_output('Cellular-Router', 'updatecellsfunction') 44 | bucket = cellular.get_cf_output( 45 | 'Cellular-Router', 'bucketRegionalDomainName') 46 | cellular.start_sfn(arn, { 47 | # 'cellIds': [{'cellId': 'cell1'}, {'cellId': 'cell2'}], 48 | 'cellIds': list(cells), 49 | 'templateUrl': 'https://{}/template_cell.yaml'.format(bucket), 50 | }, name='update-cells-{}'.format(datetime.now().strftime("%Y-%m-%d_%H-%M-%S"))) 51 | 52 | def build(self, deploy=False): 53 | """Builds the cell container and pushed it to the repo. Deploy restarts 54 | containers in all cells, thereby pulling the lasted image from ECR. """ 55 | repo = cellular.get_cf_output('Cellular-Repos', 'repoCellUri') 56 | cellular.build_repo(repo, 'cell-container') 57 | if deploy: 58 | for cell in cellular.get_cells(): 59 | cellular.recreate_containers(cell['stackName']) 60 | print('Restarting container for ' + cell['cell_id']) 61 | 62 | def runlocal(self): 63 | """Run the cell container in a locally (using docker run). 64 | Useful for rapid testing.""" 65 | cellular.run_local('cell-container', { 66 | 'tableName': cellular.get_cf_output('Cellular-Cell-sandbox', 'ddbTableName'), 67 | }) 68 | 69 | def generate_template(self, noupload=False): 70 | """Recreate the Cfn template used to create cells and upload it to S3.""" 71 | cellular.generate_template(not noupload) 72 | 73 | def deploysandbox(self): 74 | """Deploy the Sandbox cell directly via the cdk CLI (Use only for updates). 75 | Doesn't interact with DDB table and doesn't supply parameters. """ 76 | imageuri = cellular.get_cf_output('Cellular-Repos', 'repoCellUri') 77 | cellular.run_cmd('cdk deploy Cellular-Cell-sandbox ' + 78 | '--require-approval {} '.format(cdkRequireApproval) + 79 | '--parameters cellid=sandbox ' + 80 | '--parameters imageuri='+imageuri, 'cdk') 81 | 82 | 83 | class Router(object): 84 | def build(self, deploy=False): 85 | """Builds the cell container and pushed it to the repo. 86 | --deploy restarts the container, thereby pulling the 87 | last image from ECR. """ 88 | repo = cellular.get_cf_output('Cellular-Repos', 'repoRoutingUri') 89 | cellular.build_repo(repo, 'routing-container') 90 | if deploy: 91 | cellular.recreate_containers('Cellular-Router') 92 | print('Restarting container') 93 | 94 | def runlocal(self): 95 | """Run the router container in a locally (using docker run). Useful for rapid testing.""" 96 | cellular.run_local('routing-container', { 97 | 'cellsTableName': cellular.get_cf_output('Cellular-Router', 'cellsTable'), 98 | 'usersTableName': cellular.get_cf_output('Cellular-Router', 'usersTable'), 99 | }) 100 | 101 | def deploy(self): 102 | imageuri = cellular.get_cf_output('Cellular-Repos', 'repoRoutingUri') 103 | cellular.run_cmd('cdk deploy Cellular-Router ' 104 | '--require-approval {} '.format(cdkRequireApproval) + 105 | '--parameters imageuri={}'.format(imageuri), 'cdk') 106 | 107 | def getdnsname(self): 108 | print(cellular.get_cf_output('Cellular-Router', 'dnsName')) 109 | 110 | 111 | class Setup: 112 | def deploy(self, createcells=False): 113 | """Deploy the solution to a new region or account.""" 114 | cellular.run_cmd('npm ci', 'cdk') 115 | cellular.run_cmd('npm run build', 'cdk') 116 | cellular.run_cmd('cdk bootstrap', 'cdk') 117 | # cellular.run_cmd('cdk deploy Cellular-Repos', 'cdk') 118 | Repos().deploy() 119 | Router().build() 120 | Cell().build() 121 | Router().deploy() 122 | cellular.generate_template() 123 | Cell().create('sandbox', 'sandbox') 124 | if createcells: 125 | Cell().create('cell1') 126 | Cell().create('cell2') 127 | 128 | def destroy(self): 129 | for c in cellular.get_cells(): 130 | cellular.destroy_stack(c['stackName']) 131 | cellular.destroy_stack('Cellular-Router') 132 | cellular.destroy_stack('Cellular-Repos') 133 | 134 | def tagnodelete(self): 135 | cellular.tagnodelete() 136 | 137 | def allowingress(self, ip='myip'): 138 | """Add an IP v4 address to the prefix list to allow inbound traffic. 139 | Per default gets your IP address from https://checkip.amazonaws.com""" 140 | if ip == 'myip': 141 | ip = requests.get('https://checkip.amazonaws.com').text.strip() 142 | cellular.allow_ingress(ip) 143 | 144 | 145 | class Repos: 146 | def deploy(self): 147 | cellular.run_cmd('cdk deploy Cellular-Repos ' 148 | '--require-approval {} '.format(cdkRequireApproval), 'cdk') 149 | 150 | 151 | class Canary: 152 | def startall(self): 153 | cellular.start_stop_canaries('start') 154 | 155 | def stopall(self): 156 | cellular.start_stop_canaries('stop') 157 | 158 | def check(self, *cells): 159 | arn = cellular.get_cf_output( 160 | 'Cellular-Router', 'checkcanarystatemachine') 161 | now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S") 162 | cellular.start_sfn(arn, { 163 | 'cellIds': list(cells), 164 | 'waitseconds': 0, 165 | }, name='check-canary-{}'.format(now)) 166 | 167 | 168 | class Function: 169 | def call(self): 170 | """Invoke a lambda function for testing.""" 171 | cellular.invoke_lambda() 172 | 173 | 174 | class User: 175 | def get(self, username): 176 | user = cellular.get_user(username) 177 | if user is None: 178 | 'User "{}" does not exist'.format(username) 179 | print(user) 180 | 181 | def list(self): 182 | for u in cellular.get_users()['Items']: 183 | print(u['username']) 184 | 185 | def cell(self, username): 186 | user = cellular.get_user(username) 187 | if user is None: 188 | 'User "{}" does not exist'.format(username) 189 | print(user['cell']) 190 | 191 | 192 | class Main: 193 | def cell(self): 194 | return Cell 195 | 196 | def router(self): 197 | return Router 198 | 199 | def function(self): 200 | return Function 201 | 202 | def repos(self): 203 | return Repos 204 | 205 | def user(self): 206 | return User 207 | 208 | def setup(self): 209 | return Setup 210 | 211 | def canary(self): 212 | return Canary 213 | 214 | 215 | if __name__ == '__main__': 216 | fire.Fire(Main) 217 | -------------------------------------------------------------------------------- /source/client/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/aws-solutions-library-samples/guidance-for-cell-based-architecture-on-aws/a86892b4760adf1812b0fcec4b10eae84959e788/source/client/__init__.py -------------------------------------------------------------------------------- /source/client/client_lib.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | class Client: 5 | def __init__(self, router, username): 6 | self.router = router 7 | self.username = username 8 | self.dnsNameCell = None 9 | 10 | def request(self, uri, data=None): 11 | try: 12 | r = requests.post('http://' + self.router + uri, json=data, timeout=5) 13 | except requests.exceptions.ConnectTimeout: 14 | print('Request timed out. Did you allow inbound traffic from your external IP? (See README.md)') 15 | exit(1) 16 | self.check_request_status(r) 17 | return r 18 | 19 | def request_cell(self, uri, data=None): 20 | if not self.dnsNameCell: 21 | raise Exception('Not logged in.') 22 | r = requests.post('http://' + self.dnsNameCell + uri, json=data, timeout=5, 23 | headers={'Authorization': 'Bearer ' + self.username}) 24 | self.check_request_status(r) 25 | return r 26 | 27 | def check_request_status(self, r): 28 | r.raise_for_status() 29 | 30 | def register(self): 31 | r = self.request('/register', {'username': self.username}) 32 | 33 | def login(self): 34 | r = self.request('/login', { 35 | 'username': self.username, 36 | }) 37 | j = r.json() 38 | self.dnsNameCell = j['dns_name_cell'] 39 | 40 | def put(self, key, value): 41 | self.request_cell('/put', data={'key': key, 'value': value}) 42 | 43 | def get(self, key): 44 | try: 45 | res = self.request_cell('/get', data={'key': key}) 46 | except requests.exceptions.HTTPError as e: 47 | # 404 is a normal case here - return an error 48 | if e.response.status_code == 404: 49 | raise KeyError 50 | else: 51 | raise e 52 | 53 | return res.json()['value'] 54 | 55 | def delete(self, key): 56 | self.request_cell('/delete', data={'key': key}) 57 | 58 | def validate(self): 59 | return self.request_cell('/validate') 60 | -------------------------------------------------------------------------------- /source/client/clientctl.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from client_lib import Client 4 | import fire 5 | 6 | if not 'routerurl' in os.environ: 7 | print('Evironment variable "routerurl" not set.') 8 | print('Please use, .e.g.:') 9 | print('export routerurl=$(./cellularctl router getdnsname)') 10 | sys.exit(1) 11 | routerurl = os.environ['routerurl'] 12 | 13 | def login(username): 14 | c = Client(routerurl, username) 15 | c.login() 16 | return c 17 | 18 | class Exec: 19 | def validate(self, username): 20 | c = login(username) 21 | print(c.validate().json()) 22 | 23 | def put(self, username, key, value): 24 | c = login(username) 25 | c.put(key, value) 26 | 27 | def get(self, username, key): 28 | c = login(username) 29 | print(c.get(key)) 30 | 31 | def delete(self, username, key): 32 | c = login(username) 33 | c.delete(key) 34 | 35 | def getcell(self, username): 36 | c = login(username) 37 | print(c.validate().json()['cellid']) 38 | 39 | 40 | class Main: 41 | def register(self, username): 42 | '''Registers a new users, optionally can specify the cell. 43 | Will create file "username.apikey"''' 44 | c = Client(routerurl, username) 45 | c.register() 46 | 47 | def exec(self): 48 | '''Execute a command against the server. Each command first logs in to the router. 49 | Each command uses "username.apikey" from the current working directory.''' 50 | return Exec 51 | 52 | 53 | if __name__ == '__main__': 54 | fire.Fire(Main) 55 | -------------------------------------------------------------------------------- /source/client/test_cell.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from testlibs import get_cf_output, dynamodb 3 | from boto3.dynamodb.conditions import Key 4 | import client_lib 5 | 6 | cell_table = dynamodb.Table(get_cf_output('Cellular-Cell-sandbox', 'ddbTableName')) 7 | 8 | 9 | class TestClient(client_lib.Client): 10 | server = '127.0.0.1:8080' 11 | 12 | def __init__(self, username): 13 | super().__init__(None, username) 14 | self.dnsNameCell = TestClient.server 15 | 16 | 17 | class MyTestCase(unittest.TestCase): 18 | def setUp(self): 19 | self.client = TestClient(self._testMethodName) 20 | 21 | def test_put(self): 22 | self.client.put('key', 'value') 23 | item = cell_table.get_item(Key={ 24 | 'username': self.client.username, 25 | 'key': 'key', 26 | }) 27 | self.assertTrue('Item' in item) 28 | self.assertEqual('value', item['Item']['value']) 29 | 30 | def test_put_and_get(self): 31 | self.client.put('key', 'value') 32 | res = self.client.get('key') 33 | self.assertEqual('value', res) 34 | 35 | def test_put_and_get2(self): 36 | self.client.put('key1', 'value1') 37 | self.client.put('key2', 'value2') 38 | self.assertEqual('value1', self.client.get('key1')) 39 | self.assertEqual('value2', self.client.get('key2')) 40 | 41 | def test_get_non_existing(self): 42 | with self.assertRaises(KeyError): 43 | self.client.get('key1') 44 | 45 | def test_delete(self): 46 | self.client.put('key4', 'value') 47 | self.client.delete('key4') 48 | with self.assertRaises(KeyError): 49 | self.client.get('key4') 50 | 51 | def tearDown(self): 52 | rows = cell_table.query( 53 | KeyConditionExpression=Key('username').eq(self.client.username), 54 | ) 55 | with cell_table.batch_writer() as writer: 56 | for item in rows['Items']: 57 | writer.delete_item(Key={ 58 | 'username': item['username'], 59 | 'key': item['key'], 60 | }) 61 | 62 | 63 | server_remote = get_cf_output('Cellular-Router', 'dnsName') 64 | server_local = '127.0.0.1:8080' 65 | TestClient.server = server_local 66 | 67 | if __name__ == '__main__': 68 | unittest.main() 69 | -------------------------------------------------------------------------------- /source/client/test_integration.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from testlibs import get_cf_output, users_table 3 | import client_lib 4 | 5 | 6 | class TestClient(client_lib.Client): 7 | def __init__(self, router, username): 8 | super().__init__(router, username) 9 | 10 | 11 | class MyTestCase(unittest.TestCase): 12 | def setUp(self): 13 | server = 'http://' + get_cf_output('Cellular-Router', 'dnsName') 14 | self.client = client_lib.Client(server, self._testMethodName) 15 | 16 | def test_put_and_get(self): 17 | self.client.register() 18 | self.client.login() 19 | self.client.put('key1', 'value2') 20 | res = self.client.get('key1') 21 | self.assertEqual(res, 'value2') 22 | 23 | def tearDown(self): 24 | # TODO delete items 25 | self.client.delete('key1') 26 | users_table.delete_item(Key={'username': self.client.username}) 27 | 28 | 29 | if __name__ == '__main__': 30 | unittest.main() 31 | -------------------------------------------------------------------------------- /source/client/test_router.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import unittest 3 | from testlibs import users_table, get_cf_output 4 | import client_lib 5 | 6 | tc = unittest.TestCase() 7 | 8 | 9 | class TestClient(client_lib.Client): 10 | def __init__(self, router, username): 11 | super().__init__(router, username) 12 | self.expected_status = None 13 | 14 | def check_request_status(self, r): 15 | expected = self.expected_status 16 | self.expected_status = None 17 | if expected != None: 18 | tc.assertEqual(r.status_code, expected) 19 | else: 20 | super().check_request_status(r) 21 | 22 | 23 | class Test_Router(unittest.TestCase): 24 | server = '127.0.0.1:8080' 25 | 26 | def userid(self): 27 | return self._testMethodName 28 | 29 | def apikey(self): 30 | return self._testMethodName + '--APIKEY' 31 | 32 | def setUp(self): 33 | self.users = list() 34 | self.users.append(self.userid()) 35 | self.client = TestClient(self.server, self.userid()) 36 | self.client.apikey = self.apikey() 37 | 38 | def addUser(self, username=None, apikey=None): 39 | if username == None: 40 | username = self.userid() 41 | if apikey == None: 42 | apikey = self.apikey() 43 | self.users.append(username) 44 | users_table.put_item( 45 | Item={ 46 | 'username': username, 47 | 'apikey': apikey, 48 | 'cell': 'sandbox', 49 | } 50 | ) 51 | 52 | def test_register2(self): 53 | c = TestClient(self.server, self.userid()) 54 | c.register() 55 | item = users_table.get_item(Key={'username': self.userid()}) 56 | self.assertTrue('Item' in item) 57 | self.assertTrue('cell' in item['Item']) 58 | 59 | def test_register_and_login(self): 60 | self.addUser('test2@test.com') 61 | r = requests.post('http://' + self.server + '/login', 62 | json={'username': 'test2@test.com'}) 63 | self.assertEqual(r.status_code, 200) 64 | 65 | def test_register_and_login2(self): 66 | self.addUser(self.userid()) 67 | self.client.login() 68 | self.assertIsNotNone(self.client.dnsNameCell) 69 | 70 | 71 | def test_register_twice(self): 72 | self.addUser('test') 73 | r = requests.post('http://' + self.server + '/register', json={'username': 'test'}) 74 | self.assertEqual(r.status_code, 409) 75 | 76 | def test_register_twice2(self): 77 | self.client.register() 78 | self.client.expected_status = 409 79 | try: 80 | self.client.register() 81 | except requests.exceptions.JSONDecodeError: 82 | pass 83 | 84 | def test_validate(self): 85 | self.addUser() 86 | r = requests.post('http://' + self.server + '/login', 87 | json={'username': self.userid()}) 88 | self.assertEqual(r.status_code, 200) 89 | r = requests.get('http://' + self.server + '/validate', headers={'Authorization': 'Bearer ' + self.userid()}) 90 | self.assertEqual(r.status_code, 200) 91 | self.assertEqual(r.json()['username'], self.userid()) 92 | 93 | # @unittest.skip("Not implemented yet") 94 | def test_url_in_login(self): 95 | self.addUser('test5') 96 | r = requests.post('http://' + self.server + '/login', 97 | json={'username': 'test5'}) 98 | dns_name_cell = get_cf_output('Cellular-Cell-sandbox', 'dnsName') 99 | self.assertEqual(r.json()['dns_name_cell'], dns_name_cell) 100 | 101 | def tearDown(self): 102 | for user in self.users: 103 | users_table.delete_item(Key={'username': user}) 104 | 105 | 106 | server_remote = get_cf_output('Cellular-Router', 'dnsName') 107 | server_local = '127.0.0.1:8080' 108 | Test_Router.server = server_local 109 | 110 | if __name__ == '__main__': 111 | unittest.main() 112 | -------------------------------------------------------------------------------- /source/client/testlibs.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | 3 | cf_client = boto3.client('cloudformation') 4 | 5 | 6 | def get_cf_output(stackname, key): 7 | response = cf_client.describe_stacks(StackName=stackname) 8 | outputs = response["Stacks"][0]["Outputs"] 9 | for output in outputs: 10 | if output["OutputKey"] == key: 11 | return output["OutputValue"] 12 | raise Exception('"{}" does not exist for stack "{}"'.format(key, stackname)) 13 | 14 | 15 | dynamodb = boto3.resource('dynamodb') 16 | users_table = dynamodb.Table(get_cf_output('Cellular-Router', 'usersTable')) 17 | -------------------------------------------------------------------------------- /source/routing-container/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM --platform=linux/amd64 public.ecr.aws/docker/library/python:3.11-slim 2 | # https://gallery.ecr.aws/docker/library/python 3 | 4 | RUN apt-get update && \ 5 | apt-get install -y --no-install-recommends \ 6 | curl \ 7 | && apt-get upgrade \ 8 | && apt-get clean \ 9 | && rm -rf /var/lib/apt/lists/* 10 | 11 | RUN groupadd --gid 9999 app \ 12 | && useradd --uid 9999 --gid 9999 -m app 13 | 14 | USER app 15 | 16 | WORKDIR /cell 17 | 18 | COPY --chown=app:app requirements.txt . 19 | RUN pip3 install -r requirements.txt --no-cache-dir 20 | 21 | COPY --chown=app:app *.py ./ 22 | 23 | HEALTHCHECK --interval=1m --timeout=30s \ 24 | CMD curl -f http://localhost:8080/ || exit 1 25 | 26 | CMD [ "python3", "-m" , "flask", "run", "--host=0.0.0.0", "--port=8080"] 27 | -------------------------------------------------------------------------------- /source/routing-container/app.py: -------------------------------------------------------------------------------- 1 | import routing 2 | from flask import Flask, request, jsonify 3 | from flask_httpauth import HTTPTokenAuth 4 | 5 | app = Flask(__name__) 6 | auth = HTTPTokenAuth(scheme='Bearer') 7 | 8 | 9 | @auth.verify_token 10 | def verify_token(token): 11 | # Token is the user name. For a production environment, replace this with an authorisation mechanism such as JWT 12 | return token 13 | 14 | 15 | @app.route('/') 16 | def hello_world(): 17 | return 'Hey, we have Flask in a Docker container!' 18 | 19 | 20 | @app.route('/cells') 21 | def cells(): 22 | return jsonify(routing.get_cells()) 23 | 24 | 25 | @app.route('/register', methods=['POST']) 26 | def register(): 27 | r = request.get_json() 28 | username = r['username'] 29 | if routing.get_user(username) is not None: 30 | return "User already exists", 409 31 | routing.create_user(username) 32 | return jsonify({ 33 | 'status': 'Sucess', 34 | 'username': username 35 | }) 36 | 37 | 38 | @app.route('/login', methods=['POST']) 39 | def login(): 40 | r = request.get_json() 41 | user = routing.get_user(r['username']) 42 | if user is None: 43 | return "Login failed", 401 44 | return jsonify({ 45 | 'dns_name_cell': routing.get_dns_name(user['cell']), 46 | }) 47 | 48 | 49 | @app.route('/validate') 50 | @auth.login_required 51 | def validate(): 52 | return jsonify({ 53 | 'username': auth.current_user() 54 | }) 55 | 56 | 57 | if __name__ == '__main__': 58 | app.run(host='0.0.0.0', port=80) -------------------------------------------------------------------------------- /source/routing-container/requirements.txt: -------------------------------------------------------------------------------- 1 | Flask 2 | boto3 3 | flask_httpauth -------------------------------------------------------------------------------- /source/routing-container/routing.py: -------------------------------------------------------------------------------- 1 | import os 2 | import boto3 3 | import random 4 | from boto3.dynamodb.conditions import Attr 5 | import secrets 6 | 7 | dynamodb = boto3.resource('dynamodb') 8 | cloudformation = boto3.client('cloudformation') 9 | cells_table = dynamodb.Table(os.environ.get('cellsTableName')) 10 | users_table = dynamodb.Table(os.environ.get('usersTableName')) 11 | 12 | 13 | def get_cf_output(stackname, key): 14 | response = cloudformation.describe_stacks(StackName=stackname) 15 | outputs = response["Stacks"][0]["Outputs"] 16 | for output in outputs: 17 | if output["OutputKey"] == key: 18 | return output["OutputValue"] 19 | raise Exception( 20 | '"{}" does not exist for stack "{}"'.format(key, stackname)) 21 | 22 | 23 | def get_cells(): 24 | res = cells_table.scan( 25 | FilterExpression=Attr('stackStatus').eq('active'), 26 | ProjectionExpression='cell_id' 27 | ) 28 | return res['Items'] 29 | 30 | 31 | def assign_cell(): 32 | cells = cells_table.scan( 33 | FilterExpression=Attr('stackStatus').eq( 34 | 'active') & Attr('stage').eq('prod'), 35 | ProjectionExpression='cell_id' 36 | ) 37 | return random.choice(cells['Items'])['cell_id'] 38 | 39 | 40 | def get_user(username): 41 | item = users_table.get_item(Key={'username': username}) 42 | if 'Item' in item: 43 | return item['Item'] 44 | return None 45 | 46 | 47 | def get_dns_name(cell_id): 48 | cell = cells_table.get_item(Key={'cell_id': cell_id}) 49 | stackname = cell['Item']['stackName'] 50 | return get_cf_output(stackname, 'dnsName') 51 | 52 | 53 | def create_user(username): 54 | users_table.put_item( 55 | Item={ 56 | 'username': username, 57 | 'cell': assign_cell() 58 | } 59 | ) --------------------------------------------------------------------------------